In [1]:
 import random

In [2]:
##Returns the adjusted strategy after an iteration
def getStrategy(regretSum,strategySum):
    ''' 
    Define each variable and give example values:
    - regretSum = [-2, -5, 2] ---> cumulative regret
    - strategy = [0.3333333333333333, 0.3333333333333333, 0.3333333333333333] or [0.0, 0.0, 1.0] or [0.3, 0.4, 0.3] ---> policy yielded by regret matching for next RPS showdown
    - strategySum = [1.0, 0.3333333333333333, 3.6666666666666665] ---> sum of probabilities in strategy array over all iterations
    - normalizingSum = 37 ---> used to get strategy array probabilities ---> strategy[i] = strategy[i]/normalizingSum ---> sum of positive regrets only
    '''
    actions = 3
    normalizingSum = 0
    strategy = [0,0,0]
    #Normalizingsum is the sum of positive regrets. 
    #This ensures do not 'over-adjust' and converge to equilibrium
    for i in range(0,actions):
        if regretSum[i] > 0:
            strategy[i] = regretSum[i]
        else:
            strategy[i] = 0
        normalizingSum += strategy[i]
    ##This loop normalizes our updated strategy
    for i in range(0,actions):
        if normalizingSum > 0:
            strategy[i] = strategy[i]/normalizingSum
        else:
            #Default to 33%
            strategy[i] = 1.0 / actions
        strategySum[i] += strategy[i]
    return (strategy, strategySum)

In [3]:
#Returns a random action according to the strategy
def getAction(strategy):
    ''' 
    As an example, use strategy = [0.3, 0.4, 0.3]
    This function returns 'ROCK' (0) if a random uniform generated value falls between 0-0.3
    This function returns 'PAPER' (2) if a random uniform generated value falls between 0.3-0.7 
    This function returns 'SCISSORS' (2) if a random uniform generated value falls between 0.7-1 
    '''
    r = random.uniform(0,1)
    if r >= 0 and r < strategy[0]:
        return 0
    elif r >= strategy[0] and r < strategy[0] + strategy[1]:
        return 1
    elif r >= strategy[0] + strategy[1] and r < sum(strategy):
        return 2
    else:
        return 0

In [4]:
def getScore(p1_action, p2_action):
    if p1_action == p2_action:
        return 'draw'
    elif p1_action == 0 and p2_action == 1:
        return 'p2 wins'
    elif p1_action == 0 and p2_action == 2:
        return 'p1 wins'
    elif p1_action == 1 and p2_action == 0:
        return 'p1 wins'
    elif p1_action == 1 and p2_action == 2:
        return 'p2 wins'
    elif p1_action == 2 and p2_action == 0:
        return 'p2 wins'
    elif p1_action == 2 and p2_action == 1:
        return 'p1 wins'

In [8]:
def train(iterations,p1_regretSum, p2_regretSum):
    ''' 
    Define each variable and give example values:
    - regretSum = [-2, -5, 2] ---> cumulative regret
    - oppStrategy = [0.3, 0.4, 0.3]
    - strategy = [0.3333333333333333, 0.3333333333333333, 0.3333333333333333] or [0.0, 0.0, 1.0] or [0.3, 0.4, 0.3] ---> policy yielded by regret matching for next RPS showdown
    - strategySum = [1.0, 0.3333333333333333, 3.6666666666666665] ---> sum of probabilities in strategy array over all iterations
    - myaction = 0, 1 or 2 ---> 'ROCK' 'PAPER' or 'SCISSORS'
    - otherAction = 0, 1 or 2 ---> 'ROCK' 'PAPER' or 'SCISSORS'
    - actionUtility = [-1, 1, -1] or [-1, 0, 1] ---> Hapiness you would have gotten from each of RPS had you chosen them against a fixed opponent choice like 'ROCK'. 
                      First the 3 actionUtilities are calculated for the opponent, then the action utility for myaction and opponent action pair is calculated. 
                      Then they're all used to get the regret and then regretSum
    '''
    p1_actionUtility = [0,0,0]
    p1_strategySum = [0,0,0]
    p2_actionUtility = [0,0,0]
    p2_strategySum = [0,0,0]
    actions = 3
    p1_wins_counter = 0
    p2_wins_counter = 0
    for i in range(0,iterations):
        ##Retrieve Actions
        p1_t = getStrategy(p1_regretSum,p1_strategySum)
        p1_strategy = [0.9, 0.05, 0.05] # <p1_t[0]-------------------------------------------------------------------------change to fixed or variable
        p1_strategySum = p1_t[1]
        p1_action = getAction(p1_strategy)
        #Define an arbitrary opponent strategy from which to adjust
        p2_t = getStrategy(p2_regretSum,p2_strategySum)
        p2_strategy = p2_t[0]
        p2_strategySum = p2_t[1]        
        p2_action = getAction(p2_strategy) 
        score = getScore(p1_action, p2_action)
        if score == 'p1 wins':  
            p1_wins_counter += 1  
        else: 
            p1_wins_counter += 0
        if score == 'p2 wins':  
            p2_wins_counter += 1  
        else: 
            p2_wins_counter += 0
        print('p1_strategy: ', p1_strategy, 'p2_strategy: ', p2_strategy)
        print('p1_regretSum: ', p1_regretSum, 'p2_regretSum: ', p2_regretSum)
        print('p1 chose: ', p1_action, 'p2 chose: ', p2_action)
        print(score)
        print('p1_wins_counter: ', p1_wins_counter, 'p2_wins_counter: ', p2_wins_counter)

        #Opponent Chooses scissors
        if p2_action == actions - 1: # FALSE: 3-1 = 2, WE HAVE otherAction == 1 
            #Utility(Rock) = 1
            p1_actionUtility[0] = 1
            #Utility(Paper) = -1
            p1_actionUtility[1] = -1
        #Opponent Chooses Rock
        elif p2_action == 0: # FALSE: WE HAVE otherAction == 1 
            #Utility(Scissors) = -1
            p1_actionUtility[actions - 1] = -1
            #Utility(Paper) = 1
            p1_actionUtility[1] = 1
        #Opopnent Chooses Paper
        else: # TRUE: WE HAVE otherAction == 1 
            #Utility(Rock) = -1
            p1_actionUtility[0] = -1
            #Utility(Scissors) = 1
            p1_actionUtility[2] = 1

        #Opponent Chooses scissors
        if p1_action == actions - 1: # FALSE: 3-1 = 2, WE HAVE otherAction == 1 
            #Utility(Rock) = 1
            p2_actionUtility[0] = 1
            #Utility(Paper) = -1
            p2_actionUtility[1] = -1
        #Opponent Chooses Rock
        elif p1_action == 0: # FALSE: WE HAVE otherAction == 1 
            #Utility(Scissors) = -1
            p2_actionUtility[actions - 1] = -1
            #Utility(Paper) = 1
            p2_actionUtility[1] = 1
        #Opopnent Chooses Paper
        else: # TRUE: WE HAVE otherAction == 1 
            #Utility(Rock) = -1
            p2_actionUtility[0] = -1
            #Utility(Scissors) = 1
            p2_actionUtility[2] = 1
                    
        #Add the regrets from this decision
        for i in range(0,actions):
            p1_regretSum[i] += p1_actionUtility[i] - p1_actionUtility[p1_action]
            p2_regretSum[i] += p2_actionUtility[i] - p2_actionUtility[p2_action]

    return p1_strategySum, p2_strategySum

In [9]:
def getAverageStrategy(iterations):
    ''' 
    Define each variable and give example values:
    - avgStrategy = [0, 0, 0] ---> a.k.a.: 'Maximally Exploitative Strategy' ---> Strategy to use in next round, considering info from all previous rounds put together
    - strategySum = [1.0, 0.3333333333333333, 3.6666666666666665] ---> sum of probabilities in strategy array over all iterations
    - oppStrategy = [0.3, 0.4, 0.3]
    - normalizingSum = 37 ---> used to get strategy array probabilities ---> strategy[i] = strategy[i]/normalizingSum ---> sum of positive regrets only
    '''
    actions = 3
    p1_strategySum, p2_strategySum = train(iterations, [0,0,0], [0,0,0])
    p1_avgStrategy = [0,0,0]
    p2_avgStrategy = [0,0,0]
    p1_normalizingSum = 0
    p2_normalizingSum = 0 
    for i in range(0,actions):
        p1_normalizingSum += p1_strategySum[i]
        p2_normalizingSum += p2_strategySum[i]
    for i in range(0,actions):
        if p1_normalizingSum > 0:
            p1_avgStrategy[i] = p1_strategySum[i] / p1_normalizingSum
        else:
            p1_avgStrategy[i] = 1.0 / actions
        if p2_normalizingSum > 0:
            p2_avgStrategy[i] = p2_strategySum[i] / p2_normalizingSum
        else:
            p2_avgStrategy[i] = 1.0 / actions
    print('avgStrategy: ', p1_avgStrategy)
    print('avgStrategy: ', p2_avgStrategy)
    return p1_avgStrategy, p2_avgStrategy

In [10]:
print("Maximally Exploitative Strat", getAverageStrategy(500))

p1_strategy:  [0.9, 0.05, 0.05] p2_strategy:  [0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
p1_regretSum:  [0, 0, 0] p2_regretSum:  [0, 0, 0]
p1 chose:  0 p2 chose:  2
p1 wins
p1_wins_counter:  1 p2_wins_counter:  0
p1_strategy:  [0.9, 0.05, 0.05] p2_strategy:  [0.3333333333333333, 0.6666666666666666, 0.0]
p1_regretSum:  [0, -2, -1] p2_regretSum:  [1, 2, 0]
p1 chose:  0 p2 chose:  0
draw
p1_wins_counter:  1 p2_wins_counter:  0
p1_strategy:  [0.9, 0.05, 0.05] p2_strategy:  [0.25, 0.75, 0.0]
p1_regretSum:  [0, -2, -3] p2_regretSum:  [1, 3, -1]
p1 chose:  0 p2 chose:  1
p2 wins
p1_wins_counter:  1 p2_wins_counter:  1
p1_strategy:  [0.9, 0.05, 0.05] p2_strategy:  [0.0, 1.0, 0.0]
p1_regretSum:  [0, 0, -1] p2_regretSum:  [0, 3, -3]
p1 chose:  0 p2 chose:  1
p2 wins
p1_wins_counter:  1 p2_wins_counter:  2
p1_strategy:  [0.9, 0.05, 0.05] p2_strategy:  [0.0, 1.0, 0.0]
p1_regretSum:  [0, 2, 1] p2_regretSum:  [-1, 3, -5]
p1 chose:  0 p2 chose:  1
p2 wins
p1_wins_counter:  1 p2_wins