### Agent
Defines the agent class, which stores the running average of the values obtained for each joint action and provides the methods to select a position in the game and play it.

In [31]:
import numpy as np

class Agent(object):
    """Base agent for a 3x3 two-player matrix game with noisy payoffs.

    ``game_values[row][col]`` is a ``(mean, std)`` pair describing the normal
    distribution the payoff of that joint action is sampled from. The agent
    keeps, for each of the 9 joint actions, how many times it was played and
    the running average of the values obtained.
    """

    def __init__(self, game_values):
        self.game_values = game_values
        self.reset()

    def reset(self):
        """Forget everything learned so far."""
        self.actions_count = np.zeros(9)  # times each joint action was taken
        # Running average of the value obtained per joint action. It must start
        # as numeric zeros: an object array of None would make the first
        # incremental-average update in play() fail with a TypeError.
        self.values = np.zeros(9)

    def get_game_values(self, position_1, position_2):
        '''
        Obtains a tuple with the values for player 1 and player 2 after each of them choose an
        action/position to play. Player 1 is the row player and Player 2 the column one.
        Both values are independent samples from the same normal distribution
        stored at game_values[position_1][position_2].
        '''
        cell = self.game_values[position_1][position_2]
        mean, std = cell[0], cell[1]
        value_1 = np.random.normal(mean, std)
        value_2 = np.random.normal(mean, std)
        return (value_1, value_2)

    def get_game_value(self, position_1, position_2):
        '''
        Obtains a single value with the average of the values obtained for player 1 and player 2
        after each of them choose an action/position to play.
        Player 1 is the row player and Player 2 the column one
        '''
        value_1, value_2 = self.get_game_values(position_1, position_2)
        return (value_1 + value_2) / 2

    def play(self):
        '''
        Defines a step in the game. A set of actions is chosen and the output of the game is
        computed according to them. The running average and the counter of the
        chosen joint action are updated in place.
        '''
        pos = self.get_position()
        row, col, index = pos[0], pos[1], pos[2]
        value = self.get_game_value(row, col)

        # Incremental mean: new_avg = (old_avg * n + value) / (n + 1)
        count = self.actions_count[index]
        self.values[index] = (self.values[index] * count + value) / (count + 1)
        self.actions_count[index] += 1

    def get_position(self):
        '''
        Gets the position for the following game based on the policy of the agent.
        For this base class the policy follows a random choice.
        Returns (row, column, flat index) where flat index = row * 3 + column.
        '''
        row = np.random.choice(3)
        col = np.random.choice(3)
        return row, col, row * 3 + col
        
class BoltzmannJointActionLearner(Agent):
    """Agent that picks joint actions with a Boltzmann (softmax) policy.

    The probability of a joint action is proportional to
    ``exp(value / t)``, where ``value`` is the current estimate stored in
    ``self.values`` and ``t`` is the temperature given at construction.
    """

    def __init__(self, game_values, t):
        super(BoltzmannJointActionLearner, self).__init__(game_values)
        self.t = t  # temperature of the Boltzmann distribution

    def get_position(self):
        '''
        Gets the position for the following game based on the policy of the agent.
        For this class the decision is taken based on the boltzmann definition.
        Returns (row, column, flat index) with integer row and column.
        '''
        scaled = np.asarray(self.values, dtype=float) / self.t
        # Softmax is invariant to a constant shift; subtracting the maximum
        # keeps exp() from overflowing to inf (and the pdf from becoming NaN)
        # when the temperature is small.
        numerator = np.exp(scaled - np.max(scaled))
        denominator = np.sum(numerator)
        pdf = numerator / denominator  # probability distribution function
        pos = np.random.choice(len(self.values), p=pdf)
        # Floor division: '/' is true division on Python 3 and would yield a
        # float row that cannot be used to index the payoff table.
        return pos // 3, pos % 3, pos
    


In [37]:
import numpy as np

# Standard deviations of the payoff noise. sigma0 and sigma1 belong to the
# (0, 0) and (1, 1) cells respectively; sigma is used for every other cell.
sigma0 = 0.2
sigma1 = 0.2
sigma = 0.2

# Payoff table: game_values[row][col] is a (mean, std) pair.
game_values = [
    [(11, sigma0), (-30, sigma), (0, sigma)],
    [(-30, sigma), (7, sigma1), (6, sigma)],
    [(0, sigma), (0, sigma), (5, sigma)],
]

ag = Agent(game_values)

# val = ag.get_game_values(1, 1)
print(np.random.choice(3))



0
