### Agent
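Definition of the game class and the agent classes. Each agent keeps the history of the values obtained per action and provides the methods to select an action (position) for the game; the game class plays the selected actions and reports the resulting value back to the agents.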

In [2]:
import numpy as np
import math

class Game(object):
    '''
    Definition of the game values and game actions (play) based on the involved agents.

    game_values is indexed as game_values[row][column]; every entry is a
    (mean, standard deviation) pair and the value of a round is drawn from the
    corresponding normal distribution. agent1 is the row player and agent2
    the column player.
    '''
    def __init__(self, game_values, agent1, agent2):
        self.game_values = game_values
        self.agent1 = agent1
        self.agent2 = agent2

    def run(self, iterations):
        '''
        Plays `iterations` rounds with two independent learners: each agent
        selects its own action and both receive the value of the round.
        '''
        for _ in range(iterations):
            self.agent1.compute_action()
            self.agent2.compute_action()
            self.play(self.agent1.last_action, self.agent2.last_action)

    def play(self, action_agent1, action_agent2):
        '''
        Defines a step in the game. Based on the input actions, the output of the game is
        computed and stored in both agents.
        '''
        value = self.get_game_value(action_agent1, action_agent2)

        # Update the agents owned by this game, not module-level names.
        self.agent1.add_action_value(action_agent1, value)
        self.agent2.add_action_value(action_agent2, value)

    def run_joint(self, iterations):
        '''
        Plays `iterations` rounds with a single joint action learner stored in
        self.agent1. Its action is a flat index over the whole table, which is
        decoded as row * number_of_columns + column.
        '''
        n_columns = len(self.game_values[0])
        for _ in range(iterations):
            self.agent1.compute_action()
            row, column = divmod(self.agent1.last_action, n_columns)
            self.play_joint(row, column)

    def play_joint(self, action_agent1, action_agent2):
        '''
        Defines a step in the game. Based on the input actions, the output of the game is
        computed. Differs from the independent learner just because the results are stored
        in a unique agent/player. Assumes that self.agent1 is a Joint Action Learner.
        '''
        value = self.get_game_value(action_agent1, action_agent2)

        # The joint learner counts and averages per flat (row, column) index, so
        # the value has to be stored under that same index, not under the row.
        n_columns = len(self.game_values[action_agent1])
        joint_action = action_agent1 * n_columns + action_agent2
        self.agent1.add_action_value(joint_action, value)

    def get_game_value(self, position_1, position_2):
        '''
        Obtains a number with the value for the players after each of them chooses an
        action/position to play. Player 1 is the row player and Player 2 the column one.
        The value is sampled from a normal distribution with the mean and the
        standard deviation stored in the selected cell.
        '''
        mean, deviation = self.game_values[position_1][position_2]
        return np.random.normal(mean, deviation)

class Agent(object):
    '''
    Base agent. Keeps, for every action, how many times it was selected and the
    running average of the values obtained with it. The policy of this base
    class is a uniform random choice among the actions.
    '''
    def __init__(self, n_actions=3):
        '''
        n_actions: number of actions available to the agent (3 by default).
        '''
        self.n_actions = n_actions
        self.reset()

    def reset(self):
        '''
        Forgets everything learned so far.
        '''
        self.actions_count = np.zeros(self.n_actions) # count for the action taken
        self.values = np.zeros(self.n_actions) # values obtained so far -> average
        self.last_action = None # no action selected yet

    def compute_action(self, t = None):
        '''
        Gets the position/action for the following game based on the policy of the agent.
        For this base class the policy follows a random choice. The parameter t is
        not used by this policy. The selected action is stored in self.last_action
        and its counter is incremented.
        '''
        action = np.random.choice(len(self.values))
        self.last_action = action
        self.actions_count[action] += 1

    def add_action_value(self, action, value):
        '''
        Updates the average value of `action` with a new observed `value`.
        The counter of the action must already include this observation
        (compute_action increments it when the action is selected).
        '''
        count = self.actions_count[action]
        # previous average * (count - 1) recovers the sum of the previous values
        self.values[action] = (self.values[action] * (count - 1) + value) / count
        
class BoltzmannActionLearner(Agent):
    '''
    Independent learner that selects its actions with a Boltzmann (softmax)
    distribution over the average values stored by the base Agent.
    '''
    def __init__(self, t):
        '''
        t: default temperature used by compute_action.
        '''
        self.t = t
        # super() has to name this class; naming another class raises TypeError
        super(BoltzmannActionLearner, self).__init__()

    def compute_action(self, t = None):
        '''
        Gets the position/action for the following game based on the policy of the agent.
        For this class the decision is taken based on the boltzmann definition:
        the probability of an action is proportional to exp(value / t).
        When t is not given, the temperature received in the constructor is used.
        '''
        if t is None:
            t = self.t
        scaled = self.values / t
        # Subtracting the maximum leaves the probabilities unchanged but keeps
        # exp() from overflowing when values / t is large.
        numerator = np.exp(scaled - np.max(scaled))
        denominator = np.sum(numerator)
        pdf = numerator / denominator #probability distribution function
        action = np.random.choice(len(self.values), p=pdf)
        self.last_action = action
        self.actions_count[action] += 1 #increment in the counter of the actions
    
class BoltzmannJointActionLearner(object):
    '''
    Learner over joint actions: a single agent that keeps one counter and one
    average value per joint action and selects the next joint action with a
    Boltzmann (softmax) distribution over those averages.
    '''
    def __init__(self, t=None, n_actions=9):
        '''
        t: default temperature used by compute_action. It may be omitted if a
           temperature is passed on every call to compute_action.
        n_actions: number of joint actions (9 by default).
        '''
        self.t = t
        self.n_actions = n_actions
        self.reset()

    def reset(self):
        '''
        Forgets everything learned so far.
        '''
        self.actions_count = np.zeros(self.n_actions) # count for the action taken
        self.values = np.zeros(self.n_actions) # values obtained so far -> average
        self.last_action = None # no action selected yet

    def compute_action(self, t = None):
        '''
        Gets the position/action for the following game based on the policy of the agent.
        For this class the decision is taken based on the boltzmann definition:
        the probability of an action is proportional to exp(value / t).
        When t is not given, the temperature received in the constructor is used.
        Raises ValueError if no temperature is available.
        '''
        if t is None:
            t = self.t
        if t is None:
            raise ValueError('a temperature is required: pass t to the '
                             'constructor or to compute_action')
        scaled = self.values / t
        # Subtracting the maximum leaves the probabilities unchanged but keeps
        # exp() from overflowing when values / t is large.
        numerator = np.exp(scaled - np.max(scaled))
        denominator = np.sum(numerator)
        pdf = numerator / denominator #probability distribution function
        action = np.random.choice(len(self.values), p=pdf)
        self.last_action = action
        self.actions_count[action] += 1

    def add_action_value(self, action, value):
        '''
        Updates the average value of `action` with a new observed `value`.
        The counter of the action must already include this observation
        (compute_action increments it when the action is selected).
        '''
        count = self.actions_count[action]
        # previous average * (count - 1) recovers the sum of the previous values
        self.values[action] = (self.values[action] * (count - 1) + value) / count

In [37]:
# Standard deviations of the values of the game
sigma = 0.2
sigma0 = 0.2
sigma1 = 0.2

# Value table indexed as game_values[row][column]; every cell is a
# (mean, standard deviation) pair.
game_values = [[(11, sigma0), (-30, sigma), (0, sigma)],
              [(-30, sigma), (7, sigma1), (6, sigma)],
              [(0, sigma), (0, sigma), (5, sigma)]]

# Agent.__init__ takes no game table, so passing game_values raised TypeError.
# NOTE(review): confirm that a plain random agent is what this cell intended.
ag = Agent()





0
