In [1]:
from games.kuhn.kuhn import KuhnPoker
from agents.counterfactualregret import CounterFactualRegret
from collections import OrderedDict

In [2]:
g = KuhnPoker()

In [4]:
agent_classes = [ CounterFactualRegret, CounterFactualRegret ]
my_agents = {}
g.reset()
for i, agent in enumerate(g.agents):
    my_agents[agent] = agent_classes[i](game=g, agent=agent)

In [5]:
for agent in g.agents:
    print('Training agent ' + agent)
    my_agents[agent].train(100000)
    print('Agent ' + agent + ' policies:')
    print(OrderedDict(map(lambda n: (n, my_agents[agent].node_dict[n].policy()), sorted(my_agents[agent].node_dict.keys()))))
    print('')

Training agent agent_0
Agent agent_0 policies:
OrderedDict([('0', array([0.66350825, 0.33649175])), ('0b', array([9.99970094e-01, 2.99060949e-05])), ('0p', array([0.66332467, 0.33667533])), ('0pb', array([9.99966260e-01, 3.37401801e-05])), ('1', array([9.99790526e-01, 2.09474403e-04])), ('1b', array([0.65767273, 0.34232727])), ('1p', array([9.99919855e-01, 8.01450626e-05])), ('1pb', array([0.32780034, 0.67219966])), ('2', array([7.05586736e-05, 9.99929441e-01])), ('2b', array([3.00372462e-05, 9.99969963e-01])), ('2p', array([9.01117386e-05, 9.99909888e-01])), ('2pb', array([0.26614015, 0.73385985]))])

Training agent agent_1
Agent agent_1 policies:
OrderedDict([('0', array([0.67969854, 0.32030146])), ('0b', array([9.99969809e-01, 3.01905021e-05])), ('0p', array([0.66713943, 0.33286057])), ('0pb', array([9.99966954e-01, 3.30460033e-05])), ('1', array([9.99659972e-01, 3.40028355e-04])), ('1b', array([0.65056877, 0.34943123])), ('1p', array([9.99955113e-01, 4.48873328e-05])), ('1pb', arra

In [6]:
cum_rewards = dict(map(lambda agent: (agent, 0.), g.agents))
niter = 2000
for _ in range(niter):
    g.reset()
    turn = 0
    while not g.done():
        #print('Turn: ', turn)
        #print('\tPlayer: ', g.agent_selection)
        #print('\tObservation: ', g.observe(g.agent_selection))
        a = my_agents[g.agent_selection].action()
        #print('\tAction: ', g._moves[a])
        g.step(action=a)
        turn += 1
    #print('Rewards: ', g.rewards)
    for agent in g.agents:
        cum_rewards[agent] += g.rewards[agent]
print('Average rewards:', dict(map(lambda agent: (agent, cum_rewards[agent]/niter), g.agents)))


Average rewards: {'agent_0': 0.05, 'agent_1': -0.05}


In [7]:
print('Check learned policies against theoretical policies:')

Check learned policies against theoretical policies:


In [8]:
JX_b = my_agents[g.agents[0]].node_dict['0'].policy()[1]
print(f'Agent: 0 - Hand: J_ - History: [] - Probability of betting: {JX_b}')


Agent: 0 - Hand: J_ - History: [] - Probability of betting: 0.33649174933368675


In [9]:
QX_pb_b = my_agents[g.agents[0]].node_dict['1pb'].policy()[1]
print(f'Agent: 0 - Hand: Q_ - History: pb - Probability of betting: {QX_pb_b} - Theoretic value: {JX_b+1/3} -  Difference: {abs(QX_pb_b - (JX_b+1/3))}')


Agent: 0 - Hand: Q_ - History: pb - Probability of betting: 0.6721996620132858 - Theoretic value: 0.66982508266702 -  Difference: 0.002374579346265837


In [10]:
KX_b = my_agents[g.agents[0]].node_dict['2'].policy()[1]
print(f'Agent: 0 - Hand: K_ - History: [] - Probability of betting: {KX_b} - Theoretic value: {3 * JX_b} -  Difference: {abs(KX_b - 3 * JX_b)}')


Agent: 0 - Hand: K_ - History: [] - Probability of betting: 0.9999294413263614 - Theoretic value: 1.0094752480010603 -  Difference: 0.009545806674698842


In [11]:
XJ_p_b = my_agents[g.agents[0]].node_dict['0p'].policy()[1]
print(f'Agent: 0 - Hand: _J - History: p - Probability of betting: {XJ_p_b} - Theoretic value: {1/3} -  Difference: {abs(XJ_p_b - 1/3)}')

Agent: 0 - Hand: _J - History: p - Probability of betting: 0.33667533339142314 - Theoretic value: 0.3333333333333333 -  Difference: 0.0033420000580898224


In [12]:
XQ_b_b = my_agents[g.agents[0]].node_dict['1b'].policy()[1]
print(f'Agent: 0 - Hand: _Q - History: b - Probability of betting: {XQ_b_b} - Theoretic value: {1/3} -  Difference: {abs(XQ_b_b - 1/3)}')

Agent: 0 - Hand: _Q - History: b - Probability of betting: 0.34232726763897287 - Theoretic value: 0.3333333333333333 -  Difference: 0.00899393430563955
