In [1]:
from games.kuhn.kuhn import KuhnPoker
from agents.counterfactualregret import CounterFactualRegret
from collections import OrderedDict

In [2]:
# Single shared KuhnPoker game instance; every later cell (agent construction,
# training, self-play evaluation) operates on this same object.
g = KuhnPoker()

In [3]:
# Reset the game before reading g.agents, then build one learner per agent.
g.reset()

# agent_classes[i] is the learner class used for the i-th entry of g.agents.
agent_classes = [CounterFactualRegret, CounterFactualRegret]

# Maps each agent identifier from g.agents to its learner instance.
my_agents = dict()
for i, agent in enumerate(g.agents):
    learner_cls = agent_classes[i]
    my_agents[agent] = learner_cls(game=g, agent=agent)

In [None]:
# Train every learner in turn and print its resulting policies, with the
# node keys in sorted order so the printout is stable between runs.
for agent in g.agents:
    print(f'Training agent {agent}')
    learner = my_agents[agent]
    # 100000 is forwarded to train(); presumably the number of CFR
    # iterations -- confirm against CounterFactualRegret.train.
    learner.train(100000)
    print(f'Agent {agent} policies:')
    learned = OrderedDict(
        (key, learner.node_dict[key].policy())
        for key in sorted(learner.node_dict.keys())
    )
    print(learned)
    print('')

Training agent agent_0


In [None]:
from graphviz import Digraph

def graficar_politica_tree(politicas: "dict[str, np.ndarray]", nombre='Agent_Policy'):
    """Render a policy dictionary as a Graphviz tree and save it as a PNG.

    Each key of ``politicas`` becomes one node labelled with the key and the
    first two entries of its probability vector (shown as ``p`` and ``b``).
    A key longer than one character is linked to the key obtained by dropping
    its last character, and that last character is used as the edge label.

    Args:
        politicas: Mapping from a state string to an indexable sequence of
            probabilities; only indices 0 and 1 are read.
        nombre: Output file name without extension; the diagram is written
            to ``<nombre>.png``.

    Returns:
        None. The diagram is written to disk and a confirmation is printed.
    """
    # NOTE: the annotation above is a string on purpose. The notebook never
    # imports numpy, so an unquoted ``np.ndarray`` would be evaluated when the
    # ``def`` statement runs and raise NameError on a fresh kernel.
    dot = Digraph(comment='CFR Policy Tree')
    dot.attr('node', shape='box', style='rounded,filled', fillcolor='lightgrey')

    for estado, probs in politicas.items():
        # The doubled backslash emits a literal "\n", which Graphviz renders
        # as a line break inside the node label.
        etiqueta = f"{estado}\\n[p: {probs[0]:.2f}, b: {probs[1]:.2f}]"
        dot.node(estado, etiqueta)

        # Single-character keys are treated as roots and get no incoming edge.
        if len(estado) > 1:
            padre = estado[:-1]
            accion = estado[-1]
            dot.edge(padre, estado, label=accion)

    # cleanup=True removes the intermediate DOT source once the PNG exists.
    dot.render(filename=nombre, format='png', cleanup=True)
    print(f"Diagrama guardado como {nombre}.png")


In [None]:
# Plot the policy of the first agent. The agent is selected explicitly rather
# than through a loop variable left over from an earlier cell: such a leftover
# holds whichever agent was iterated last, which would not match the
# "agent_0_policy" file name used below.
first_agent = g.agents[0]
first_agent_nodes = my_agents[first_agent].node_dict
politicas = OrderedDict(
    (n, first_agent_nodes[n].policy()) for n in sorted(first_agent_nodes.keys())
)
graficar_politica_tree(politicas, nombre="agent_0_policy")

In [None]:
# Self-play evaluation: play `niter` complete games between the learners and
# report each agent's mean reward per game.
niter = 2000
cum_rewards = {agent: 0. for agent in g.agents}

for _ in range(niter):
    g.reset()
    turn = 0
    while not g.done():
        # The learner belonging to the agent whose turn it is picks the move.
        a = my_agents[g.agent_selection].action()
        g.step(action=a)
        turn += 1
    # Accumulate the rewards the game reports once it is done.
    for agent in g.agents:
        cum_rewards[agent] += g.rewards[agent]

average_rewards = {agent: cum_rewards[agent] / niter for agent in g.agents}
print('Average rewards:', average_rewards)


Average rewards: {'agent_0': -0.098, 'agent_1': 0.098}


In [None]:
# Banner for the cells that follow, each of which prints one learned betting
# probability next to a reference ("Theoretic") value and their difference.
print('Check learned policies against theoretical policies:')

Check learned policies against theoretical policies:


In [None]:
# Node '0' of the first agent's learner; index 1 of its policy is the value
# reported below as the probability of betting.
first_learner = my_agents[g.agents[0]]
opening_policy_J = first_learner.node_dict['0'].policy()
JX_b = opening_policy_J[1]
print(f'Agent: 0 - Hand: J_ - History: [] - Probability of betting: {JX_b}')


Agent: 0 - Hand: J_ - History: [] - Probability of betting: 0.10355011505845793


In [None]:
# Node '1pb' of the first agent's learner. The reference value printed below
# is JX_b + 1/3, so this cell needs JX_b from the earlier check cell.
first_learner = my_agents[g.agents[0]]
policy_Q_after_pb = first_learner.node_dict['1pb'].policy()
QX_pb_b = policy_Q_after_pb[1]
print(f'Agent: 0 - Hand: Q_ - History: pb - Probability of betting: {QX_pb_b} - Theoretic value: {JX_b+1/3} -  Difference: {abs(QX_pb_b - (JX_b+1/3))}')


Agent: 0 - Hand: Q_ - History: pb - Probability of betting: 0.43758577337494275 - Theoretic value: 0.4368834483917913 -  Difference: 0.0007023249831514811


In [None]:
# Node '2' of the first agent's learner. The reference value printed below is
# 3 * JX_b, so this cell needs JX_b from the earlier check cell.
first_learner = my_agents[g.agents[0]]
opening_policy_K = first_learner.node_dict['2'].policy()
KX_b = opening_policy_K[1]
print(f'Agent: 0 - Hand: K_ - History: [] - Probability of betting: {KX_b} - Theoretic value: {3 * JX_b} -  Difference: {abs(KX_b - 3 * JX_b)}')


Agent: 0 - Hand: K_ - History: [] - Probability of betting: 0.32074509924715594 - Theoretic value: 0.3106503451753738 -  Difference: 0.01009475407178212


In [None]:
# Node '0p' looked up in the first agent's learner; the printed reference
# value is the constant 1/3.
first_learner = my_agents[g.agents[0]]
policy_J_after_p = first_learner.node_dict['0p'].policy()
XJ_p_b = policy_J_after_p[1]
print(f'Agent: 0 - Hand: _J - History: p - Probability of betting: {XJ_p_b} - Theoretic value: {1/3} -  Difference: {abs(XJ_p_b - 1/3)}')

Agent: 0 - Hand: _J - History: p - Probability of betting: 0.33362013601498486 - Theoretic value: 0.3333333333333333 -  Difference: 0.00028680268165154343


In [None]:
# Node '1b' looked up in the first agent's learner; the printed reference
# value is the constant 1/3.
first_learner = my_agents[g.agents[0]]
policy_Q_after_b = first_learner.node_dict['1b'].policy()
XQ_b_b = policy_Q_after_b[1]
print(f'Agent: 0 - Hand: _Q - History: b - Probability of betting: {XQ_b_b} - Theoretic value: {1/3} -  Difference: {abs(XQ_b_b - 1/3)}')

Agent: 0 - Hand: _Q - History: b - Probability of betting: 0.3274301069845622 - Theoretic value: 0.3333333333333333 -  Difference: 0.005903226348771107
