This file is used to manually run a head-to-head evaluation between trained agents.

In [None]:
import copy
from agent_configs.cfr_config import CFRConfig
from active_player import ActivePlayer
from cfr_agent import CFRAgent
import torch
from cfr_network import CFRNetwork
from cfr_utils import evaluatebots, WrapperEnv, load_agents
import pyspiel
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Both games are "universal_poker" instances; the settings below are the ones
# they have in common, while deck and card-count settings are given per game.
_shared_poker_params = {
    "numPlayers": 2,
    "bettingAbstraction": "fcpa",
    "numRounds": 2,
    "blind": "50 100",
}

# 52-card variant: 4 suits x 13 ranks, two hole cards per player.
fhp = pyspiel.load_game(
    "universal_poker",
    {
        **_shared_poker_params,
        "numSuits": 4,
        "numRanks": 13,
        "numHoleCards": 2,
        "numBoardCards": "0 3",
    },
)

# 6-card variant: 2 suits x 3 ranks, one hole card per player.
leduc = pyspiel.load_game(
    "universal_poker",
    {
        **_shared_poker_params,
        "numSuits": 2,
        "numRanks": 3,
        "numHoleCards": 1,
        "numBoardCards": "0 1",
    },
)

# Per-game state representation sizes (same numbers as the input_dim choice
# made further down in this cell).
leducconfig = dict(state_representation_size=16)
fhpconfig = dict(state_representation_size=108)

# Wrap each pyspiel game in the project's environment adapter.
fhpgame = WrapperEnv(fhp)
leducgame = WrapperEnv(leduc)

# Game selector: "leduc" picks the 16-wide input, any other value the 108-wide one.
chosen_game = "fhp"

# Network dimensions.
hidden_dim = 128
if chosen_game == "leduc":
    input_dim = 16
else:
    input_dim = 108
output_dim = 4
num_players = 2

# Buffer / schedule settings that are forwarded into the CFR config.
replay_buffer_size = 4_000_000
minibatch_size = 10_000
steps_per_epoch = 3_000
traversals = 3_000
training_steps = 20_000

# Optimisation settings stored alongside the network shape.
lr = 1e-4
optimizer = None

# One settings dict describing the network architecture and its optimiser.
p_v_networks = dict(
    input_shape=input_dim,
    output_shape=output_dim,
    hidden_size=hidden_dim,
    learning_rate=lr,
    optimizer=optimizer,
)
# ActivePlayer instance constructed with the player count; it is passed to
# CFRConfig under the "active_player_obj" key later in this cell.
active_player_obj = ActivePlayer(num_players)
# Checkpoint locations for the ten policy snapshots being compared.
# Every path has the form
#   checkpoints/<run>/policy/<variant>/<step>/<run>_<index>.pt
# The "linear" and "notlinear" variants share the same (step, index) pairs, so
# switching `_checkpoint_variant` to "linear" selects the other set.
_checkpoint_run = "CFR_0.001LR_TEST_MC_FHP"
_checkpoint_variant = "notlinear"
_checkpoint_ids = (
    (1005275, 56),
    (2006646, 112),
    (3006144, 168),
    (4014284, 224),
    (5010481, 279),
    (6001547, 332),
    (7001642, 387),
    (8016903, 439),
    (9020095, 493),
    (10015290, 546),
)

(
    path1,
    path2,
    path3,
    path4,
    path5,
    path6,
    path7,
    path8,
    path9,
    path10,
) = (
    f"checkpoints/{_checkpoint_run}/policy/{_checkpoint_variant}/{step}/{_checkpoint_run}_{index}.pt"
    for step, index in _checkpoint_ids
)

# Build the CFR configuration used during evaluation.
# - config_dict carries the network settings plus the buffer/schedule values.
# - game_config describes the game interface seen by the agents.
config = CFRConfig(
    config_dict={
        "network": {
            # NOTE: policy and value deliberately reference the same settings
            # dict object, exactly as before; a mutation of one is visible
            # through the other.
            "policy": p_v_networks,
            "value": p_v_networks,
            "num_players": num_players,
        },
        "replay_buffer_size": replay_buffer_size,
        "minibatch_size": minibatch_size,
        "steps_per_epoch": steps_per_epoch,
        "traversals": traversals,
        "training_steps": training_steps,
        "active_player_obj": active_player_obj,
    },
    game_config={
        "num_players": num_players,
        "observation_space": input_dim,
        # Tied to output_dim (currently 4) instead of a second hard-coded 4, so
        # the action count and the network output width cannot drift apart.
        "action_space": output_dim,
    },
)

# Checkpoints are loaded two at a time because load_agents takes a pair of
# paths and hands back a pair of agents.
_checkpoint_pairs = (
    (path1, path2),
    (path3, path4),
    (path5, path6),
    (path7, path8),
    (path9, path10),
)

# Agents in checkpoint order: agents[k] was loaded from path<k+1>.
agents = []
for _first_path, _second_path in _checkpoint_pairs:
    agents.extend(load_agents(_first_path, _second_path, p_v_networks, num_players))

# Keep the individual names available as well.
(
    agent1,
    agent2,
    agent3,
    agent4,
    agent5,
    agent6,
    agent7,
    agent8,
    agent9,
    agent10,
) = agents

In [None]:
# Head-to-head evaluation of every agent pairing.
#
# Only the upper triangle (including the diagonal, i.e. self-play) is filled:
# pairing (j, i) is skipped once (i, j) has been played. The lower triangle
# keeps its initial 0. Iterating j from i upwards makes that explicit instead of
# using "cell != 0" as an "already computed" marker, which would re-run a
# pairing whose mean result happened to be exactly 0.

# Evaluate on the environment that matches `chosen_game`, so the game and
# `input_dim` always agree (previously the FHP environment was hard-coded).
eval_game = leducgame if chosen_game == "leduc" else fhpgame

num_agents = len(agents)
results = [[0] * num_agents for _ in range(num_agents)]
for i, row_agent in enumerate(agents):
    for j in range(i, num_agents):
        # evaluatebots returns one result collection per player; only the
        # first player's results are used here.
        p1, _ = evaluatebots(row_agent, agents[j], 100000, eval_game, config, input_dim)
        # Mean result of the first agent, divided by 100 -- presumably to
        # express it in big blinds (the games use blinds "50 100"); TODO confirm.
        results[i][j] = np.mean(p1) / 100

In [25]:
# Draw the pairwise results as an annotated heatmap, save it, then display it.
# TODO: rotate the half-filled matrix so that all results sit at the bottom
# (i.e. invert the axes).
# NOTE(review): the title says "Exploitability", but the plotted values are the
# head-to-head means computed in the previous cell -- confirm the wording.
results = np.array(results)
tick_positions = np.arange(len(agents)) + 0.5  # centre each label on its cell
tick_labels = [f"Agent {i+1}" for i in range(len(agents))]

plt.figure(figsize=(10, 8))
sns.heatmap(results, annot=True, fmt=".2f", cmap="coolwarm", cbar=True, linewidths=0.5)
plt.title("Agent Performance Heatmap in Exploitability")
plt.xlabel("Agent 2")
plt.ylabel("Agent 1")
plt.xticks(ticks=tick_positions, labels=tick_labels, rotation=45)
plt.yticks(ticks=tick_positions, labels=tick_labels, rotation=0)
plt.tight_layout()

# Save BEFORE showing: on backends where plt.show() releases the figure (e.g.
# the notebook inline backend), saving afterwards writes an empty image.
plt.savefig("NOTLINEARCFR0.001FHPFULL100000.png")
plt.show()
plt.close()

  plt.show()
