In [None]:
from agent_configs import RainbowConfig
import gymnasium as gym
import torch
import random
import numpy as np
import torch
from utils import CategoricalCrossentropyLoss, KLDivergenceLoss
from utils.utils import HuberLoss
from cfr_utils import EvalWrapper, evaluatebots, WrapperEnv, load_agents, EmptyConf
import pyspiel
import copy
from agent_configs.cfr_config import CFRConfig
from active_player import ActivePlayer
from cfr_agent import CFRAgent
from cfr_network import CFRNetwork
import sys

sys.path.append("..")
from dqn.rainbow.rainbow_agent import RainbowAgent

def _load_universal_poker(num_suits, num_ranks, num_hole_cards, board_cards):
    """Load a ``universal_poker`` game with this script's fixed settings.

    Only the deck size and the card counts differ between the two games
    built below; the player count, betting abstraction, number of rounds
    and blinds are the same for both.

    Args:
        num_suits: Value passed as ``numSuits``.
        num_ranks: Value passed as ``numRanks``.
        num_hole_cards: Value passed as ``numHoleCards``.
        board_cards: String passed as ``numBoardCards``.

    Returns:
        Whatever ``pyspiel.load_game`` returns for these parameters.
    """
    return pyspiel.load_game(
        "universal_poker",
        dict(
            numPlayers=2,
            numSuits=num_suits,
            numRanks=num_ranks,
            numHoleCards=num_hole_cards,
            numBoardCards=board_cards,
            bettingAbstraction="fcpa",
            numRounds=2,
            blind="50 100",
        ),
    )


# Larger game: 4 suits x 13 ranks, two hole cards.
# NOTE(review): "fhp" presumably stands for flop hold'em poker -- confirm.
fhp = _load_universal_poker(
    num_suits=4, num_ranks=13, num_hole_cards=2, board_cards="0 3"
)
# Small game: 2 suits x 3 ranks, one hole card.
leduc = _load_universal_poker(
    num_suits=2, num_ranks=3, num_hole_cards=1, board_cards="0 1"
)

# Per-game observation sizes.
leducconfig = dict(state_representation_size=16)
fhpconfig = dict(state_representation_size=108)

# Wrap both raw pyspiel games in the project's environment adapter.
leducgame = WrapperEnv(leduc)
fhpgame = WrapperEnv(fhp)

# Selects which game's input size is used further down.
chosen_game = "leduc"

# --- Hyperparameters for the CFR policy/value networks and training loop ---
hidden_dim = 128
# Read the observation width from the per-game config dicts so each size is
# declared in exactly one place. As before, any value of `chosen_game` other
# than "leduc" selects the FHP size.
input_dim = (
    leducconfig["state_representation_size"]
    if chosen_game == "leduc"
    else fhpconfig["state_representation_size"]
)
# NOTE(review): presumably one output per action of the "fcpa" betting
# abstraction -- confirm against the game definition.
output_dim = 4
num_players = 2
replay_buffer_size = 4_000_000
minibatch_size = 10_000
steps_per_epoch = 3000
traversals = 3000
training_steps = 20_000
lr = 0.0001
# No optimizer is supplied here; None is forwarded in the network settings.
optimizer = None

# Network settings dict; the same dict is later used for both the policy and
# the value network and is also handed to `load_agents`.
p_v_networks = {
    "input_shape": input_dim,
    "output_shape": output_dim,
    "hidden_size": hidden_dim,
    "learning_rate": lr,
    "optimizer": optimizer,
}
active_player_obj = ActivePlayer(num_players)

# Checkpoint that both pre-trained agents below are restored from.
path = "checkpoints/V2CFR_LEDUC_Full/policy/linear/4030245/V2CFR_LEDUC_Full_190.pt"

# CFR configuration used to build `modelselect`.
# NOTE: the name `config` is rebound to a RainbowConfig later in this cell;
# `modelselect` is constructed before that happens.
config = CFRConfig(
    config_dict={
        "network": {
            # NOTE(review): "policy" and "value" reference the very same dict
            # object, so an in-place mutation by the consumer would affect
            # both -- confirm CFRConfig treats these as read-only.
            "policy": p_v_networks,
            "value": p_v_networks,
            "num_players": num_players,
        },
        "replay_buffer_size": replay_buffer_size,
        "minibatch_size": minibatch_size,
        "steps_per_epoch": steps_per_epoch,
        "traversals": traversals,
        "training_steps": training_steps,
        "active_player_obj": active_player_obj,
    },
    game_config={
        "num_players": num_players,
        "observation_space": input_dim,
        # Use output_dim instead of repeating the literal 4, so the action
        # count cannot drift from the network output size and from the
        # `out_size` passed to EvalWrapper below.
        "action_space": output_dim,
    },
)

# Both agents are loaded from the same checkpoint path.
agent1, agent2 = load_agents(path, path, p_v_networks, num_players)
# Built on the plain WrapperEnv, before `leducgame` is rebound below.
modelselect = CFRAgent(env=leducgame, config=config)

# Rebind `leducgame` to the evaluation wrapper around the original env.
# NOTE(review): presumably EvalWrapper lets `agent1` act as the fixed
# opponent -- confirm against cfr_utils.EvalWrapper.
leducgame = EvalWrapper(
    game=leducgame,
    agent=agent1,
    in_size=input_dim,
    out_size=output_dim,
    select_model=modelselect,
)


# Settings handed to RainbowConfig for the Rainbow DQN agent.
# NOTE(review): the key "advatage_hidden_layer_widths" is spelled without the
# "n" -- confirm this is the spelling RainbowConfig actually reads.
config_dict = dict(
    dense_layer_widths=[128, 128],
    value_hidden_layer_widths=[128],
    advatage_hidden_layer_widths=[128],
    adam_epsilon=1e-8,
    learning_rate=0.002,
    training_steps=1000000,
    minibatch_size=256,
    save_intermediate_weights=True,
    replay_buffer_size=100000,
    min_replay_buffer_size=256,
    transfer_interval=1280,
    loss_function=HuberLoss(),  # could do categorical cross entropy
    clipnorm=0.0,
    discount_factor=0.99,
    atom_size=1,
    replay_interval=64,
    dueling=True,
    eg_epsilon=1,
    eg_epsilon_final=0.0,
    eg_epsilon_final_step=5000,
    eg_epsilon_decay_type="linear",
    num_minibatches=4,
)
gameconfig = EmptyConf()
# Rebinds `config`, replacing the CFRConfig created earlier in this cell.
config = RainbowConfig(config_dict, gameconfig)

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# The agent is built on the EvalWrapper-wrapped environment.
agent = RainbowAgent(
    leducgame,
    config,
    name="Rainbow-Leduc-Test",
    device=device,
)
agent.checkpoint_interval = 100

# Print every parameter of the agent's model.
for param in agent.model.parameters():
    print(param)

In [None]:
# Start training the Rainbow agent constructed in the previous cell.
# NOTE(review): presumably runs for the configured "training_steps" and
# checkpoints according to `agent.checkpoint_interval` -- confirm against
# RainbowAgent.train.
agent.train()