In [None]:
from agent_configs import RainbowConfig
import gymnasium as gym
import torch
import random
import numpy as np
import torch
from utils import CategoricalCrossentropyLoss, KLDivergenceLoss
from utils.utils import HuberLoss
from cfr_utils import (
    EvalWrapper,
    evaluatebots,
    WrapperEnv,
    load_agents,
    EmptyConf,
    NFSPWrapper,
    NFSPEvalWrapper,
    LoadNFSPAgent,
)
import pyspiel
import copy
from agent_configs.cfr_config import CFRConfig
from active_player import ActivePlayer
from cfr_agent import CFRAgent
from cfr_network import CFRNetwork
import sys

sys.path.append("..")
from dqn.rainbow.rainbow_agent import RainbowAgent
import tensorflow as tf
import os
import open_spiel.python.algorithms.nfsp

# Hide all GPUs from TensorFlow for this process.
tf.config.set_visible_devices([], "GPU")
num_players = 2
max_nodes = 10000000


def _load_universal_poker(num_suits, num_ranks, num_hole_cards, board_cards):
    """Load a `universal_poker` game from pyspiel.

    Only the deck shape and the per-round board-card schedule vary between
    the games built in this script; the remaining parameters (two players,
    two rounds, "fcpa" betting abstraction, "50 100" blinds) are shared.

    Args:
        num_suits: value for the "numSuits" game parameter.
        num_ranks: value for the "numRanks" game parameter.
        num_hole_cards: value for the "numHoleCards" game parameter.
        board_cards: value for the "numBoardCards" game parameter
            (space-separated string, one entry per round).

    Returns:
        Whatever `pyspiel.load_game` returns for these parameters.
    """
    return pyspiel.load_game(
        "universal_poker",
        {
            "numPlayers": 2,
            "numSuits": num_suits,
            "numRanks": num_ranks,
            "numHoleCards": num_hole_cards,
            "numBoardCards": board_cards,
            "bettingAbstraction": "fcpa",
            "numRounds": 2,
            "blind": "50 100",
        },
    )


# Full 52-card deck, two hole cards, three board cards in the second round.
fhp = _load_universal_poker(
    num_suits=4, num_ranks=13, num_hole_cards=2, board_cards="0 3"
)
# Six-card deck, one hole card, one board card in the second round.
leduc = _load_universal_poker(
    num_suits=2, num_ranks=3, num_hole_cards=1, board_cards="0 1"
)

# Per-game observation sizes handed to the NFSP agents further down.
leducconfig = {"state_representation_size": 16}
fhpconfig = {"state_representation_size": 108}

leducgame = NFSPWrapper(leduc)
fhpgame = NFSPWrapper(fhp)

# Build the Rainbow configuration shared by every evaluator trained below,
# then pick the torch device and switch TensorFlow into v1 (graph) mode.
active_player_obj = ActivePlayer(2)

# Hyperparameters handed to RainbowConfig. Keys not listed here fall back to
# the config's defaults.
config_dict = {
    "dense_layer_widths": [128, 128],
    "value_hidden_layer_widths": [128],
    "advantage_hidden_layer_widths": [128],
    "adam_epsilon": 1e-8,
    "learning_rate": 0.001,
    "training_steps": 10000,
    "minibatch_size": 32,
    "replay_buffer_size": 100000,
    "min_replay_buffer_size": 32,
    "transfer_interval": 1280,
    "loss_function": KLDivergenceLoss(),
    "clipnorm": 0.0,
    "discount_factor": 0.99,
    "replay_interval": 64,
    "eg_epsilon": 1,
    "eg_epsilon_final": 0.0,
    "eg_epsilon_final_step": 2000,
    "eg_epsilon_decay_type": "linear",
    "num_minibatches": 4,
    "atom_size": 1,
    "noisy_sigma": 0.0,
    "per_beta": 0.0,
    "per_alpha": 0.0,
    "per_beta_final": 0.0,
    "n_step": 1,
}
gameconfig = EmptyConf()
config = RainbowConfig(config_dict, gameconfig)
# v_min / v_max are overridden on the constructed config rather than passed
# through config_dict.
config.v_min = -1200
config.v_max = 1200
# NOTE(review): `device` is probed *before* CUDA_VISIBLE_DEVICES is set to
# "-1" on the next line, so it can still resolve to "cuda:0" on a GPU machine.
# Confirm whether torch is meant to use the GPU while TensorFlow stays on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# The NFSP agents below are driven through tf.compat.v1 sessions, so v2
# behaviour is disabled for the whole process.
tf.compat.v1.disable_v2_behavior()


# Training-step snapshots of the NFSP agents, one tuple per game. Each step
# number is the final component of a checkpoint directory.
_FHP_CHECKPOINT_STEPS = (1000002, 4000001, 7000001, 8000000, 10000000)
_LEDUC_CHECKPOINT_STEPS = (1000000, 4000001, 7000001, 8000000, 10000000)

fhp_agent_paths = [
    f"./checkpoints/fhp/nfsp/0/{step}/" for step in _FHP_CHECKPOINT_STEPS
]
leduc_agent_paths = [
    f"./checkpoints/leduc/nfsp/0/{step}/" for step in _LEDUC_CHECKPOINT_STEPS
]

# Individual names kept for anything that refers to a single checkpoint:
# mainpath1-5 are the FHP snapshots, mainpath6-10 the Leduc ones.
mainpath1, mainpath2, mainpath3, mainpath4, mainpath5 = fhp_agent_paths
mainpath6, mainpath7, mainpath8, mainpath9, mainpath10 = leduc_agent_paths

# For every game and every saved NFSP checkpoint: rebuild the NFSP agent,
# load its checkpoint, wrap it as an environment and train a Rainbow agent
# against it.
nodes = 0
games = [leducgame, fhpgame]

# One row per game: (wrapped game, label used in model names, checkpoint
# directories, NFSP state_representation_size).
game_specs = [
    (
        leducgame,
        "leduc",
        leduc_agent_paths,
        leducconfig["state_representation_size"],
    ),
    (
        fhpgame,
        "fhp",
        fhp_agent_paths,
        fhpconfig["state_representation_size"],
    ),
]

for game, game_string, agent_paths, state_size in game_specs:
    for number, checkpoint_path in enumerate(agent_paths):
        game.reset()

        with tf.compat.v1.Session() as sess:
            agent = open_spiel.python.algorithms.nfsp.NFSP(
                session=sess,
                player_id=0,
                state_representation_size=state_size,
                num_actions=4,
                hidden_layers_sizes=[1024, 512, 1024, 512],
                reservoir_buffer_capacity=30000000,
                anticipatory_param=0,
                batch_size=256,
                rl_learning_rate=0.1,
                sl_learning_rate=0.01,
                min_buffer_size_to_learn=1000,
                learn_every=256,
                optimizer_str="sgd",
                replay_buffer_capacity=600000,
                epsilon_start=0.08,
                epsilon_end=0,
            )
            # Initialise variables BEFORE restoring. Running the global
            # initializer after the restore (as this script used to) resets
            # every variable to its initial value and throws away the
            # checkpoint weights that were just loaded.
            sess.run(tf.compat.v1.global_variables_initializer())
            LoadNFSPAgent(checkpoint_path, agent, 0)
            agent.restore(checkpoint_path)  # IF YOU HAVE A NFSP AGENT PATH

            # NOTE(review): 16 and 4 are hard-coded for both games. 16 equals
            # the Leduc state_representation_size and 4 equals num_actions;
            # if the third argument is the observation size, FHP probably
            # needs `state_size` (108) here - confirm against
            # NFSPEvalWrapper's signature.
            wrapped = NFSPEvalWrapper(game, agent, 16, 4)
            model_name = "Deuling_" + game_string + "_agent_" + str(number)
            evaluator = RainbowAgent(wrapped, config, name=model_name, device=device)
            evaluator.checkpoint_interval = 200

            # Dump the freshly initialised evaluator parameters.
            for param in evaluator.model.parameters():
                print(param)
            evaluator.train()

Using default save_intermediate_weights     : False
Using         training_steps                : 10000
Using         adam_epsilon                  : 1e-08
Using default momentum                      : 0.9
Using         learning_rate                 : 0.001
Using         clipnorm                      : 0.0
Using default optimizer                     : <class 'torch.optim.adam.Adam'>
Using default weight_decay                  : 0.0
Using         loss_function                 : <utils.utils.KLDivergenceLoss object at 0x348a20da0>
Using default activation                    : relu
Using         kernel_initializer            : None
Using         minibatch_size                : 32
Using         replay_buffer_size            : 100000
Using         min_replay_buffer_size        : 32
Using         num_minibatches               : 4
Using default training_iterations           : 1
Using default print_interval                : 100
RainbowConfig
Using default residual_layers               : []
Usi

INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/1000000/q_network_pid0


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/1000000/avg_network_pid0


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/1000000/avg_network_pid0
2025-04-24 18:30:24.246099: W tensorflow/c/c_api.cc:305] Operation '{name:'mlp_2/weights_4_5/Assign' id:5477 op device:{requested: '', assigned: ''} def:{{{node mlp_2/weights_4_5/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](mlp_2/weights_4_5, zeros_99)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


num_actions:  4
float32
Parameter containing:
tensor([[-0.0387,  0.1424,  0.1356,  ..., -0.1193,  0.2109, -0.2149],
        [-0.0606,  0.0378,  0.0511,  ..., -0.2193, -0.0842, -0.1617],
        [-0.2293,  0.1946, -0.1496,  ...,  0.1164,  0.2161, -0.0674],
        ...,
        [-0.1260, -0.0601,  0.0103,  ...,  0.1794,  0.1275,  0.1623],
        [ 0.0858, -0.1984,  0.0916,  ...,  0.2449, -0.1395,  0.2120],
        [ 0.1745, -0.2096, -0.0616,  ..., -0.1356,  0.2044,  0.1124]],
       requires_grad=True)
Parameter containing:
tensor([[0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        ...,
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250]],
       requires_grad=True)
Parameter containing:
tensor([-0.1880, -0.0411

INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/4000001/q_network_pid0


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/4000001/avg_network_pid0


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/4000001/avg_network_pid0
2025-04-24 18:51:30.346162: W tensorflow/c/c_api.cc:305] Operation '{name:'mlp_2/weights_4_6/Assign' id:6427 op device:{requested: '', assigned: ''} def:{{{node mlp_2/weights_4_6/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](mlp_2/weights_4_6, zeros_119)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


num_actions:  4
float32
Parameter containing:
tensor([[-0.2132, -0.1325,  0.1449,  ..., -0.0294,  0.1902, -0.2222],
        [ 0.0034, -0.0749, -0.1597,  ..., -0.2124, -0.0075,  0.0055],
        [ 0.0130, -0.0592,  0.0318,  ...,  0.0541,  0.1074,  0.1347],
        ...,
        [-0.1521,  0.1424, -0.1522,  ..., -0.1665,  0.2450, -0.2215],
        [ 0.1710,  0.0053, -0.1334,  ...,  0.0572, -0.1827, -0.0751],
        [-0.1107,  0.0891,  0.0266,  ...,  0.1177, -0.1204, -0.2183]],
       requires_grad=True)
Parameter containing:
tensor([[0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        ...,
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250]],
       requires_grad=True)
Parameter containing:
tensor([ 0.0252, -0.2462

INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/7000001/q_network_pid0


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/7000001/avg_network_pid0


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/7000001/avg_network_pid0
2025-04-24 19:16:27.728938: W tensorflow/c/c_api.cc:305] Operation '{name:'mlp_8/weights_4_1/Assign' id:7377 op device:{requested: '', assigned: ''} def:{{{node mlp_8/weights_4_1/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](mlp_8/weights_4_1, zeros_139)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


num_actions:  4
float32
Parameter containing:
tensor([[-0.0093,  0.1627,  0.0237,  ...,  0.0605, -0.0526, -0.1078],
        [-0.1705,  0.2001,  0.1729,  ..., -0.0298,  0.1511, -0.1159],
        [ 0.1146, -0.1420, -0.0291,  ...,  0.2271, -0.1694, -0.2072],
        ...,
        [-0.0321,  0.1457, -0.1098,  ..., -0.2221, -0.0109,  0.1575],
        [-0.0883, -0.2151,  0.1374,  ...,  0.1975, -0.1479,  0.2131],
        [ 0.1497,  0.2183, -0.0608,  ...,  0.1889, -0.0342, -0.0183]],
       requires_grad=True)
Parameter containing:
tensor([[0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        ...,
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250]],
       requires_grad=True)
Parameter containing:
tensor([-0.0169,  0.0817

INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/8000000/q_network_pid0


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/8000000/avg_network_pid0


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/8000000/avg_network_pid0
2025-04-24 19:39:57.282081: W tensorflow/c/c_api.cc:305] Operation '{name:'mlp_8/weights_4_2/Assign' id:8327 op device:{requested: '', assigned: ''} def:{{{node mlp_8/weights_4_2/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](mlp_8/weights_4_2, zeros_159)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


num_actions:  4
float32
Parameter containing:
tensor([[ 0.2378, -0.1560, -0.1265,  ..., -0.1625,  0.1329,  0.0983],
        [ 0.0424,  0.0147,  0.0789,  ..., -0.0644,  0.0989,  0.1075],
        [-0.0654, -0.0124, -0.1924,  ...,  0.0647, -0.1127, -0.1819],
        ...,
        [-0.0032, -0.1908, -0.2493,  ..., -0.0293, -0.0832,  0.1655],
        [-0.0493, -0.0238,  0.0421,  ...,  0.1483, -0.0109, -0.2024],
        [-0.1611,  0.1256, -0.0718,  ...,  0.2049,  0.0277, -0.2087]],
       requires_grad=True)
Parameter containing:
tensor([[0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        ...,
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250]],
       requires_grad=True)
Parameter containing:
tensor([ 0.1208, -0.0940

INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/10000000/q_network_pid0


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/10000000/avg_network_pid0


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/10000000/avg_network_pid0
2025-04-24 19:58:15.265930: W tensorflow/c/c_api.cc:305] Operation '{name:'mlp_8/weights_4_3/Assign' id:9277 op device:{requested: '', assigned: ''} def:{{{node mlp_8/weights_4_3/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](mlp_8/weights_4_3, zeros_179)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


num_actions:  4
float32
Parameter containing:
tensor([[-0.0061,  0.1451,  0.1814,  ...,  0.2017, -0.1751,  0.0222],
        [ 0.1377, -0.1481,  0.1134,  ...,  0.0544, -0.0025, -0.0751],
        [ 0.0740, -0.0122, -0.0627,  ..., -0.1409, -0.1177, -0.1074],
        ...,
        [-0.2233, -0.2277, -0.0231,  ..., -0.0606, -0.1158,  0.2097],
        [ 0.1082,  0.1162, -0.1571,  ..., -0.0555, -0.0650,  0.2107],
        [ 0.1305, -0.2376, -0.0251,  ..., -0.0503, -0.1969,  0.2250]],
       requires_grad=True)
Parameter containing:
tensor([[0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        ...,
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250]],
       requires_grad=True)
Parameter containing:
tensor([-0.1546, -0.0836

KeyboardInterrupt: 