In [None]:
from agent_configs import RainbowConfig
import gymnasium as gym
import torch
import random
import numpy as np
import torch
from utils import CategoricalCrossentropyLoss, KLDivergenceLoss
from utils.utils import HuberLoss
from cfr_utils import (
    EvalWrapper,
    evaluatebots,
    WrapperEnv,
    load_agents,
    EmptyConf,
    NFSPWrapper,
    NFSPEvalWrapper,
    LoadNFSPAgent,
)
import pyspiel
import copy
from agent_configs.cfr_config import CFRConfig
from active_player import ActivePlayer
from cfr_agent import CFRAgent
from cfr_network import CFRNetwork
import sys

sys.path.append("..")
from dqn.rainbow.rainbow_agent import RainbowAgent
import tensorflow as tf
import os
import open_spiel.python.algorithms.nfsp

tf.config.set_visible_devices([], "GPU")
num_players = 2
max_nodes = 10000000

fhp = pyspiel.load_game(
    "universal_poker",
    {
        "numPlayers": 2,
        "numSuits": 4,
        "numRanks": 13,
        "numHoleCards": 2,
        "numBoardCards": "0 3",
        "bettingAbstraction": "fcpa",
        "numRounds": 2,
        "blind": "50 100",
    },
)
leduc = pyspiel.load_game(
    "universal_poker",
    {
        "numPlayers": 2,
        "numSuits": 2,
        "numRanks": 3,
        "numHoleCards": 1,
        "numBoardCards": "0 1",
        "bettingAbstraction": "fcpa",
        "numRounds": 2,
        "blind": "50 100",
    },
)
leducconfig = {"state_representation_size": 16}
fhpconfig = {"state_representation_size": 108}
leducgame = NFSPWrapper(leduc)
fhpgame = NFSPWrapper(fhp)

active_player_obj = ActivePlayer(2)

config_dict = {
    "dense_layer_widths": [128, 256, 256, 128],
    "value_hidden_layer_widths": [128, 128],
    "advantage_hidden_layer_widths": [128, 128],
    "adam_epsilon": 1e-8,
    "learning_rate": 0.002,
    "training_steps": 20000,
    "per_epsilon": 0.001,
    "per_alpha": 0,
    "per_beta": 0,
    "per_beta_final": 0.5,
    "minibatch_size": 256,
    "replay_buffer_size": 1000000,
    "min_replay_buffer_size": 256,
    "transfer_interval": 1024,
    "loss_function": KLDivergenceLoss(),
    "clipnorm": 0.0,
    "discount_factor": 0.99,
    "replay_interval": 64,
    "eg_epsilon": 1,
    "eg_epsilon_final": 0.0,
    "eg_epsilon_final_step": 5000,
    "eg_epsilon_decay_type": "linear",
    "num_minibatches": 4,
}
gameconfig = EmptyConf()
config = RainbowConfig(config_dict, gameconfig)
config.v_min = -1200
config.v_max = 1200
device = "cuda:0" if torch.cuda.is_available() else "cpu"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
tf.compat.v1.disable_v2_behavior()


mainpath1 = "./checkpoints/fhp/nfsp/0/1000002/"
mainpath2 = "./checkpoints/fhp/nfsp/0/4000001/"
mainpath3 = "./checkpoints/fhp/nfsp/0/7000001/"
mainpath4 = "./checkpoints/fhp/nfsp/0/8000000/"
mainpath5 = "./checkpoints/fhp/nfsp/0/10000000/"
mainpath6 = "./checkpoints/leduc/nfsp/0/1000000/"
mainpath7 = "./checkpoints/leduc/nfsp/0/4000001/"
mainpath8 = "./checkpoints/leduc/nfsp/0/7000001/"
mainpath9 = "./checkpoints/leduc/nfsp/0/8000000/"
mainpath10 = "./checkpoints/leduc/nfsp/0/10000000/"

leduc_agent_paths = [
    mainpath6,
    mainpath7,
    mainpath8,
    mainpath9,
    mainpath10,
]
fhp_agent_paths = [
    mainpath1,
    mainpath2,
    mainpath3,
    mainpath4,
    mainpath5,
]

nodes = 0
games = [fhpgame, leducgame]
for i in games:
    if i == leducgame:
        agent_paths = leduc_agent_paths
        game_string = "leduc"
    else:
        agent_paths = fhp_agent_paths
        game_string = "fhp"
    for number in range(len(agent_paths)):
        i.reset()

        with tf.compat.v1.Session() as sess:
            agent = open_spiel.python.algorithms.nfsp.NFSP(
                session=sess,
                player_id=0,
                state_representation_size=(
                    leducconfig["state_representation_size"]
                    if i == leducgame
                    else fhpconfig["state_representation_size"]
                ),
                num_actions=4,
                hidden_layers_sizes=[1024, 512, 1024, 512],
                reservoir_buffer_capacity=30000000,
                anticipatory_param=0,
                batch_size=256,
                rl_learning_rate=0.1,
                sl_learning_rate=0.01,
                min_buffer_size_to_learn=1000,
                learn_every=256,
                optimizer_str="sgd",
                replay_buffer_capacity=600000,
                epsilon_start=0.08,
                epsilon_end=0,
            )
            LoadNFSPAgent(agent_paths[number], agent, 0)
            agent.restore(agent_paths[number])  # IF YOU HAVE A NFSP AGENT PATH
            # agent.restore(path1) # IF YOU HAVE A NFSP AGENT PATH
            sess.run(tf.compat.v1.global_variables_initializer())

            wrapped = NFSPEvalWrapper(
                i,
                agent,
                (
                    leducconfig["state_representation_size"]
                    if i == leducgame
                    else fhpconfig["state_representation_size"]
                ),
                4,
            )
            model_name = "Rainbow2_" + game_string + "_agent_" + str(number)
            evaluator = RainbowAgent(wrapped, config, name=model_name, device=device)
            evaluator.checkpoint_interval = 200

            for param in evaluator.model.parameters():
                print(param)
            evaluator.train()

In [17]:
from agent_configs import RainbowConfig
import gymnasium as gym
import torch
import random
import numpy as np
import torch
from utils import CategoricalCrossentropyLoss, KLDivergenceLoss
from utils.utils import HuberLoss
from cfr_utils import (
    EvalWrapper,
    evaluatebots,
    WrapperEnv,
    load_agents,
    EmptyConf,
    NFSPWrapper,
    NFSPEvalWrapper,
    LoadNFSPAgent,
)
import pyspiel
import copy
from agent_configs.cfr_config import CFRConfig
from active_player import ActivePlayer
from cfr_agent import CFRAgent
from cfr_network import CFRNetwork
import sys

sys.path.append("..")
from dqn.rainbow.rainbow_agent import RainbowAgent
import tensorflow as tf
import os
import open_spiel.python.algorithms.nfsp

tf.config.set_visible_devices([], "GPU")
num_players = 2
max_nodes = 10000000

fhp = pyspiel.load_game(
    "universal_poker",
    {
        "numPlayers": 2,
        "numSuits": 4,
        "numRanks": 13,
        "numHoleCards": 2,
        "numBoardCards": "0 3",
        "bettingAbstraction": "fcpa",
        "numRounds": 2,
        "blind": "50 100",
    },
)
leduc = pyspiel.load_game(
    "universal_poker",
    {
        "numPlayers": 2,
        "numSuits": 2,
        "numRanks": 3,
        "numHoleCards": 1,
        "numBoardCards": "0 1",
        "bettingAbstraction": "fcpa",
        "numRounds": 2,
        "blind": "50 100",
    },
)
leducconfig = {"state_representation_size": 16}
fhpconfig = {"state_representation_size": 108}
leducgame = NFSPWrapper(leduc)
fhpgame = NFSPWrapper(fhp)

active_player_obj = ActivePlayer(2)

config_dict = {
    "dense_layer_widths": [128, 256, 256, 128],
    "value_hidden_layer_widths": [128, 128],
    "advantage_hidden_layer_widths": [128, 128],
    "adam_epsilon": 1e-8,
    "learning_rate": 0.002,
    "training_steps": 20000,
    "per_epsilon": 0.001,
    "per_alpha": 0,
    "per_beta": 0,
    "per_beta_final": 0.5,
    "minibatch_size": 256,
    "replay_buffer_size": 1000000,
    "min_replay_buffer_size": 256,
    "transfer_interval": 1024,
    "loss_function": KLDivergenceLoss(),
    "clipnorm": 0.0,
    "discount_factor": 0.99,
    "replay_interval": 64,
    "eg_epsilon": 1,
    "eg_epsilon_final": 0.0,
    "eg_epsilon_final_step": 5000,
    "eg_epsilon_decay_type": "linear",
    "num_minibatches": 4,
}
gameconfig = EmptyConf()
config = RainbowConfig(config_dict, gameconfig)
config.v_min = -1200
config.v_max = 1200
device = "cuda:0" if torch.cuda.is_available() else "cpu"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
tf.compat.v1.disable_v2_behavior()


mainpath1 = "./checkpoints/fhp/nfsp/0/1000002/"
mainpath2 = "./checkpoints/fhp/nfsp/0/4000001/"
mainpath3 = "./checkpoints/fhp/nfsp/0/7000001/"
mainpath4 = "./checkpoints/fhp/nfsp/0/8000000/"
mainpath5 = "./checkpoints/fhp/nfsp/0/10000000/"
mainpath6 = "./checkpoints/leduc/nfsp/0/1000000/"
mainpath7 = "./checkpoints/leduc/nfsp/0/4000001/"
mainpath8 = "./checkpoints/leduc/nfsp/0/7000001/"
mainpath9 = "./checkpoints/leduc/nfsp/0/8000000/"
mainpath10 = "./checkpoints/leduc/nfsp/0/10000000/"

leduc_agent_paths = [
    mainpath6,
    mainpath7,
    mainpath8,
    mainpath9,
    mainpath10,
]
fhp_agent_paths = [
    mainpath1,
    mainpath2,
    mainpath3,
    mainpath4,
    mainpath5,
]

with tf.compat.v1.Session() as sess:
    agent = open_spiel.python.algorithms.nfsp.NFSP(
        session=sess,
        player_id=0,
        state_representation_size=(108),
        num_actions=4,
        hidden_layers_sizes=[1024, 512, 1024, 512],
        reservoir_buffer_capacity=30000000,
        anticipatory_param=0,
        batch_size=256,
        rl_learning_rate=0.1,
        sl_learning_rate=0.01,
        min_buffer_size_to_learn=1000,
        learn_every=256,
        optimizer_str="sgd",
        replay_buffer_capacity=600000,
        epsilon_start=0.08,
        epsilon_end=0,
    )
    LoadNFSPAgent(mainpath5, agent, 0)
    agent.restore(mainpath5)  # IF YOU HAVE A NFSP AGENT PATH
    # agent.restore(path1) # IF YOU HAVE A NFSP AGENT PATH
    sess.run(tf.compat.v1.global_variables_initializer())

    wrapped = NFSPEvalWrapper(
        fhpgame,
        agent,
        (108),
        4,
    )
    model_name = "Rainbow2_" + "fhp" + "_agent_" + "4"
    evaluator = RainbowAgent(wrapped, config, name=model_name, device=device)
    evaluator.checkpoint_interval = 200

    for param in evaluator.model.parameters():
        print(param)
    evaluator.train()

Training step: 12301/20000
Training step: 12401/20000
Checkpointing at 12400 with score 328.0 and loss 1.045425683259964
Training step: 12501/20000
Training step: 12601/20000
Checkpointing at 12600 with score 314.0 and loss 1.0729773759841919
Training step: 12701/20000
Training step: 12801/20000
Checkpointing at 12800 with score 321.0 and loss 1.0230125546455384
Training step: 12901/20000
Training step: 13001/20000
Checkpointing at 13000 with score 309.5 and loss 1.0454333066940307
Training step: 13101/20000
Training step: 13201/20000
Checkpointing at 13200 with score 352.5 and loss 1.0744955360889434
Training step: 13301/20000
Training step: 13401/20000
Checkpointing at 13400 with score 471.5 and loss 1.0481966853141784
Training step: 13501/20000
Training step: 13601/20000
Checkpointing at 13600 with score 347.0 and loss 1.0657909393310547
Training step: 13701/20000
Training step: 13801/20000
Checkpointing at 13800 with score 224.0 and loss 1.0541116714477539
Training step: 13901/2000

In [18]:
from agent_configs import RainbowConfig
import gymnasium as gym
import torch
import random
import numpy as np
import torch
from utils import CategoricalCrossentropyLoss, KLDivergenceLoss
from utils.utils import HuberLoss
from cfr_utils import (
    EvalWrapper,
    evaluatebots,
    WrapperEnv,
    load_agents,
    EmptyConf,
    NFSPWrapper,
    NFSPEvalWrapper,
    LoadNFSPAgent,
)
import pyspiel
import copy
from agent_configs.cfr_config import CFRConfig
from active_player import ActivePlayer
from cfr_agent import CFRAgent
from cfr_network import CFRNetwork
import sys

sys.path.append("..")
from dqn.rainbow.rainbow_agent import RainbowAgent
import tensorflow as tf
import os
import open_spiel.python.algorithms.nfsp

tf.config.set_visible_devices([], "GPU")
num_players = 2
max_nodes = 10000000

leduc = pyspiel.load_game(
    "universal_poker",
    {
        "numPlayers": 2,
        "numSuits": 2,
        "numRanks": 3,
        "numHoleCards": 1,
        "numBoardCards": "0 1",
        "bettingAbstraction": "fcpa",
        "numRounds": 2,
        "blind": "50 100",
    },
)
leducconfig = {"state_representation_size": 16}
leducgame = NFSPWrapper(leduc)

active_player_obj = ActivePlayer(2)

config_dict = {
    "dense_layer_widths": [128, 128],
    "value_hidden_layer_widths": [128],
    "advantage_hidden_layer_widths": [128],
    "adam_epsilon": 1e-8,
    "learning_rate": 0.001,
    "training_steps": 10000,
    # "per_epsilon": 0.001,
    "per_alpha": 0,
    "per_beta": 0,
    "per_beta_final": 0.5,
    "minibatch_size": 256,
    "replay_buffer_size": 1000000,
    "min_replay_buffer_size": 256,
    "transfer_interval": 1024,
    "loss_function": KLDivergenceLoss(),
    "clipnorm": 0.0,
    "discount_factor": 0.99,
    "replay_interval": 64,
    "eg_epsilon": 1,
    "eg_epsilon_final": 0.0,
    "eg_epsilon_final_step": 5000,
    "eg_epsilon_decay_type": "linear",
    "num_minibatches": 4,
}
gameconfig = EmptyConf()
config = RainbowConfig(config_dict, gameconfig)
config.v_min = -1200
config.v_max = 1200
device = "cuda:0" if torch.cuda.is_available() else "cpu"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
tf.compat.v1.disable_v2_behavior()


mainpath1 = "./checkpoints/fhp/nfsp/0/1000002/"
mainpath2 = "./checkpoints/fhp/nfsp/0/4000001/"
mainpath3 = "./checkpoints/fhp/nfsp/0/7000001/"
mainpath4 = "./checkpoints/fhp/nfsp/0/8000000/"
mainpath5 = "./checkpoints/fhp/nfsp/0/10000000/"
mainpath6 = "./checkpoints/leduc/nfsp/0/1000000/"
mainpath7 = "./checkpoints/leduc/nfsp/0/4000001/"
mainpath8 = "./checkpoints/leduc/nfsp/0/7000001/"
mainpath9 = "./checkpoints/leduc/nfsp/0/8000000/"
mainpath10 = "./checkpoints/leduc/nfsp/0/10000000/"

leduc_agent_paths = [
    mainpath6,
    mainpath7,
    mainpath8,
    mainpath9,
    mainpath10,
]
fhp_agent_paths = [
    mainpath1,
    mainpath2,
    mainpath3,
    mainpath4,
    mainpath5,
]

with tf.compat.v1.Session() as sess:
    agent = open_spiel.python.algorithms.nfsp.NFSP(
        session=sess,
        player_id=0,
        state_representation_size=(16),
        num_actions=4,
        hidden_layers_sizes=[1024, 512, 1024, 512],
        reservoir_buffer_capacity=30000000,
        anticipatory_param=0,
        batch_size=256,
        rl_learning_rate=0.1,
        sl_learning_rate=0.01,
        min_buffer_size_to_learn=1000,
        learn_every=256,
        optimizer_str="sgd",
        replay_buffer_capacity=600000,
        epsilon_start=0.08,
        epsilon_end=0,
    )
    LoadNFSPAgent(mainpath10, agent, 0)
    agent.restore(mainpath10)  # IF YOU HAVE A NFSP AGENT PATH
    # agent.restore(path1) # IF YOU HAVE A NFSP AGENT PATH
    sess.run(tf.compat.v1.global_variables_initializer())

    wrapped = NFSPEvalWrapper(
        leducgame,
        agent,
        (16),
        4,
    )
    model_name = "Rainbow_" + "leduc" + "_agent_" + "4"
    evaluator = RainbowAgent(wrapped, config, name=model_name, device=device)
    evaluator.checkpoint_interval = 200

    for param in evaluator.model.parameters():
        print(param)
    evaluator.train()

Using default save_intermediate_weights     : False
Using         training_steps                : 10000
Using         adam_epsilon                  : 1e-08
Using default momentum                      : 0.9
Using         learning_rate                 : 0.001
Using         clipnorm                      : 0.0
Using default optimizer                     : <class 'torch.optim.adam.Adam'>
Using default weight_decay                  : 0.0
Using         loss_function                 : <utils.utils.KLDivergenceLoss object at 0x3387b3bc0>
Using default activation                    : relu
Using         kernel_initializer            : None
Using         minibatch_size                : 256
Using         replay_buffer_size            : 1000000
Using         min_replay_buffer_size        : 256
Using         num_minibatches               : 4
Using default training_iterations           : 1
Using default print_interval                : 100
RainbowConfig
Using default residual_layers               : []


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/10000000/q_network_pid0


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/10000000/avg_network_pid0


INFO:tensorflow:Restoring parameters from ./checkpoints/leduc/nfsp/0/10000000/avg_network_pid0
2025-04-25 20:13:17.738076: W tensorflow/c/c_api.cc:305] Operation '{name:'mlp_8/weights_4_1/Assign' id:21727 op device:{requested: '', assigned: ''} def:{{{node mlp_8/weights_4_1/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](mlp_8/weights_4_1, zeros_399)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


num_actions:  4
float32
Parameter containing:
tensor([[-0.0400, -0.2339,  0.0483,  ..., -0.1404, -0.0579,  0.0247],
        [ 0.0705, -0.2216,  0.1376,  ..., -0.0146, -0.0453,  0.1677],
        [-0.0422,  0.0694,  0.1510,  ...,  0.0974,  0.0799, -0.1002],
        ...,
        [ 0.2130,  0.1560, -0.1812,  ..., -0.1575,  0.2424,  0.0664],
        [-0.0415,  0.2345, -0.1260,  ..., -0.0613,  0.2225, -0.0972],
        [ 0.2426,  0.2044, -0.1768,  ..., -0.1093, -0.0448,  0.0304]],
       requires_grad=True)
Parameter containing:
tensor([[0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        ...,
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250,  ..., 0.1250, 0.1250, 0.1250]],
       requires_grad=True)
Parameter containing:
tensor([-0.2352,  0.2355