In [1]:
from agent_configs import RainbowConfig
import gymnasium as gym
import torch
import random
import numpy as np
import torch
from utils import CategoricalCrossentropyLoss, KLDivergenceLoss
from utils.utils import HuberLoss
from cfr_utils import (
    EvalWrapper,
    evaluatebots,
    WrapperEnv,
    load_agents,
    EmptyConf,
    NFSPWrapper,
    NFSPEvalWrapper,
    LoadNFSPAgent,
)
import pyspiel
import copy
from agent_configs.cfr_config import CFRConfig
from active_player import ActivePlayer
from cfr_agent import CFRAgent
from cfr_network import CFRNetwork
import sys

sys.path.append("..")
from dqn.rainbow.rainbow_agent import RainbowAgent
import tensorflow as tf
import os
import open_spiel.python.algorithms.nfsp

# Give TensorFlow an empty list of visible GPU devices.
tf.config.set_visible_devices([], "GPU")
num_players = 2
max_nodes = 10_000_000


def _load_universal_poker(num_suits, num_ranks, num_hole_cards, board_cards):
    """Load a two-player, two-round ``universal_poker`` game from OpenSpiel.

    Only the deck and card-dealing parameters vary between the games built
    in this script; the betting abstraction, number of rounds and blinds
    are shared.

    Args:
        num_suits: Value for the ``numSuits`` game parameter.
        num_ranks: Value for the ``numRanks`` game parameter.
        num_hole_cards: Value for the ``numHoleCards`` game parameter.
        board_cards: Value for the ``numBoardCards`` game parameter, given
            as a space-separated string (one entry per round, presumably).

    Returns:
        The game object returned by ``pyspiel.load_game``.
    """
    parameters = {
        "numPlayers": 2,
        "numSuits": num_suits,
        "numRanks": num_ranks,
        "numHoleCards": num_hole_cards,
        "numBoardCards": board_cards,
        "bettingAbstraction": "fcpa",
        "numRounds": 2,
        "blind": "50 100",
    }
    return pyspiel.load_game("universal_poker", parameters)


# Larger game: 4 suits x 13 ranks, two hole cards, "0 3" board cards.
fhp = _load_universal_poker(
    num_suits=4, num_ranks=13, num_hole_cards=2, board_cards="0 3"
)
# Smaller game: 2 suits x 3 ranks, one hole card, "0 1" board cards.
leduc = _load_universal_poker(
    num_suits=2, num_ranks=3, num_hole_cards=1, board_cards="0 1"
)

# Per-game input width later handed to the NFSP agent constructor.
leducconfig = {"state_representation_size": 16}
fhpconfig = {"state_representation_size": 108}

leducgame = NFSPWrapper(leduc)
fhpgame = NFSPWrapper(fhp)

active_player_obj = ActivePlayer(2)

# Hyperparameters handed to RainbowConfig for the Rainbow agent that is
# trained against each loaded NFSP agent.
config_dict = {
    # Network shape.
    "dense_layer_widths": [128, 128],
    "value_hidden_layer_widths": [128],
    # NOTE(review): key is spelled "advatage"; confirm RainbowConfig reads
    # this exact spelling, otherwise the setting is silently ignored.
    "advatage_hidden_layer_widths": [128],
    # Optimiser and schedule.
    "adam_epsilon": 1e-8,
    "learning_rate": 0.002,
    "training_steps": 22000,
    # Replay buffer and target-network updates.
    "minibatch_size": 32,
    "replay_buffer_size": 100000,
    "min_replay_buffer_size": 32,
    "transfer_interval": 1280,
    # Loss and discounting.
    "loss_function": KLDivergenceLoss(),
    "clipnorm": 0.0,
    "discount_factor": 0.99,
    "replay_interval": 64,
    # Epsilon-greedy exploration: linear decay from 1 to 0 over 2000 steps.
    "eg_epsilon": 1,
    "eg_epsilon_final": 0.0,
    "eg_epsilon_final_step": 2000,
    "eg_epsilon_decay_type": "linear",
    "num_minibatches": 4,
}
gameconfig = EmptyConf()
config = RainbowConfig(config_dict, gameconfig)

# Device string later passed to RainbowAgent.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# NOTE(review): this is set after the CUDA availability probe above and
# after TensorFlow/PyTorch were imported; confirm it has the intended effect
# and does not contradict a "cuda:0" device choice.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# Switch TensorFlow to TF1-style behaviour; sessions are used further down.
tf.compat.v1.disable_v2_behavior()


# Every slot currently points at the same NFSP checkpoint directory.
_NFSP_CHECKPOINT_DIR = "checkpoints/leduc/nfsp/0/10000000/"

# Keep the individual mainpathN names so a single slot can be re-pointed.
(
    mainpath1, mainpath2, mainpath3, mainpath4, mainpath5,
    mainpath6, mainpath7, mainpath8, mainpath9, mainpath10,
    mainpath11, mainpath12, mainpath13, mainpath14, mainpath15,
    mainpath16, mainpath17, mainpath18, mainpath19, mainpath20,
) = (_NFSP_CHECKPOINT_DIR,) * 20

# Checkpoints evaluated on the Leduc game.
leduc_agent_paths = [
    mainpath1,
    mainpath2,
    mainpath3,
    mainpath4,
    mainpath5,
    mainpath6,
    mainpath7,
    mainpath8,
    mainpath9,
    mainpath10,
]
# Checkpoints evaluated on the FHP game.
# NOTE(review): these still point at a "leduc" checkpoint directory; confirm
# whether FHP-specific checkpoints were intended here.
fhp_agent_paths = [
    mainpath11,
    mainpath12,
    mainpath13,
    mainpath14,
    mainpath15,
    mainpath16,
    mainpath17,
    mainpath18,
    mainpath19,
    mainpath20,
]

nodes = 0
games = [leducgame, fhpgame]
# For every game and every checkpoint: rebuild the NFSP agent, load its saved
# weights, wrap it as an environment and train a fresh Rainbow agent on it.
for i in games:
    # Select the checkpoint list, label and NFSP input width for this game.
    if i == leducgame:
        agent_paths = leduc_agent_paths
        game_string = "leduc"
        state_size = leducconfig["state_representation_size"]
    else:
        agent_paths = fhp_agent_paths
        game_string = "fhp"
        state_size = fhpconfig["state_representation_size"]

    for number, agent_path in enumerate(agent_paths):
        i.reset()

        # Build each agent in its own graph: reusing the default graph would
        # keep adding variables on every iteration and give later agents
        # uniquified variable names.
        with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
            agent = open_spiel.python.algorithms.nfsp.NFSP(
                session=sess,
                player_id=0,
                state_representation_size=state_size,
                num_actions=4,
                hidden_layers_sizes=[1024, 512, 1024, 512],
                reservoir_buffer_capacity=30000000,
                anticipatory_param=0,
                batch_size=256,
                rl_learning_rate=0.1,
                sl_learning_rate=0.01,
                min_buffer_size_to_learn=1000,
                learn_every=256,
                optimizer_str="sgd",
                replay_buffer_capacity=600000,
                epsilon_start=0.08,
                epsilon_end=0,
            )
            # Initialise first, then restore: running the initializer after
            # the restore would overwrite the loaded weights with fresh
            # random values.
            sess.run(tf.compat.v1.global_variables_initializer())
            LoadNFSPAgent(agent_path, agent, 0)
            agent.restore(agent_path)  # requires an NFSP checkpoint at agent_path

            # NOTE(review): 16 matches the Leduc state size and 4 matches
            # num_actions; confirm whether the FHP game should pass its own
            # state size here instead.
            wrapped = NFSPEvalWrapper(i, agent, 16, 4)
            # Give each run its own name so different games/checkpoints are
            # distinguishable (the name was previously computed but unused).
            model_name = "Rainbow_" + game_string + "_agent_" + str(number)
            evaluator = RainbowAgent(
                wrapped, config, name=model_name, device=device
            )
            evaluator.checkpoint_interval = 200

            # Dump the freshly initialised Rainbow parameters for inspection.
            for param in evaluator.model.parameters():
                print(param)
            evaluator.train()



Instructions for updating:
non-resource variables are not supported in the long term
Using default save_intermediate_weights     : False
Using         training_steps                : 22000
Using         adam_epsilon                  : 1e-08
Using default momentum                      : 0.9
Using         learning_rate                 : 0.002
Using         clipnorm                      : 0.0
Using default optimizer                     : <class 'torch.optim.adam.Adam'>
Using default weight_decay                  : 0.0
Using         loss_function                 : <utils.utils.KLDivergenceLoss object at 0x1197c1120>
Using default activation                    : relu
Using         kernel_initializer            : None
Using         minibatch_size                : 32
Using         replay_buffer_size            : 100000
Using         min_replay_buffer_size        : 32
Using         num_minibatches               : 4
Using default training_iterations           : 1
Using default print_interval   

2025-04-24 16:11:02.358930: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled


NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for checkpoints/leduc/nfsp/0/10000000/q_network_pid0