In [1]:
import torch
from Environments.TicTacToe.model import MuZeroResNet, MuZeroResNetCheat
from Environments.TicTacToe.game import TicTacToe
from utils import KaggleAgent, GymAgent, evaluateKaggle, evaluateGym

# Use CUDA when torch reports it as available; otherwise run everything on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
game = TicTacToe()

# Switch between the two network classes imported from Environments.TicTacToe.model.
CHEAT = True

if not CHEAT:
    # MuZeroResNet is configured with three sub-dicts: prediction, dynamics
    # and representation.
    prediction_config = {
        'num_resBlocks': 3,
        'hidden_planes': 64,
        'screen_size': 9,
        'action_size': 9,
        'value_support_size': 1,
        'value_activation': 'tanh'
    }
    dynamics_config = {
        'num_resBlocks': 4,
        'hidden_planes': 64,
        'predict_reward': False,
        'reward_support_size': 1
    }
    representation_config = {
        'num_resBlocks': 3,
        'hidden_planes': 32
    }
    network = MuZeroResNet({
        'predictionFunction': prediction_config,
        'dynamicsFunction': dynamics_config,
        'representationFunction': representation_config,
    })
else:
    # MuZeroResNetCheat is configured with a prediction sub-dict only.
    prediction_config = {
        'num_resBlocks': 4,
        'hidden_planes': 128,
        'screen_size': 9,
        'action_size': 9,
        'value_support_size': 1,
        'value_activation': 'tanh'
    }
    network = MuZeroResNetCheat({
        'predictionFunction': prediction_config
    })

# Move the chosen network to the selected device; `model` is the name the
# rest of the notebook uses.
model = network.to(device)

# The checkpoint path is built from the string forms of `game` and `model`,
# so both objects must render as the directory / file names used on disk.
checkpoint_path = f'Environments/{game}/Models/{model}.pt'

# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source (consider `weights_only=True` if the installed torch supports it).
trained_weights = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(trained_weights)
model.eval()  # switch the network to evaluation mode before playing

# Settings handed to KaggleAgent unchanged.
# NOTE(review): both 'cheat*' flags are off even though CHEAT selects the model
# variant above; whether that pairing is intended cannot be told from here — confirm.
# NOTE(review): 'dirichlet_epsilon' is non-zero; if it controls search noise,
# the win/draw/loss counts will vary between runs — confirm.
agent_args = {
    'search': True,
    'temperature': 0,
    'num_mcts_runs': 100,
    'c_init': 1,
    'c_base': 19625,
    'dirichlet_alpha': 0.3,
    'dirichlet_epsilon': 0.05,
    'cheatAvailableActions': False,
    'cheatTerminalState': False
}
player = KaggleAgent(model, game, args=agent_args)

# Play 100 games with the agent in each list position against the "reaction" opponent.
reaction_first = ["reaction", player.run]
player_first = [player.run, "reaction"]
evaluateKaggle("tictactoe", reaction_first, num_iterations=100)
evaluateKaggle("tictactoe", player_first, num_iterations=100)


Player 1 | Wins: 47 | Draws: 44 | Losses: 9
Player 2 | Wins: 9 | Draws: 44 | Losses: 47
    

Player 1 | Wins: 30 | Draws: 53 | Losses: 17
Player 2 | Wins: 17 | Draws: 53 | Losses: 30
    
