In [None]:
import torch
import numpy as np
from utils import KaggleAgent, GymAgent, evaluateKaggle, evaluateGym

# Pick the compute device once for the whole notebook: CUDA when torch reports it
# as available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# test CarRacing
from Environments.CarRacing.game import CarRacing
from Environments.CarRacing.model import MuZero as MuZeroCarRacing
from Environments.CarRacing.utils import MCTS as MCTSCarRacing

# Hyperparameter bundle handed to MCTSCarRacing further down in this cell.
# The meaning of each key is defined by the project code that consumes it
# (presumably search and training settings — confirm against Environments/CarRacing).
args = dict(
    num_iterations=20,
    num_train_games=100,
    num_mcts_runs=50,
    num_epochs=4,
    batch_size=64,
    temperature=1,
    K=5,
    pb_c_base=19625,
    pb_c_init=2,
    N=10,
    dirichlet_alpha=0.3,
    dirichlet_epsilon=0.05,
    gamma=0.997,
    value_loss_weight=0.25,
    max_grad_norm=5,
    # Left empty here; a previously tried alternative was {'min': 0, 'max': 1}
    known_bounds={},
)

# Play a single CarRacing episode, choosing every action from an MCTS search that is
# driven by the trained MuZero model.
game = CarRacing(render=True)

# Temperature applied to the normalised root visit counts when picking an action:
#   0            -> always take the most-visited action
#   float('inf') -> pick uniformly among the actions that received at least one visit
#   otherwise    -> sample from visit_probs ** (1 / TEMPERATURE), renormalised
TEMPERATURE = 0

# The environment already exists at this point (created with render=True, so it
# presumably owns a viewer window — confirm). Everything after it runs inside
# try/finally so that a failure while loading the checkpoint, searching or stepping
# still closes the environment instead of leaking it.
try:
    model = MuZeroCarRacing(game, device).to(device)
    model.load_state_dict(torch.load('Environments/CarRacing/Models/model.pt', map_location=device))
    model.eval()

    mcts = MCTSCarRacing(model, game, args)

    observation, reward, is_terminal = game.get_initial_state()

    # Inference only: no gradients are needed during the rollout
    with torch.no_grad():
        while not is_terminal:
            # NOTE(review): the meaning of the second argument (0) is defined by
            # MCTSCarRacing.search — confirm against its signature
            root = mcts.search(observation, 0)

            # Allocate a float ndarray up front so the in-place normalisation below
            # is a real array operation rather than an implicit list -> ndarray
            # coercion through NumPy's reflected division
            action_probs = np.zeros(game.action_size, dtype=np.float64)
            for child in root.children:
                action_probs[child.action_taken] = child.visit_count

            # Skip the division when nothing was visited: avoids a NaN array and a
            # RuntimeWarning while leaving the outcome of every branch below the same
            total_visits = action_probs.sum()
            if total_visits > 0:
                action_probs /= total_visits

            if TEMPERATURE == 0:
                action = np.argmax(action_probs)
            elif TEMPERATURE == float('inf'):
                action = np.random.choice(np.flatnonzero(action_probs > 0))
            else:
                temperature_action_probs = action_probs ** (1 / TEMPERATURE)
                temperature_action_probs /= np.sum(temperature_action_probs)
                action = np.random.choice(len(temperature_action_probs), p=temperature_action_probs)

            observation, reward, is_terminal = game.step(action)
finally:
    game.env.close()


In [None]:
# test TicTacToe
from Environments.TicTacToe.config import MuZeroConfigTicTacToe as Config

# Build the TicTacToe MuZero configuration; all three cheat* switches are turned off
config_overrides = {
    'cheatModel': False,
    'cheatAvailableActions': False,
    'cheatTerminalState': False,
    'num_mcts_runs': 60,
    'c_init': 1,
    'dirichlet_epsilon': 0.05,
}
config = Config(**config_overrides)

# Restore trained weights into the model owned by the config and switch it to eval mode.
# NOTE(review): the path interpolates config.game and config.model directly, so it
# relies on their string conversion yielding the directory / file stem — confirm.
checkpoint_path = f'Environments/{config.game}/Models/{config.model}_17.pt'
state_dict = torch.load(checkpoint_path, map_location=device)
config.model.load_state_dict(state_dict)
config.model.eval()

# Wrap the model in an agent: search enabled, temperature 0
agent_args = {
    'search': True,
    'temperature': 0,
    'device': device,
    'config': config
}
player = KaggleAgent(config.model, config.game, args=agent_args)

# Evaluate against the "reaction" opponent with the agent in each list position,
# 100 iterations per ordering
evaluateKaggle("tictactoe", ["reaction", player.run], num_iterations=100)
evaluateKaggle("tictactoe", [player.run, "reaction"], num_iterations=100)