In [1]:
import sys
sys.path.append('../../src/')
from tegame import Tegame,TegameML

import torch
import pickle
from tqdm import tqdm
from copy import deepcopy

%load_ext autoreload
%autoreload 2

In [2]:
# How many independent games each agent will play.
N_sim = 100


def _sample_initial_state():
    """Build a fresh 2-player game, restart it, and return a copy of its state."""
    fresh_game = Tegame(players=2)
    fresh_game.restart()
    return fresh_game.get_state().copy()


# Pool of independently generated starting states. Every agent evaluated
# below is started from these same states, so the comparison does not
# hinge on a single initial configuration.
states = [_sample_initial_state() for _ in range(N_sim)]

In [3]:
# Outcome of each baseline game, in the same order as `states`.
results_code = []

for sim_idx in tqdm(range(N_sim)):
    # Hand-crafted player, configured through its two threshold parameters.
    heuristic_game = Tegame(
        players=2,
        verb_lvl=0,
        thresh_nonmandatory=2,
        thresh_secondchoice=4,
    )
    heuristic_game.restart()

    # Start from the pre-generated state for this simulation index. A deep
    # copy is handed over so the entry stored in `states` is left untouched
    # for any later run that reuses it.
    heuristic_game.set_state(deepcopy(states[sim_idx]))

    # run_game() is expected to return True for a win and False for a loss.
    results_code.append(heuristic_game.run_game())

print(f"Win Rate: {100 * sum(results_code) / len(results_code):.1f}%")

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 318.98it/s]

Win Rate: 2.0%





In [4]:
# Load the full saved model (architecture + weights).
# weights_only=False is required because we want to restore the entire
# model object, not just its parameter tensors.
# SECURITY: weights_only=False goes through pickle and can execute arbitrary
# code embedded in the file, so only load checkpoints from a trusted source.
model = torch.load("tegame_model.pth", weights_only=False)

# Put the restored module into evaluation mode before using it for inference,
# so that train-time-only behaviour (e.g. dropout, batch-norm statistic
# updates) is disabled. This assumes the checkpoint holds a torch.nn.Module,
# as described above. eval() returns the module itself; re-assigning it keeps
# the cell from echoing the module repr as output.
model = model.eval()

In [5]:
# Outcome of each ML-driven game, in the same order as `states`.
results_ML = []

for i in tqdm(range(N_sim)):
    # Run the ML-based agent instead of the heuristic one
    game = TegameML(model, players=2, verb_lvl=0)
    game.restart()

    # Set the sampled initial state
    # Each game starts from the same pre-generated initial state (by index)
    # that the baseline simulations use, so both agents face identical
    # starting conditions. The deep copy keeps `states` unmodified.
    state = deepcopy(states[i])
    game.set_state(state)

    game_won = game.run_game()  # must return True (win) or False (loss)
    results_ML.append(game_won)

# Tallies derived from the recorded outcomes. These names used to be
# initialised to 0 and never updated, which left misleading zeros in the
# kernel namespace.
wins = sum(results_ML)
losses = len(results_ML) - wins

print(f"Win Rate: {100 * sum(results_ML) / len(results_ML):.1f}%")

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:16<00:00,  6.02it/s]

Win Rate: 1.0%





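> **Comment:** The model comes close to the hand-crafted code (1.0% vs. 2.0% win rate), and it is expected that its win rate is still lower. After all, the model is only learning to imitate the moves seen in the dataset — it is not a reinforcement-learning agent (at least not yet). Note that with only 100 simulated games the gap between the two agents amounts to a single win, so a larger `N_sim` is needed before drawing firm conclusions from this comparison.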