In [1]:
from self_play import SelfPlayManager
from games import TicTacToe
from net import TicTacToeNet
from trainer import NeuralNetworkTrainer

# Run configuration, hoisted out of the calls below so the values a reader is
# most likely to tune live in one place.
DEVICE = 'cpu'
NUM_SELF_PLAY_GAMES = 100
NUM_SELF_PLAY_WORKERS = 4

# The trainer is constructed around the same `net` object that self-play uses,
# so both refer to a single network instance.
net = TicTacToeNet()
trainer = NeuralNetworkTrainer(net, device=DEVICE)

print("----- Self-play generation started -----")
self_play_manager = SelfPlayManager(net, TicTacToe)
# `data` is whatever generate_self_play returns; it is handed to
# trainer.train(...) in the next cell.
data = self_play_manager.generate_self_play(
    num_games=NUM_SELF_PLAY_GAMES,
    num_workers=NUM_SELF_PLAY_WORKERS,
)


----- Self-play generation started -----
[SelfPlayManager] Collecting 100 games with 4 workers...


[SelfPlayManager] Self-play: 100%|████████████| 100/100 [00:05<00:00, 17.39it/s]


[SelfPlayManager] Collected 100 games.


In [2]:

print()
print("----- Training started -----")
trainer.train(data, epochs=10)

# Sanity check that the trainer still holds the very same object as `net`
# (it was constructed with `net`). `is` is used instead of `==` because the
# question here is object identity: `==` can be overridden by the class and is
# not guaranteed to return a plain bool.
# NOTE(review): presumably train() updates the network in place, which is why
# later cells keep using `net` directly -- confirm in NeuralNetworkTrainer.
print("Are the models the same? ", net is trainer.net)
print()



----- Training started -----
[Trainer] Training started...


[Trainer] Epochs: 100%|█████████████████████████| 10/10 [00:01<00:00,  8.64it/s]

[Trainer] Training finished. Loss: 30.5803, Policy Loss: 20.8856, Value Loss: 9.6946
Are the models the same?  True






In [3]:
from promoter import ModelPromoter
from evaluator import ModelEvaluator

print("----- Model evaluation and promotion started -----")

# MCTS settings handed to the evaluator.
evaluation_mcts_params = {"num_simulations": 150}
evaluator = ModelEvaluator(
    game_class=TicTacToe,
    mcts_params=evaluation_mcts_params,
)
promoter = ModelPromoter(
    model_dir="models",
    evaluator=evaluator,
    net_class=TicTacToeNet,
)

# NOTE(review): `best_net` is not read again in this cell; the call is kept
# as-is in case get_best_model() has side effects or later cells use the name.
best_net = promoter.get_best_model()
win_rate, metrics = promoter.evaluate_and_maybe_promote(
    net,
    metadata={"episode": 0},
)

print()
print("----- Evaluation complete -----")
# One-line summary of the evaluation result.
# NOTE(review): in the recorded run, metrics['total'] was 14 for a 20-game
# evaluation (9 W / 5 L / 6 D), so the reported win rate appears to exclude
# draws -- confirm in ModelEvaluator.
print(f"[Summary] Win rate: {win_rate:.2%} | Metrics: {metrics}")

----- Model evaluation and promotion started -----


[Evaluator] Evaluating: 100%|███████████████████| 20/20 [00:03<00:00,  6.61it/s]

[Evaluator]: Candidate Win Rate: 64.29% (W:9 L:5 D:6)
[Promoter]: ✅ Promoted new model with win rate 64.29% → models/model_20250714_173742.pt
Metadata: {'episode': 0}

----- Evaluation complete -----
[Summary] Win rate: 64.29% | Metrics: {'wins': 9, 'losses': 5, 'draws': 6, 'total': 14, 'win_rate': 0.6428571428571429}





In [4]:
from main import human_vs_ai

In [9]:
# Play a game against the network trained in the cells above; per the recorded
# output this prompts the user for moves interactively.
human_vs_ai(model=net)

You are playing as X (1). Type moves like: 3 4
 . . .
 . . .
 . . .

 . . .
 . X .
 . . .

🤖 AI is thinking...
🤖 AI plays: (0, 1)
 . O .
 . X .
 . . .

Illegal move, try again.
 . O .
 . X .
 . . .

 X O .
 . X .
 . . .

🤖 AI is thinking...
🤖 AI plays: (2, 2)
 X O .
 . X .
 . . O

 X O X
 . X .
 . . O

🤖 AI is thinking...
🤖 AI plays: (2, 1)
 X O X
 . X .
 . O O

 X O X
 . X .
 X O O

🎉 You win!
