In [1]:
from games import TicTacToe

In [2]:
import torch

game = TicTacToe()
policy = torch.rand(9)  # Example policy
sym_pairs = game.symmetries(policy)

for i, (s, p) in enumerate(sym_pairs):
    print(f"Symmetry {i+1}:")
    print("State:\n", s)
    print("Policy:", p)
    print()

Symmetry 1:
State:
 tensor([[[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]]]])
Policy: tensor([0.9295, 0.9486, 0.8727, 0.5686, 0.5670, 0.5375, 0.8573, 0.9761, 0.8592])

Symmetry 2:
State:
 tensor([[[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]]]])
Policy: tensor([0.8727, 0.9486, 0.9295, 0.5375, 0.5670, 0.5686, 0.8592, 0.9761, 0.8573])

Symmetry 3:
State:
 tensor([[[[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[0., 0., 0.],
          [0., 0., 0.],
          [0., 0., 0.]],

         [[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]]]])
Policy: tensor([0.8727, 0.5375, 0.8592, 0.9486, 0.5670, 0.9761, 0.9295, 0.5686, 0.8573])

S

In [1]:
from self_play import SelfPlayManager
from games import TicTacToe
from net import TicTacToeNet
from trainer import NeuralNetworkTrainer

net = TicTacToeNet()
trainer = NeuralNetworkTrainer(net)

print("----- Self-play generation started -----")
self_play_manager = SelfPlayManager(net, TicTacToe)
data = self_play_manager.generate_self_play(num_games=100, num_workers=4)

print()
print("----- Training started -----")
trainer.train(data, epochs=10)

print()

from promoter import ModelPromoter
from evaluator import ModelEvaluator

print("----- Model evaluation and promotion started -----")
evaluator = ModelEvaluator(game_class=TicTacToe, mcts_params={"num_simulations": 50})
promoter = ModelPromoter(model_dir="models", evaluator=evaluator, net_class=TicTacToeNet)
NUM_SELF_PLAY_GAMES = 100

best_net = promoter.get_best_model()
win_rate, metrics = promoter.evaluate_and_maybe_promote(net, metadata={"episode": 0})

print()
print("----- Evaluation complete -----")
# Optional: Print summary
print(f"[Summary] Win rate: {win_rate:.2%} | Metrics: {metrics}")

----- Self-play generation started -----
[SelfPlayManager] Collecting 100 games with 4 workers...


Self-play: 100%|██████████████████████████████| 100/100 [00:01<00:00, 79.34it/s]


[SelfPlayManager] Collected 100 games.

----- Training started -----
[Trainer] Training started...


Epochs: 100%|███████████████████████████████████| 10/10 [00:00<00:00, 11.61it/s]


[Trainer] Training finished. Loss: 0.0856, Policy Loss: 0.0828, Value Loss: 0.0028

----- Model evaluation and promotion started -----


Evaluating: 100%|██████████| 20/20 [00:00<00:00, 22.13it/s]

[Evaluator]: Candidate Win Rate: 50.00% (W:10 L:10 D:0)
[Promoter]: ❌ Candidate rejected (win rate: 50.00%)

----- Evaluation complete -----
[Summary] Win rate: 50.00% | Metrics: {'wins': 10, 'losses': 10, 'draws': 0, 'total': 20}



