# Tako HRM - Model Evaluation

Evaluate trained TicTacToe models against baselines:
- Random play
- Perfect play (minimax)
- Self-play

## 🚀 Quick Start

1. Train a model first (see `01_train_tictactoe.ipynb`)
2. Run all cells to evaluate the latest checkpoint
3. View win rates and sample games

In [None]:
# Setup
import os
if not os.path.exists('tako-v2'):
    !git clone https://github.com/zfdupont/tako-v2.git
%cd tako-v2

import sys
sys.path.insert(0, '/content/tako-v2')

print("‚úÖ Setup complete")

In [None]:
import torch
import yaml
from pathlib import Path
from model.hrm import HRM
from games.tictactoe import TicTacToeGame
from training.mcts import MCTS

# Read the TicTacToe configuration; its 'model' section parameterizes HRM
# here and its 'mcts' section is consumed by the evaluation cells.
config_path = Path('config/tictactoe.yaml')
with config_path.open() as f:
    config = yaml.safe_load(f)

# Gather checkpoint files ordered oldest -> newest by modification time
checkpoint_dir = Path('checkpoints/tictactoe')
checkpoints = list(checkpoint_dir.glob('*.pt'))
checkpoints.sort(key=lambda ckpt: ckpt.stat().st_mtime)

if checkpoints:
    # The most recently modified file is treated as the latest checkpoint
    latest_ckpt = checkpoints[-1]
    print(f"Loading checkpoint: {latest_ckpt.name}")

    # Prefer the GPU when one is available
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    # Build the network from the config, then restore the saved weights
    model = HRM(**config['model'])
    checkpoint = torch.load(latest_ckpt, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])

    # Move to the target device and switch to inference mode
    model = model.to(device)
    model.eval()

    print(f"‚úÖ Model loaded on {device}")
    print(f"   Training step: {checkpoint.get('step', 'unknown')}")
else:
    # Nothing to evaluate yet; later cells need `model` and `device`
    print("‚ùå No checkpoints found. Train a model first.")

In [None]:
# Evaluate vs Random Play
import numpy as np
from tqdm import tqdm

def play_vs_random(model, mcts_config, num_games=100, device='cpu'):
    """Pit the model (guided by MCTS) against a uniformly random opponent.

    The model always moves on even plies (so it moves first) and plays the
    argmax of the policy returned by the MCTS search. The opponent picks
    uniformly among the legal moves.

    Args:
        model: Network handed to MCTS for evaluation.
        mcts_config: Settings forwarded to the MCTS constructor.
        num_games: How many games to play.
        device: Device string forwarded to MCTS.

    Returns:
        Dict with integer counts under the keys 'win', 'draw' and 'loss'.

    NOTE(review): a final outcome of 1.0 is counted as a model win, 0.0 as a
    draw, and anything else as a loss. This assumes `outcome()` is reported
    from the first player's perspective; confirm against TicTacToeGame.
    """
    searcher = MCTS(model, TicTacToeGame, mcts_config, device=device)
    tally = dict.fromkeys(('win', 'draw', 'loss'), 0)

    for _ in tqdm(range(num_games), desc="Playing vs Random"):
        board = TicTacToeGame()
        ply = 0

        while not board.is_terminal():
            models_turn = ply % 2 == 0
            if models_turn:
                # Greedy pick from the search policy
                action = np.argmax(searcher.search(board, ply))
            else:
                # Opponent: uniform choice among legal moves
                action = np.random.choice(board.legal_moves())

            board.make_move(action)
            ply += 1

        # Classify the finished game
        final = board.outcome()
        if final == 1.0:
            bucket = 'win'
        elif final == 0.0:
            bucket = 'draw'
        else:
            bucket = 'loss'
        tally[bucket] += 1

    return tally

# Run evaluation against the random opponent and report the results.
# The game count lives in one variable so the banner, the call, and the
# statistics below cannot drift apart.
num_eval_games = 100
print(f"\nEvaluating vs Random Play ({num_eval_games} games)...")
results = play_vs_random(model, config['mcts'], num_games=num_eval_games, device=device)

# Convert counts to real percentages of the games tallied. Printing the raw
# counts as "percent" is only correct when exactly 100 games are played.
games_played = sum(results.values())
percent = {
    key: (100.0 * count / games_played if games_played else 0.0)
    for key, count in results.items()
}

print("\n" + "="*60)
print("Results vs Random Play")
print("="*60)
print(f"  Wins:   {results['win']:3d} ({percent['win']:.0f}%)")
print(f"  Draws:  {results['draw']:3d} ({percent['draw']:.0f}%)")
print(f"  Losses: {results['loss']:3d} ({percent['loss']:.0f}%)")
print("="*60)

# Thresholds apply to the win rate, not the raw win count
if percent['win'] >= 90:
    print("\n✅ Excellent! Model dominates random play.")
elif percent['win'] >= 70:
    print("\n✅ Good! Model is learning.")
else:
    print("\n⚠️ Model needs more training.")

In [None]:
# Play one illustrative game (model moves first, random opponent second)
# and print the board after every move.
print("\nSample Game (Model vs Random):")
print("=" * 60)

mcts = MCTS(model, TicTacToeGame, config['mcts'], device=device)
game = TicTacToeGame()
move_num = 0

print("\nInitial board:", game, sep="\n")

while not game.is_terminal():
    model_to_move = move_num % 2 == 0
    if model_to_move:
        # Model: greedy pick from the MCTS policy
        action = np.argmax(mcts.search(game, move_num))
    else:
        # Opponent: uniform choice among legal moves
        action = np.random.choice(game.legal_moves())
    player = "Model (X)" if model_to_move else "Random (O)"

    game.make_move(action)
    move_num += 1

    print(f"\nMove {move_num}: {player} plays position {action}", game, sep="\n")

# NOTE(review): 1.0 is reported as a model win and 0.0 as a draw; this
# assumes outcome() is from the first player's perspective. Confirm.
outcome = game.outcome()
if outcome == 1.0:
    verdict = "\nüéâ Model wins!"
elif outcome == 0.0:
    verdict = "\nü§ù Draw"
else:
    verdict = "\nüòû Model loses"
print(verdict)

## 🎯 Next Steps

- **Play interactively:** Open `03_interactive_play.ipynb`
- **Continue training:** If win rate < 90%, train for more epochs
- **Experiment:** Try different MCTS simulation counts (10, 25, 50, 100)