# TTTS vs PUCT Arena Testing

Test Bayesian MCTS (TTTS-IDS) against standard PUCT at various simulation counts.

**Setup:** Use `Runtime > Change runtime type > A100 GPU` for best performance.

In [None]:
# Report whether CUDA is usable and, if so, which device (index 0) and how
# much total memory the runtime provides.
import torch

cuda_ok = torch.cuda.is_available()
print(f"GPU available: {cuda_ok}")
if cuda_ok:
    gpu_name = torch.cuda.get_device_name(0)
    total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU: {gpu_name}")
    print(f"Memory: {total_gb:.1f} GB")

In [None]:
# Install Rust toolchain
# The installer script is piped straight into `sh`; `-y` is passed through to
# it so the install runs without waiting for interactive confirmation.
!curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
import os
# Prepend $HOME/.cargo/bin to PATH for this notebook process so that later
# `!` commands can find the toolchain binaries (presumably where rustup
# installs rustc/cargo -- confirm against the installer output).
os.environ["PATH"] = f"{os.environ['HOME']}/.cargo/bin:" + os.environ["PATH"]

# Verify Rust installation
!rustc --version

In [None]:
# Clone repository
# NOTE(review): this cell is written to run once per fresh runtime. The
# `%cd` commands are relative to the current directory, and `git clone` is
# expected to fail if ./nanozero already exists, so re-running the cell will
# likely leave the working directory somewhere unexpected.
!git clone https://github.com/caldred/nanozero.git
%cd nanozero

# Install Python dependencies
# (maturin is the build tool used below to package the Rust extension.)
!pip install -q numpy scipy maturin

# Build and install Rust extension
# A release wheel is built inside nanozero-mcts-rs, installed with pip, and
# then the working directory is moved back up to the repository root.
%cd nanozero-mcts-rs
!maturin build --release
!pip install target/wheels/nanozero_mcts_rs-*.whl
%cd ..

# Verify Rust backend is available
# Runs in a fresh interpreter and prints the RUST_AVAILABLE flag exported by
# nanozero.game.
!python -c "from nanozero.game import RUST_AVAILABLE; print(f'Rust backend available: {RUST_AVAILABLE}')"

In [None]:
# Upload checkpoint: Use the file upload button or this cell
# Option 1: Upload via Colab UI to checkpoints/connect4_iter150.pt
# Option 2: Mount Google Drive if you have it there
!mkdir -p checkpoints

# Uncomment to mount Google Drive:
# from google.colab import drive
# drive.mount('/content/drive')
# !cp /content/drive/MyDrive/path/to/connect4_iter150.pt checkpoints/

# Check if checkpoint exists
import os
if os.path.exists('checkpoints/connect4_iter150.pt'):
    print("Checkpoint found!")
else:
    print("Please upload checkpoints/connect4_iter150.pt")

In [None]:
import numpy as np
import torch
from scipy import stats
from nanozero.game import get_game
from nanozero.model import AlphaZeroTransformer
from nanozero.mcts import BatchedMCTS, BayesianMCTS
from nanozero.common import sample_action
from nanozero.config import get_model_config, MCTSConfig, BayesianMCTSConfig
from nanozero.common import get_device, load_checkpoint

# Pick the compute device through the project helper and report which one
# was chosen; later cells move the model onto `device`.
device = get_device()
print(f"Device: {device}")

In [None]:
# Build the Connect 4 game object and report which backend it is using.
game = get_game('connect4')
print(f"Game backend: {game.backend}")

# Recreate the network from the game's config, then move it to `device`.
# NOTE(review): n_layer=4 presumably has to match the architecture the
# checkpoint was trained with -- confirm.
model_config = get_model_config(game.config, n_layer=4)
model = AlphaZeroTransformer(model_config)
model = model.to(device)

# Restore the saved weights and switch to evaluation mode for arena play.
load_checkpoint('checkpoints/connect4_iter150.pt', model)
model.eval()
print("Model loaded!")

In [None]:
# Show the default hyperparameters of the Bayesian (TTTS) search config.
ttts_config = BayesianMCTSConfig()
print("Current BayesianMCTSConfig:")
for field_name in (
    "optimality_weight",
    "adaptive_weight",
    "visit_scale",
    "prune_threshold",
    "sigma_0",
    "obs_var",
):
    print(f"  {field_name}: {getattr(ttts_config, field_name)}")

In [None]:
def run_arena(game, model, puct_mcts, ttts_mcts, num_games, mcts_simulations):
    """Play TTTS against PUCT and tally the outcomes from the TTTS side.

    TTTS plays as player 1 in even-numbered games (0, 2, ...) and as
    player -1 in odd-numbered games. On every move the side to play runs
    its search on the current state (as a batch of one) with
    ``mcts_simulations`` simulations and picks an action with
    ``sample_action(policy, temperature=0)``. The PUCT search is called
    with ``add_noise=False``.

    Args:
        game: Game object providing ``initial_state``, ``is_terminal``,
            ``current_player``, ``next_state`` and ``terminal_reward``.
        model: Network handed to both searches.
        puct_mcts: Search object used for the PUCT player.
        ttts_mcts: Search object used for the TTTS player.
        num_games: Number of games to play.
        mcts_simulations: Simulations per search call.

    Returns:
        Tuple ``(wins, draws, losses)`` counted from the TTTS perspective.
    """

    def pick_action(searcher, state, **search_kwargs):
        # Both searches take a batch, so wrap the single state and unwrap
        # the single returned policy.
        policies = searcher.search(
            state[np.newaxis, ...], model,
            num_simulations=mcts_simulations, **search_kwargs
        )
        return sample_action(policies[0], temperature=0)

    tally = {"wins": 0, "draws": 0, "losses": 0}

    for game_index in range(num_games):
        # Alternate which side TTTS plays from game to game.
        ttts_side = -1 if game_index % 2 else 1
        state = game.initial_state()

        while not game.is_terminal(state):
            if game.current_player(state) == ttts_side:
                action = pick_action(ttts_mcts, state)
            else:
                action = pick_action(puct_mcts, state, add_noise=False)
            state = game.next_state(state, action)

        # The reward is treated as relative to whoever is the current
        # player in the terminal state; flip it when that is not TTTS.
        reward = game.terminal_reward(state)
        final_player = game.current_player(state)
        ttts_result = reward if final_player == ttts_side else -reward

        if ttts_result > 0:
            tally["wins"] += 1
        elif ttts_result < 0:
            tally["losses"] += 1
        else:
            tally["draws"] += 1

        games_done = game_index + 1
        if games_done % 20 == 0:
            print(f"  Progress: {games_done}/{num_games}")

    return tally["wins"], tally["draws"], tally["losses"]

## Test 1: Arena at different simulation counts

Test TTTS vs PUCT at 50, 100, 200, 400 simulations.

In [None]:
# Test 1: play TTTS against PUCT at several simulation budgets, using the
# default configuration for both searches.
puct_config = MCTSConfig()
puct_mcts = BatchedMCTS(game, puct_config)

ttts_config = BayesianMCTSConfig()
ttts_mcts = BayesianMCTS(game, ttts_config)

num_games = 100
results = {}
banner = "=" * 50

for n_sims in (50, 100, 200, 400):
    print(f"\n{banner}")
    print(f"Testing with {n_sims} simulations")
    print(banner)

    # Reset NumPy's global RNG before each budget so every run starts from
    # the same NumPy random state.
    np.random.seed(42)
    wins, draws, losses = run_arena(
        game, model, puct_mcts, ttts_mcts,
        num_games=num_games, mcts_simulations=n_sims,
    )

    # Draws are left out: the win rate and the two-sided binomial test
    # (null hypothesis p = 0.5) only look at decisive games.
    decisive = wins + losses
    if decisive > 0:
        win_rate = wins / decisive
        p_value = stats.binomtest(wins, decisive, 0.5).pvalue
    else:
        win_rate = 0
        p_value = 1.0

    results[n_sims] = dict(
        wins=wins, draws=draws, losses=losses,
        win_rate=win_rate, p_value=p_value,
    )

    print(f"\nResults: TTTS {wins}W / {draws}D / {losses}L")
    print(f"Decisive win rate: {win_rate:.1%}")
    print(f"p-value: {p_value:.4f}")

In [None]:
# Summary table: one row per simulation budget stored in `results`, reported
# from the TTTS perspective.
print("\n" + "="*60)
print("SUMMARY: TTTS vs PUCT (TTTS perspective)")
print("="*60)
print(f"{'Sims':<8} {'Wins':<8} {'Draws':<8} {'Losses':<8} {'Win%':<10} {'p-value':<10}")
print("-"*60)
for n_sims, r in results.items():
    # A trailing "*" flags rows whose binomial-test p-value is below 0.05.
    sig = "*" if r['p_value'] < 0.05 else ""
    # Alignment/width must come before precision/type in a format spec:
    # "<10.1%" left-aligns the percentage in a 10-character column. The
    # previous ".1%:<10" was not a valid spec and raised ValueError.
    print(f"{n_sims:<8} {r['wins']:<8} {r['draws']:<8} {r['losses']:<8} {r['win_rate']:<10.1%} {r['p_value']:.4f}{sig}")

## Test 2: Sweep optimality_weight values

Try different base optimality weights to find a better balance.

In [None]:
# Test 2: sweep the base optimality weight at a fixed budget of 200
# simulations, with adaptive weighting switched on and visit_scale=50.0.
n_sims = 200
num_games = 50

weight_results = {}
banner = "=" * 50

for opt_weight in (0.0, 0.3, 0.5, 0.7, 1.0):
    print(f"\n{banner}")
    print(f"optimality_weight = {opt_weight}, adaptive = True")
    print(banner)

    # A fresh TTTS search is built for each weight; the PUCT opponent is
    # the `puct_mcts` object created earlier in the notebook.
    ttts_config = BayesianMCTSConfig(
        optimality_weight=opt_weight,
        adaptive_weight=True,
        visit_scale=50.0,
    )
    ttts_mcts = BayesianMCTS(game, ttts_config)

    # Reset NumPy's global RNG so every weight starts from the same state.
    np.random.seed(42)
    wins, draws, losses = run_arena(
        game, model, puct_mcts, ttts_mcts,
        num_games=num_games, mcts_simulations=n_sims,
    )

    # Win rate over decisive games only; 0 when every game was drawn.
    decisive = wins + losses
    win_rate = wins / decisive if decisive > 0 else 0

    weight_results[opt_weight] = dict(
        wins=wins, draws=draws, losses=losses, win_rate=win_rate,
    )

    print(f"Results: TTTS {wins}W / {draws}D / {losses}L ({win_rate:.1%})")

In [None]:
# Tabulate the optimality-weight sweep: one row per weight in
# `weight_results`, from the TTTS perspective.
table_rule = "=" * 50
print("\n" + table_rule)
print(f"WEIGHT SWEEP @ {n_sims} sims (adaptive=True)")
print(table_rule)
header = f"{'Weight':<10} {'Wins':<8} {'Draws':<8} {'Losses':<8} {'Win%':<10}"
print(header)
print("-" * 50)
for w, r in weight_results.items():
    row = f"{w:<10} {r['wins']:<8} {r['draws']:<8} {r['losses']:<8} {r['win_rate']:.1%}"
    print(row)

## Test 3: Adaptive vs non-adaptive

In [None]:
# Test 3: compare fixed vs. adaptive optimality weighting at one weight.
# The weight is hard-coded to 0.5 (the middle of the sweep values), not
# chosen from the sweep results.
n_sims = 200
num_games = 50
opt_weight = 0.5

adaptive_results = {}
banner = "=" * 50

for adaptive in (False, True):
    print(f"\n{banner}")
    print(f"optimality_weight = {opt_weight}, adaptive = {adaptive}")
    print(banner)

    # Only `adaptive_weight` differs between the two runs.
    ttts_config = BayesianMCTSConfig(
        optimality_weight=opt_weight,
        adaptive_weight=adaptive,
        visit_scale=50.0,
    )
    ttts_mcts = BayesianMCTS(game, ttts_config)

    # Reset NumPy's global RNG so both settings start from the same state.
    np.random.seed(42)
    wins, draws, losses = run_arena(
        game, model, puct_mcts, ttts_mcts,
        num_games=num_games, mcts_simulations=n_sims,
    )

    # Win rate over decisive games only; 0 when every game was drawn.
    decisive = wins + losses
    win_rate = wins / decisive if decisive > 0 else 0

    adaptive_results[adaptive] = dict(
        wins=wins, draws=draws, losses=losses, win_rate=win_rate,
    )

    print(f"Results: TTTS {wins}W / {draws}D / {losses}L ({win_rate:.1%})")

# Side-by-side table of the two settings.
print("\n" + banner)
print(f"ADAPTIVE COMPARISON @ {n_sims} sims, weight={opt_weight}")
print(banner)
print(f"{'Adaptive':<10} {'Wins':<8} {'Draws':<8} {'Losses':<8} {'Win%':<10}")
print("-" * 50)
for a, r in adaptive_results.items():
    row = f"{str(a):<10} {r['wins']:<8} {r['draws']:<8} {r['losses']:<8} {r['win_rate']:.1%}"
    print(row)