# Stratego RL Training on Kaggle

**Before running:**
1. Settings → Accelerator → GPU T4 x2
2. Click Save
3. Run cells in order

In [None]:
# Check GPU
# Runs `nvidia-smi` in a shell so its report of the GPUs visible to this
# session appears in the cell output. The setup notes at the top of this
# notebook ask for the "GPU T4 x2" accelerator; if the command fails or lists
# no devices, the accelerator setting has probably not been applied.
!nvidia-smi

In [None]:
# Clone repository
!git clone https://github.com/charlie-tucker1/Stratego_RL.git
%cd Stratego_RL
!ls -la

In [None]:
# Install dependencies
!pip install -q gymnasium stable-baselines3 sb3-contrib tensorboard

In [None]:
# Verify setup
# The imports are themselves the check: a missing dependency, or a working
# directory that does not contain the project's `stratego_logic` module,
# makes this cell fail before anything is printed.
import numpy as np
import torch
from stratego_logic import StrategoEnv
from sb3_contrib import MaskablePPO

# Library versions and GPU visibility, for the run log.
print(f"Numpy: {np.__version__}")
print(f"PyTorch: {torch.__version__}")
has_cuda = torch.cuda.is_available()
print(f"CUDA available: {has_cuda}")
if has_cuda:
    # Device 0 is the first GPU PyTorch can see.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

print("\nAll imports successful! Ready to train.")

## Option 1: Fresh Training (2M steps)

In [None]:
# Start training
# Launches the repository's train.py as a separate Python process with no
# arguments; its console output is shown in this cell while it runs.
# NOTE(review): the training length (the section title says 2M steps) and the
# locations of saved models/logs are decided inside train.py, which is not
# visible from this notebook — confirm there.
!python train.py

## Option 2: Resume from Checkpoint

If you have a checkpoint, upload it via Add Data → Upload, set `CHECKPOINT_PATH` in the cell below to the uploaded file's location, then run the cell:

In [None]:
# Resume training from a previously saved MaskablePPO checkpoint.
from sb3_contrib import MaskablePPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback
from train import make_env, MetricsCallback
import os

# Path to uploaded checkpoint (update if needed)
CHECKPOINT_PATH = "/kaggle/input/your-dataset/stratego_ppo_1750000_steps.zip"

# How many further environment steps to train on top of the checkpoint.
EXTRA_TIMESTEPS = 250_000
# Callback cadence, in environment steps.
EVAL_FREQ = 10_000
CHECKPOINT_FREQ = 50_000

# Fail fast with an actionable message instead of an error from deep inside
# the loader when the placeholder path above has not been updated. The loader
# also accepts a path without the ".zip" suffix, so allow that form too.
if not (os.path.isfile(CHECKPOINT_PATH) or os.path.isfile(CHECKPOINT_PATH + ".zip")):
    raise FileNotFoundError(
        f"Checkpoint not found: {CHECKPOINT_PATH}. "
        "Upload it via Add Data → Upload and update CHECKPOINT_PATH."
    )

# Load model
model = MaskablePPO.load(CHECKPOINT_PATH, device="auto")

# Create environment
# DummyVecEnv takes a list of callables that each build one environment.
# NOTE(review): assumes train.make_env() returns a ready environment when
# called with no arguments — confirm in train.py.
env = DummyVecEnv([make_env])
model.set_env(env)

# Continue training
print("Resuming training from checkpoint...")
model.learn(
    total_timesteps=EXTRA_TIMESTEPS,
    callback=[
        MetricsCallback(eval_freq=EVAL_FREQ),
        CheckpointCallback(save_freq=CHECKPOINT_FREQ, save_path="./models", name_prefix="stratego_ppo")
    ],
    progress_bar=True,
    # Keep the step counter stored in the checkpoint instead of restarting
    # from zero, so counts continue from where the earlier run stopped.
    reset_num_timesteps=False
)

# Save final model
model.save("models/stratego_ppo_final")
print("Training complete!")

## Monitor with TensorBoard

In [None]:
# Load the TensorBoard notebook extension, then open the dashboard inline
# in this cell's output.
# NOTE(review): ./logs is assumed to be where training writes its event
# files — confirm against train.py; an empty dashboard means a wrong logdir.
%load_ext tensorboard
%tensorboard --logdir ./logs

## Evaluate Model

In [None]:
# Evaluate trained model
# Runs train.py as a separate process, passing a saved model via --eval and
# an episode count of 50 via --episodes.
# NOTE(review): the path points at the 1,750,000-step checkpoint under
# models/; change it to whichever checkpoint actually exists there (for
# example the "stratego_ppo_final" model saved by the resume cell).
!python train.py --eval models/stratego_ppo_1750000_steps.zip --episodes 50

## Download Models

In [None]:
# Zip all models
!zip -r stratego_models.zip models/

# Create download link
from IPython.display import FileLink
FileLink('stratego_models.zip')

## Watch Agent Play

In [None]:
# Play a single game with a saved model and report the outcome.
import numpy as np
from sb3_contrib import MaskablePPO
from stratego_logic import StrategoEnv

MODEL_PATH = "models/stratego_ppo_1750000_steps.zip"
MAX_STEPS = 500              # hard cap so a game that never ends cannot loop forever
REPORT_EVERY = 20            # print the running reward every this many steps
FALLBACK_NUM_ACTIONS = 3600  # length of the all-ones mask used when info has no "action_mask"

# Load model
model = MaskablePPO.load(MODEL_PATH)

# Create environment
# NOTE(review): render_mode="human" may need a display; confirm that it works
# in a headless Kaggle session.
env = StrategoEnv(render_mode="human")

try:
    obs, info = env.reset()
    total_reward = 0
    step = 0

    print("Playing one game...\n")

    while step < MAX_STEPS:
        # Prefer the mask the environment reports; otherwise allow every action.
        action_mask = info.get("action_mask", np.ones(FALLBACK_NUM_ACTIONS))
        action, _ = model.predict(obs, action_masks=action_mask, deterministic=True)

        obs, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        step += 1

        if step % REPORT_EVERY == 0:
            print(f"Step {step}: Cumulative reward = {total_reward:.2f}")

        # Stop on either a finished game or an environment-imposed cutoff.
        if terminated or truncated:
            break

    print(f"\nGame ended after {step} steps")
    print(f"Total reward: {total_reward:.2f}")
    # Tolerate an environment without `game` or a game without `winner`.
    winner = getattr(getattr(env, "game", None), "winner", "Unknown")
    print(f"Winner: {winner}")
finally:
    # Release rendering resources even if the game loop raised.
    # Assumes StrategoEnv follows the Gymnasium Env API (its reset/step
    # signatures above match it), which provides close().
    env.close()