# NRL Engine - Colab Runner

This notebook clones the NRL Engine repo, installs dependencies, and runs the evaluation pipeline.

**Setup:**
1. In Cell 2, replace `YOUR_USERNAME` (the value of `GITHUB_USER`) with your GitHub username
2. For private repos, you'll also need a GitHub token (configured in Cell 2)
3. Run all cells

In [None]:
# =============================================================================
# CELL 1: MOUNT DRIVE
# =============================================================================
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): no later cell in this notebook references /content/drive
# directly; presumably nrl_engine's Config resolves its data directories
# under the mount - confirm against nrl_engine.config.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# =============================================================================
# CELL 2: CLONE/UPDATE REPO
# =============================================================================
import os

# Configuration - EDIT THESE
GITHUB_USER = "YOUR_USERNAME"  # <-- Replace with your GitHub username
REPO_NAME = "nrl-engine"
BRANCH = "main"

# For private repos, uncomment and set your token:
# GITHUB_TOKEN = "ghp_xxxxxxxxxxxx"
# REPO_URL = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{REPO_NAME}.git"

# For public repos:
REPO_URL = f"https://github.com/{GITHUB_USER}/{REPO_NAME}.git"

REPO_DIR = f"/content/{REPO_NAME}"

if os.path.exists(REPO_DIR):
    print(f"Repo exists, pulling latest...")
    !cd {REPO_DIR} && git pull origin {BRANCH}
else:
    print(f"Cloning repo...")
    !git clone -b {BRANCH} {REPO_URL} {REPO_DIR}

print(f"\n✓ Repo ready: {REPO_DIR}")

In [None]:
# =============================================================================
# CELL 3: INSTALL DEPENDENCIES
# =============================================================================
!pip install -q -e /content/nrl-engine

# Verify installation
import nrl_engine
print(f"✓ nrl_engine v{nrl_engine.__version__} installed")

In [None]:
# =============================================================================
# CELL 4: SETUP PATHS
# =============================================================================
# Build the engine configuration, call ensure_dirs() on it, and echo the four
# directory attributes that the rest of the notebook relies on.
from nrl_engine.config import Config

config = Config()
config.ensure_dirs()

# (label, attribute) pairs; the labels carry their own padding so the printed
# values line up.
for label, attr in (
    ("Base dir:", "base_dir"),
    ("Proc dir:", "proc_dir"),
    ("Raw dir: ", "raw_dir"),
    ("Eval dir:", "eval_dir"),
):
    print(f"{label} {getattr(config, attr)}")

In [None]:
# =============================================================================
# CELL 5: LOAD DATA
# =============================================================================
# Load the modelling dataset through DataLoader; when no data files exist,
# generate a sample dataset, validate it, and save it for later runs.
from nrl_engine.data.loader import DataLoader
from nrl_engine.data.sample_data import generate_sample_data, validate_sample_data

# Try to load from files first
loader = DataLoader(config)

try:
    # Only the load call sits inside the try, so FileNotFoundError is caught
    # for the load itself and not for the reporting below.
    model_data, load_meta = loader.load(prefer="proc")
except FileNotFoundError:
    print("No data files found - generating sample data...")
    model_data = generate_sample_data(n_matches=500)

    # Validate
    validation = validate_sample_data(model_data)
    print(f"\nValidation: {validation}")

    # Save for future runs
    save_path = loader.save_to_proc(model_data, prefix="nrl_sample")
    print(f"\n✓ Saved to: {save_path}")
else:
    print(f"Source: {load_meta['source']}")
    for note in load_meta['notes']:
        print(f"  {note}")

print(f"\n✓ Data shape: {model_data.shape}")
# .tolist() converts the NumPy scalars returned by unique() into plain Python
# ints, so the list prints as [2023, 2024] rather than [np.int32(2023), ...]
# under NumPy >= 2.
seasons = sorted(model_data['date'].dt.year.unique().tolist())
print(f"✓ Seasons: {seasons}")

In [None]:
# =============================================================================
# CELL 6: RUN EVALUATION
# =============================================================================
# Wrap the loaded data in an EvaluationHarness and run it; `harness` and
# `results` are reused by the cells that follow.
from nrl_engine.evaluation.harness import EvaluationHarness

# Evaluation settings
# - test_seasons: None = auto-detect, or specify like [2023, 2024, 2025]
# - fold_type: "anchored" (all prior data) or "rolling" (fixed window)
eval_settings = {
    "test_seasons": None,  # Auto-detect
    "fold_type": "anchored",
}

harness = EvaluationHarness(model_data, config)
results = harness.run_evaluation(**eval_settings)

In [None]:
# =============================================================================
# CELL 7: SAVE ARTIFACTS
# =============================================================================
# Persist the evaluation results via the harness and list what was written.
paths = harness.save_artifacts(results)

# One print call: the header first, then one indented "name: path" line per
# entry returned by save_artifacts().
print(
    "\nArtifacts saved:",
    *(f"  {label}: {location}" for label, location in paths.items()),
    sep="\n",
)

In [None]:
# =============================================================================
# CELL 8: VIEW RESULTS
# =============================================================================
# Print a text summary of the evaluation: the odds report, per-fold accuracy,
# and the model (plus, when present, market) metrics as JSON.
import json

banner = "=" * 60
print(banner)
print("RESULTS SUMMARY")
print(banner)

odds_report = results['odds_report']
print(f"\nOdds orientation: {odds_report.get('chosen', 'unknown')}")
print(f"Action taken: {odds_report.get('action', 'none')}")

print("\nFolds:")
for fold in results['fold_results']:
    fold_line = (
        f"  Fold {fold['fold_id']}: Season {fold['test_season']}, "
        f"Accuracy {fold['accuracy']:.1%}"
    )
    print(fold_line)

print("\nModel Metrics:")
metrics = results['metrics']
print(json.dumps(metrics['model_metrics'], indent=2))

# Market metrics are optional; skip the section when absent or empty.
market_metrics = metrics.get('market_metrics')
if market_metrics:
    print("\nMarket Metrics:")
    print(json.dumps(market_metrics, indent=2))

In [None]:
# =============================================================================
# CELL 9: DISPLAY CALIBRATION PLOT
# =============================================================================
# Plot the model's calibration curve (predicted vs. observed) against the
# diagonal that represents perfect calibration.
import matplotlib.pyplot as plt

cal = results['metrics']['model_metrics'].get('calibration', {})

# Both series are read below, so require both keys; checking only 'predicted'
# would raise KeyError on 'actual' when the calibration entry is incomplete.
if 'predicted' in cal and 'actual' in cal:
    plt.figure(figsize=(8, 6))
    plt.plot(cal['predicted'], cal['actual'], 'o-', label='Model')
    plt.plot([0, 1], [0, 1], '--', color='gray', label='Perfect')
    plt.xlabel('Predicted Probability')
    plt.ylabel('Observed Frequency')
    plt.title('Calibration Curve')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()
else:
    print("No calibration data available")

In [None]:
# =============================================================================
# CELL 10: QUICK ACCESS TO PREDICTIONS
# =============================================================================
# Bind the predictions from the results to a top-level name for ad-hoc
# inspection, then show their shape and the first ten rows.
predictions = results['predictions']

print(f"Predictions shape: {predictions.shape}")
print("\nSample predictions:")

# display() is provided by the IPython/Colab runtime; no import is needed.
preview = predictions.head(10)
display(preview)