# Single-Day Slate-Level Backtest with Benchmark Comparison

This notebook trains a single slate-level XGBoost model and compares its predictions against a season-average benchmark.

In [None]:
import sys
import os

# Make the project's `src` package importable: add the directory two levels
# above the current working directory to the import path.
# NOTE(review): this assumes the notebook is run from a folder nested two
# levels below the repository root -- confirm against the repo layout.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath('.'))))

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Data
from src.data.loaders.historical_loader import HistoricalDataLoader
from src.data.storage.parquet_storage import ParquetStorage

# Features
from src.features.pipeline import FeaturePipeline
from src.utils.feature_config import load_feature_config

# Models
from src.models.xgboost_model import XGBoostModel

# Evaluation
from src.evaluation.benchmarks.season_average import SeasonAverageBenchmark

print('Imports complete')

## Configuration

In [None]:
# Date window (YYYYMMDD strings): the slate being predicted and the
# training range, which ends the day before the prediction date.
PREDICTION_DATE = '20250115'
TRAIN_START, TRAIN_END = '20241001', '20250114'

# Hyperparameters handed to XGBoostModel when the slate model is built.
XGBOOST_CONFIG = dict(
    max_depth=6,
    learning_rate=0.05,
    n_estimators=200,
    min_child_weight=3,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='reg:squarederror',
    random_state=42,
)

# Salary bin edges used for the per-tier comparison.
SALARY_TIERS = [0, 4000, 6000, 8000, 15000]

print(f"Configuration set for slate prediction on {PREDICTION_DATE}")

## Load Data

In [None]:
# The historical loader reads through the parquet storage backend.
storage = ParquetStorage()
loader = HistoricalDataLoader(storage)

# Slate to be predicted.
test_slate = loader.load_slate_data(PREDICTION_DATE)
n_slate_players = len(test_slate)
print(f"Test slate: {n_slate_players} players")

# Game logs for the training window.
train_data = loader.load_historical_data(TRAIN_START, TRAIN_END)
n_train_records = len(train_data)
n_train_players = train_data['playerID'].nunique()
print(f"Training data: {n_train_records} records from {n_train_players} players")

# Sanity check: show the dates actually loaded next to the test date.
train_dates = train_data['gameDate']
print(f"\nTraining date range: {train_dates.min()} to {train_dates.max()}")
print(f"Test date: {PREDICTION_DATE}")

## Initialize and Fit Benchmark

In [None]:
# Fit the season-average benchmark on the training window.
benchmark = SeasonAverageBenchmark(min_games=5)
benchmark.fit(train_data)

player_averages = benchmark.player_averages
print(f"Benchmark fitted for {len(player_averages)} players")

# Look up each slate player's average; players without one get 0.
slate_player_ids = test_slate['playerID']
test_slate['benchmark_pred'] = slate_player_ids.map(player_averages).fillna(0)

# Coverage counts strictly positive predictions only.
# NOTE: 0 is also the fill value for missing players, so a player whose
# fitted average is exactly 0 (or negative) is reported as uncovered.
has_benchmark = test_slate['benchmark_pred'].gt(0)
n_covered = has_benchmark.sum()
coverage_pct = has_benchmark.mean() * 100
print(f"Benchmark coverage: {n_covered}/{len(test_slate)} ({coverage_pct:.1f}%)")

## Build Features

In [None]:
# Build the feature pipeline from the named feature configuration.
feature_config = load_feature_config('default_features')
pipeline = feature_config.build_pipeline(FeaturePipeline)

# Fit the pipeline on the training window and compute training features.
print("Generating training features...")
train_features = pipeline.fit_transform(train_data)

# Every column except identifiers, the date and the target is a model input.
non_feature_cols = {'playerID', 'gameDate', 'fpts', 'playerName'}
feature_cols = [
    name for name in train_features.columns
    if name not in non_feature_cols
]

n_features = len(feature_cols)
print(f"Generated {n_features} features")
print(f"Training shape: {train_features.shape}")

## Train Slate-Level Model

In [None]:
# Split the training frame into the feature matrix and the target.
X_train = train_features[feature_cols]
y_train = train_features['fpts']

# Keep only rows where every feature and the target are present.
mask = X_train.notna().all(axis=1) & y_train.notna()
X_train = X_train.loc[mask]
y_train = y_train.loc[mask]

print(f"Training samples after cleaning: {len(X_train)}")

# Fit one model across all players on the slate.
slate_model = XGBoostModel(XGBOOST_CONFIG)
slate_model.train(X_train, y_train)

print("Slate-level model trained")

# Rank features by importance.
# NOTE(review): pairing with feature_cols assumes get_feature_importance()
# returns scores in the same order as the training columns -- confirm.
importance = slate_model.get_feature_importance()
ranked_features = sorted(
    zip(feature_cols, importance),
    key=lambda pair: pair[1],
    reverse=True,
)
top_features = ranked_features[:10]

print("\nTop 10 features:")
for feature_name, score in top_features:
    print(f"  {feature_name}: {score:.4f}")

## Generate Test Features

In [None]:
# Pull up to a year of player logs relative to the prediction date.
test_player_history = loader.load_historical_player_logs(PREDICTION_DATE, lookback_days=365)

# Keep only logs for players who appear on the test slate.
test_players = test_slate['playerID'].unique()
on_slate = test_player_history['playerID'].isin(test_players)
test_player_history = test_player_history[on_slate]

print(f"Test player history: {len(test_player_history)} records")

# Apply the already-fitted pipeline to the history.
test_features = pipeline.transform(test_player_history)

# Collapse to one row per player, taken from the chronologically latest logs.
# NOTE(review): GroupBy.last() returns the last non-null value of each column,
# so a row can combine values from different game dates -- confirm intended.
chronological = test_features.sort_values('gameDate')
latest_features = chronological.groupby('playerID').last()

print(f"Test features for {len(latest_features)} players")

## Generate Model Predictions

In [None]:
# Attach each slate player's latest feature row. The left join keeps one
# output row per slate row, in slate order (latest_features has a unique
# playerID index because it comes from a groupby).
test_slate_features = test_slate.set_index('playerID').join(latest_features[feature_cols], how='left')

# A player is predictable if at least one feature value is available.
X_test = test_slate_features[feature_cols]
has_features = ~X_test.isna().all(axis=1)

# has_features is indexed by playerID, while test_slate keeps its own index.
# Passing the Series straight to test_slate.loc would make pandas try to
# align the two indexes and fail, so use a positional boolean array instead
# (valid because the join above preserves row order and count).
has_features_mask = has_features.to_numpy()

# Start from a float column so float predictions can be written without a
# dtype change; players without features keep the 0 sentinel.
test_slate['model_pred'] = 0.0

# Predict only for players with features, filling partial gaps with 0.
if has_features_mask.any():
    X_valid = X_test[has_features].fillna(0)
    predictions = slate_model.predict(X_valid)
    test_slate.loc[has_features_mask, 'model_pred'] = predictions

print(f"Model predictions for {has_features.sum()}/{len(test_slate)} players")

# Preview predictions next to the benchmark and the actual score.
print("\nSample predictions:")
sample = test_slate[['playerName', 'salary', 'model_pred', 'benchmark_pred', 'fpts']].head(10)
print(sample)

## Compare Model vs Benchmark

In [None]:
# Compare only players for whom both predictors produced a positive value
# (0 is the placeholder written when a prediction is unavailable).
model_available = test_slate['model_pred'].gt(0)
benchmark_available = test_slate['benchmark_pred'].gt(0)
has_both = model_available & benchmark_available
comparison_data = test_slate.loc[has_both].copy()

print(f"Comparing {len(comparison_data)} players with both predictions")

# Overall head-to-head metrics, computed by the benchmark helper.
comparison = benchmark.compare_with_model(
    actual=comparison_data['fpts'],
    model_pred=comparison_data['model_pred'],
    benchmark_pred=comparison_data['benchmark_pred'],
)

print(comparison['summary'])

## Salary Tier Analysis

In [None]:
# Per-tier metrics, computed by the benchmark helper over the salary bins.
tier_comparison = benchmark.compare_by_salary_tier(comparison_data, SALARY_TIERS)

print("Performance by Salary Tier:")
print("=" * 100)
print(tier_comparison.to_string(index=False))

# One verdict line per tier: which tiers benefit from the model?
divider = "=" * 80
print("\n" + divider)
print("Model Performance vs Benchmark by Tier:")
print(divider)

for _, row in tier_comparison.iterrows():
    mape_imp = row['mape_improvement']
    rmse_imp = row['rmse_improvement']
    improvements = (mape_imp, rmse_imp)

    # BETTER / WORSE only when both metrics agree; anything else is MIXED.
    if all(delta > 0 for delta in improvements):
        overall = "BETTER"
    elif all(delta < 0 for delta in improvements):
        overall = "WORSE"
    else:
        overall = "MIXED"

    print(f"{row['salary_tier']:20} {overall:8} | "
          f"MAPE: {mape_imp:+6.1f}% | RMSE: {rmse_imp:+6.2f} pts | "
          f"N={row['n_players']:3d}")

## Error Distribution Analysis

In [None]:
# Signed errors (prediction minus actual) first, then absolute errors.
# Columns are added in this order so the exported frame keeps its layout.
actual_points = comparison_data['fpts']
comparison_data['model_error'] = comparison_data['model_pred'].sub(actual_points)
comparison_data['benchmark_error'] = comparison_data['benchmark_pred'].sub(actual_points)
comparison_data['model_abs_error'] = comparison_data['model_error'].abs()
comparison_data['benchmark_abs_error'] = comparison_data['benchmark_error'].abs()

model_err = comparison_data['model_error']
model_abs_err = comparison_data['model_abs_error']
bench_err = comparison_data['benchmark_error']
bench_abs_err = comparison_data['benchmark_abs_error']

# Summary statistics for each predictor.
print("Error Distribution:")
print("=" * 60)

print("\nModel Errors:")
print(f"  Mean Error (bias): {model_err.mean():.2f} pts")
print(f"  Std Error: {model_err.std():.2f} pts")
print(f"  Median Abs Error: {model_abs_err.median():.2f} pts")
print(f"  95th Percentile Abs Error: {model_abs_err.quantile(0.95):.2f} pts")

print("\nBenchmark Errors:")
print(f"  Mean Error (bias): {bench_err.mean():.2f} pts")
print(f"  Std Error: {bench_err.std():.2f} pts")
print(f"  Median Abs Error: {bench_abs_err.median():.2f} pts")
print(f"  95th Percentile Abs Error: {bench_abs_err.quantile(0.95):.2f} pts")

# Share of players where the model is strictly closer than the benchmark
# (ties count against the model).
model_wins = model_abs_err.lt(bench_abs_err)
print(f"\nModel Win Rate: {model_wins.mean()*100:.1f}%")

## Top Predictions Comparison

In [None]:
# Compare the players each predictor ranks highest.
TOP_N = 20


def _mape_pct(predicted, actual):
    """Return the mean absolute percentage error, in percent.

    Rows whose actual score is exactly 0 are skipped because their
    percentage error is undefined, and the denominator uses the magnitude
    of the actual score so negative actuals still give a positive error.
    Returns NaN when no row has a non-zero actual score.
    """
    scorable = actual != 0
    if not scorable.any():
        return float('nan')
    abs_error = (predicted[scorable] - actual[scorable]).abs()
    return (abs_error / actual[scorable].abs()).mean() * 100


# Top by model
top_model = comparison_data.nlargest(TOP_N, 'model_pred')
model_top_actual = top_model['fpts'].sum()
model_top_predicted = top_model['model_pred'].sum()

# Top by benchmark
top_benchmark = comparison_data.nlargest(TOP_N, 'benchmark_pred')
benchmark_top_actual = top_benchmark['fpts'].sum()
benchmark_top_predicted = top_benchmark['benchmark_pred'].sum()

print(f"Top {TOP_N} Players Comparison:")
print("=" * 60)

print(f"\nModel's Top {TOP_N}:")
print(f"  Predicted Total: {model_top_predicted:.1f} pts")
print(f"  Actual Total: {model_top_actual:.1f} pts")
print(f"  Error: {model_top_predicted - model_top_actual:.1f} pts")
print(f"  MAPE: {_mape_pct(top_model['model_pred'], top_model['fpts']):.1f}%")

print(f"\nBenchmark's Top {TOP_N}:")
print(f"  Predicted Total: {benchmark_top_predicted:.1f} pts")
print(f"  Actual Total: {benchmark_top_actual:.1f} pts")
print(f"  Error: {benchmark_top_predicted - benchmark_top_actual:.1f} pts")
print(f"  MAPE: {_mape_pct(top_benchmark['benchmark_pred'], top_benchmark['fpts']):.1f}%")

# Overlap: players that both predictors place in their top list.
overlap = set(top_model['playerID']) & set(top_benchmark['playerID'])
print(f"\nOverlap: {len(overlap)}/{TOP_N} players in both top lists")

## Export Results

In [None]:
# Write the per-player comparison frame for this slate.
output_file = f'slate_benchmark_comparison_{PREDICTION_DATE}.csv'
comparison_data.to_csv(output_file, index=False)
print(f"Results saved to {output_file}")

# Flatten the headline metrics into (metric, value) rows.
model_stats = comparison['model']
benchmark_stats = comparison['benchmark']
improvement_stats = comparison['improvement']

metric_values = [
    ('model_mape', model_stats['mape']),
    ('benchmark_mape', benchmark_stats['mape']),
    ('mape_improvement', improvement_stats['mape_improvement']),
    ('model_rmse', model_stats['rmse']),
    ('benchmark_rmse', benchmark_stats['rmse']),
    ('rmse_improvement', improvement_stats['rmse_improvement']),
    ('model_correlation', model_stats['correlation']),
    ('benchmark_correlation', benchmark_stats['correlation']),
    ('model_win_rate', model_wins.mean()),
]
summary_metrics = pd.DataFrame(
    [{'metric': name, 'value': value} for name, value in metric_values]
)

# Write the summary table alongside the per-player file.
summary_file = f'slate_summary_metrics_{PREDICTION_DATE}.csv'
summary_metrics.to_csv(summary_file, index=False)
print(f"Summary metrics saved to {summary_file}")