# NBA DFS Season Backtest - Per Player Models with Benchmark Comparison

Walk-forward backtesting across multiple slates using per-player XGBoost models and season average benchmark comparison.

## Methodology

1. Load historical data for training
2. Build features using YAML-configured rolling statistics
3. Train separate XGBoost model per player on historical data
4. Calculate season average benchmark for comparison
5. Walk forward through test period, generating predictions for each slate
6. Compare predictions to actual results
7. Analyze model vs benchmark performance overall and by salary tier
8. Statistical significance testing across all slates

## Setup

In [6]:
import sys
from pathlib import Path
import logging
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
from tqdm import tqdm
from scipy import stats

repo_root = Path.cwd().parent
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

from src.data.storage.sqlite_storage import SQLiteStorage
from src.data.loaders.historical_loader import HistoricalDataLoader
from src.models.xgboost_model import XGBoostModel
from src.features.pipeline import FeaturePipeline
from src.features.transformers.rolling_stats import RollingStatsTransformer
from src.features.transformers.ewma import EWMATransformer
from src.utils.fantasy_points import calculate_dk_fantasy_points
from src.evaluation.metrics.accuracy import MAPEMetric, RMSEMetric, MAEMetric, CorrelationMetric
from src.evaluation.benchmarks.season_average import SeasonAverageBenchmark
from src.utils.feature_config import load_feature_config

pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

print('Setup complete')

Setup complete


## Configuration

In [12]:
DB_PATH = repo_root / 'nba_dfs.db'
OUTPUT_DIR = repo_root / 'data' / 'outputs'
MODELS_DIR = repo_root / 'data' / 'models'

NUM_SEASONS = 1
FEATURE_CONFIG = 'default_features'
MIN_PLAYER_GAMES = 10
MIN_GAMES_FOR_BENCHMARK = 5

# Model saving configuration
SAVE_MODELS = True  # Set to True to save models during backtesting

feature_config = load_feature_config(FEATURE_CONFIG)

TEST_START = '20250205'
TEST_END = '20250206'

TRAIN_START = HistoricalDataLoader.get_season_start_date(TEST_START) if NUM_SEASONS == 1 else HistoricalDataLoader.get_previous_season_start_date(TEST_START)
TEST_END_DT = datetime.strptime(TEST_END, '%Y%m%d')
TRAIN_END = (TEST_END_DT - timedelta(days=1)).strftime('%Y%m%d')

SALARY_TIERS = [0, 4000, 6000, 8000, 15000]

print(f'Configuration:')
print(f'  Database: {DB_PATH}')
print(f'  Output Directory: {OUTPUT_DIR}')
print(f'  Models Directory: {MODELS_DIR}')
print(f'  Save Models: {SAVE_MODELS}')
print(f'  Feature Config: {FEATURE_CONFIG}')
print(f'  Training Period: {TRAIN_START} to {TRAIN_END}')
print(f'  Testing Period: {TEST_START} to {TEST_END}')
print(f'  Number of Seasons: {NUM_SEASONS}')
print(f'  Min Player Games: {MIN_PLAYER_GAMES}')
print(f'  Min Benchmark Games: {MIN_GAMES_FOR_BENCHMARK}')
print(f'  Salary Tiers: {SALARY_TIERS}')

2025-10-13 03:54:33,661 - src.utils.feature_config - INFO - Loaded feature config: Default Feature Set


Configuration:
  Database: c:\Users\antho\OneDrive\Documents\Repositories\delapan-fantasy\nba_dfs.db
  Output Directory: c:\Users\antho\OneDrive\Documents\Repositories\delapan-fantasy\data\outputs
  Models Directory: c:\Users\antho\OneDrive\Documents\Repositories\delapan-fantasy\data\models
  Save Models: True
  Feature Config: default_features
  Training Period: 20241001 to 20250205
  Testing Period: 20250205 to 20250206
  Number of Seasons: 1
  Min Player Games: 10
  Min Benchmark Games: 5
  Salary Tiers: [0, 4000, 6000, 8000, 15000]


## Data Loading

In [13]:
storage = SQLiteStorage(str(DB_PATH))
loader = HistoricalDataLoader(storage)

print('Loading historical training data...')
training_data = loader.load_historical_player_logs(start_date=TRAIN_START, end_date=TRAIN_END)

print(f'Loaded {len(training_data)} training samples')
print(f'Players: {training_data["playerID"].nunique()}')
print(f'Date range: {training_data["gameDate"].min()} to {training_data["gameDate"].max()}')

training_data['gameDate'] = pd.to_datetime(training_data['gameDate'], format='%Y%m%d', errors='coerce')

if 'fpts' not in training_data.columns:
    training_data['fpts'] = training_data.apply(calculate_dk_fantasy_points, axis=1)

print(f'\nSample training data:')
print(training_data.head(10))

2025-10-13 03:54:34,736 - src.data.storage.sqlite_storage - INFO - Initialized SQLiteStorage with database: c:\Users\antho\OneDrive\Documents\Repositories\delapan-fantasy\nba_dfs.db
2025-10-13 03:54:34,736 - src.data.loaders.historical_loader - INFO - Loading historical player logs (from 20241001 to 20250205)
2025-10-13 03:54:34,845 - src.data.loaders.historical_loader - INFO - Loaded 16022 player logs from 2024-10-22 00:00:00 to 2025-02-04 00:00:00


Loading historical training data...
Loaded 16022 training samples
Players: 528
Date range: 2024-10-22 00:00:00 to 2025-02-04 00:00:00

Sample training data:
       playerID                  longName team teamAbv teamID  \
0   28968646399             Rui Hachimura  LAL     LAL     14   
1   28828385129              Gabe Vincent  LAL     LAL     14   
2   28858866027                  Naz Reid  MIN     MIN     18   
3  942947235539              Max Christie  LAL     LAL     14   
4   28236562792               Mike Conley  MIN     MIN     18   
5   28268636399             Austin Reaves  LAL     LAL     14   
6   28438011729             Julius Randle  MIN     MIN     18   
7   28838935349               Rudy Gobert  MIN     MIN     18   
8   28198607869  Nickeil Alexander-Walker  MIN     MIN     18   
9   94344202027           Anthony Edwards  MIN     MIN     18   

             gameID   gameDate pos mins pts reb ast stl blk TOV PF fga fgm  \
0  20241022_MIN@LAL 2024-10-22       35  18   5  

## Feature Pipeline

In [14]:
pipeline = feature_config.build_pipeline(FeaturePipeline)

print(f'Feature pipeline configured with {len(pipeline.transformers)} transformers:')
for i, transformer in enumerate(pipeline.transformers, 1):
    print(f'  {i}. {transformer.__class__.__name__}')

print('\nBuilding features from training data...')
training_data_sorted = training_data.sort_values(['playerID', 'gameDate'])
training_features = pipeline.fit_transform(training_data_sorted)

print(f'Generated {len(training_features)} feature rows')
print(f'Feature columns: {len([col for col in training_features.columns if col.startswith(("rolling_", "ewma_"))])}')

print(f'\nFeature columns:')
feature_cols = [col for col in training_features.columns if col.startswith(("rolling_", "ewma_"))]
for col in sorted(feature_cols)[:20]:
    print(f'  {col}')
if len(feature_cols) > 20:
    print(f'  ... and {len(feature_cols) - 20} more')

2025-10-13 03:54:35,941 - src.utils.feature_config - INFO - Added RollingStatsTransformer: windows=[3, 5, 10], stats=21, include_std=True
2025-10-13 03:54:35,941 - src.utils.feature_config - INFO - Added EWMATransformer: span=5, stats=21


Feature pipeline configured with 2 transformers:
  1. RollingStatsTransformer
  2. EWMATransformer

Building features from training data...


                                                                    

Generated 16022 feature rows
Feature columns: 147

Feature columns:
  ewma_DefReb_5
  ewma_OffReb_5
  ewma_PF_5
  ewma_TOV_5
  ewma_ast_5
  ewma_blk_5
  ewma_fga_5
  ewma_fgm_5
  ewma_fgp_5
  ewma_fta_5
  ewma_ftm_5
  ewma_ftp_5
  ewma_mins_5
  ewma_plusMinus_5
  ewma_pts_5
  ewma_reb_5
  ewma_stl_5
  ewma_tptfga_5
  ewma_tptfgm_5
  ewma_tptfgp_5
  ... and 127 more




## Initialize Benchmark

Create season average benchmark for comparison with per-player models.

In [15]:
df_qualified = training_features[training_features.groupby('playerID')['playerID'].transform('size') >= MIN_GAMES_FOR_BENCHMARK].copy()

benchmark = SeasonAverageBenchmark(min_games=MIN_GAMES_FOR_BENCHMARK)
benchmark.fit(df_qualified)

print(f'Benchmark fitted for {len(benchmark.player_averages)} players')
print(f'  (min_games={MIN_GAMES_FOR_BENCHMARK})')

print(f'\nTop 10 player averages:')
top_averages = sorted(benchmark.player_averages.items(), key=lambda x: x[1], reverse=True)[:10]
for player_id, avg_fpts in top_averages:
    player_name = df_qualified[df_qualified['playerID'] == player_id]['longName'].iloc[0] if player_id in df_qualified['playerID'].values else 'Unknown'
    print(f'  {player_name}: {avg_fpts:.2f} fpts')

2025-10-13 03:54:48,008 - src.evaluation.benchmarks.season_average - INFO - Fitted benchmark for 493 players (min_games=5)


Benchmark fitted for 493 players
  (min_games=5)

Top 10 player averages:
  Nikola Jokic: 66.59 fpts
  Giannis Antetokounmpo: 60.13 fpts
  Luka Doncic: 54.26 fpts
  Victor Wembanyama: 53.06 fpts
  Anthony Davis: 52.52 fpts
  Shai Gilgeous-Alexander: 52.34 fpts
  Domantas Sabonis: 50.48 fpts
  Karl-Anthony Towns: 49.87 fpts
  Cade Cunningham: 49.41 fpts
  LeBron James: 49.09 fpts


## Walk-Forward Validation

Train per-player models and generate predictions for each test slate.

In [16]:
import pickle
import os

slate_dates = loader.load_slate_dates(TEST_START, TEST_END)

print(f'Found {len(slate_dates)} slates to backtest')
print(f'Date range: {slate_dates[0]} to {slate_dates[-1]}')

results_list = []
all_predictions = []

mape_metric = MAPEMetric()
rmse_metric = RMSEMetric()
mae_metric = MAEMetric()
corr_metric = CorrelationMetric()

for test_date in tqdm(slate_dates, desc='Backtesting slates'):
    print(f'\n{"="*60}')
    print(f'Processing Slate: {test_date}')
    print(f'{"="*60}')

    slate_data = loader.load_slate_data(test_date)
    salaries_df = slate_data.get('dfs_salaries', pd.DataFrame())

    if salaries_df.empty:
        logger.warning(f'No salary data for {test_date}, skipping')
        continue

    print(f'  Found {len(salaries_df)} players with salaries')

    # Create directory for this date's models if saving
    if SAVE_MODELS:
        date_models_dir = MODELS_DIR / 'per_player' / test_date
        date_models_dir.mkdir(parents=True, exist_ok=True)
        slate_models_dir = MODELS_DIR / 'per_slate'
        slate_models_dir.mkdir(parents=True, exist_ok=True)
        print(f'  Saving models to: {date_models_dir}')

    slate_predictions = []
    models_trained = 0
    models_saved = 0
    players_skipped_insufficient_data = 0
    
    # Collect all slate training data for slate-wide model
    slate_X_train_list = []
    slate_y_train_list = []
    slate_playerid_list = []

    for _, player_row in salaries_df.iterrows():
        player_id = player_row.get('playerID')
        player_name = player_row.get('longName', player_row.get('playerName', ''))

        player_training_data = training_features[training_features['playerID'] == player_id].copy()

        if len(player_training_data) < MIN_PLAYER_GAMES:
            players_skipped_insufficient_data += 1
            continue

        try:
            metadata_cols = ['playerID', 'longName', 'playerName', 'team', 'pos', 'gameDate', 'fpts']
            feature_cols = [col for col in player_training_data.columns if col not in metadata_cols and col.startswith(('rolling_', 'ewma_'))]

            X_train = player_training_data[feature_cols].fillna(0)
            y_train = player_training_data['fpts']

            if len(X_train) < 3 or y_train.isna().all():
                continue

            # Add to slate-wide training data
            slate_X_train_list.append(X_train)
            slate_y_train_list.append(y_train)
            slate_playerid_list.extend([player_id] * len(X_train))

            # Train per-player model
            model = XGBoostModel()
            model.train(X_train, y_train)
            models_trained += 1

            # Save the per-player model if configured
            if SAVE_MODELS:
                try:
                    # Clean player name for filename (remove special characters)
                    clean_name = ''.join(c if c.isalnum() or c in (' ', '-', '_') else '_' for c in player_name)
                    clean_name = clean_name.replace(' ', '_').lower()
                    
                    # Create filename with player ID and name
                    model_filename = f"{player_id}_{clean_name}.pkl"
                    model_path = date_models_dir / model_filename
                    
                    # Save model
                    model.save(str(model_path))
                    
                    # Also save feature columns for this model
                    feature_cols_path = date_models_dir / f"{player_id}_{clean_name}_features.pkl"
                    with open(feature_cols_path, 'wb') as f:
                        pickle.dump(feature_cols, f)
                    
                    models_saved += 1
                    
                except Exception as save_error:
                    logger.warning(f'Error saving model for {player_name}: {str(save_error)}')

            latest_features = X_train.iloc[[-1]]
            prediction = model.predict(latest_features)[0]

            slate_predictions.append({
                'date': test_date,
                'playerID': player_id,
                'playerName': player_name,
                'team': player_row.get('team', ''),
                'pos': player_row.get('pos', ''),
                'salary': player_row.get('salary', 0),
                'projected_fpts': prediction,
                'benchmark_pred': benchmark.player_averages.get(player_id, 0)
            })

        except Exception as e:
            logger.warning(f'Error training model for {player_name} on {test_date}: {str(e)}')
            continue

    # Train and save slate-wide model if we have data
    if SAVE_MODELS and slate_X_train_list:
        try:
            # Combine all player data for slate-wide model
            slate_X_combined = pd.concat(slate_X_train_list, ignore_index=True)
            slate_y_combined = pd.concat(slate_y_train_list, ignore_index=True)
            
            print(f'  Training slate-wide model with {len(slate_X_combined)} samples from {len(slate_X_train_list)} players')
            
            # Train slate-wide model
            slate_model = XGBoostModel()
            slate_model.train(slate_X_combined, slate_y_combined)
            
            # Save slate-wide model
            slate_model_path = slate_models_dir / f"{test_date}_slate_model.pkl"
            slate_model.save(str(slate_model_path))
            
            # Save feature columns for slate model
            slate_features_path = slate_models_dir / f"{test_date}_slate_features.pkl"
            with open(slate_features_path, 'wb') as f:
                pickle.dump(feature_cols, f)
            
            # Save player ID mapping for slate model
            slate_playerids_path = slate_models_dir / f"{test_date}_slate_playerids.pkl"
            with open(slate_playerids_path, 'wb') as f:
                pickle.dump(slate_playerid_list, f)
            
            print(f'  Saved slate-wide model to: {slate_model_path}')
            
        except Exception as slate_error:
            logger.warning(f'Error saving slate-wide model for {test_date}: {str(slate_error)}')

    print(f'  Models trained: {models_trained}')
    if SAVE_MODELS:
        print(f'  Per-player models saved: {models_saved}')
    print(f'  Players skipped (insufficient data): {players_skipped_insufficient_data}')

    if not slate_predictions:
        print(f'  WARNING: No predictions generated for this slate')
        continue

    predictions_df = pd.DataFrame(slate_predictions)
    print(f'  Generated {len(predictions_df)} predictions')

    # Save predictions to parquet for this slate
    if SAVE_MODELS:
        predictions_path = slate_models_dir / f"{test_date}.parquet"
        predictions_df.to_parquet(predictions_path)
        print(f'  Saved slate predictions to: {predictions_path}')

    filters = {'start_date': test_date, 'end_date': test_date}
    actuals_df = loader.storage.load('box_scores', filters)

    if actuals_df.empty:
        logger.warning(f'No actuals for {test_date}')
        continue

    actuals_df['actual_fpts'] = actuals_df.apply(calculate_dk_fantasy_points, axis=1)

    merged = predictions_df.merge(
        actuals_df[['playerID', 'actual_fpts']],
        on='playerID',
        how='inner'
    )

    if len(merged) == 0:
        print(f'  WARNING: No matching actual results found')
        continue

    print(f'  Matched {len(merged)} players with actual results')

    model_mape = mape_metric.calculate(merged['actual_fpts'], merged['projected_fpts'])
    model_rmse = rmse_metric.calculate(merged['actual_fpts'], merged['projected_fpts'])
    model_mae = mae_metric.calculate(merged['actual_fpts'], merged['projected_fpts'])
    model_corr = corr_metric.calculate(merged['actual_fpts'], merged['projected_fpts'])

    has_benchmark = (merged['benchmark_pred'] > 0)
    benchmark_mape = mape_metric.calculate(merged[has_benchmark]['actual_fpts'], merged[has_benchmark]['benchmark_pred']) if has_benchmark.any() else np.nan
    benchmark_rmse = rmse_metric.calculate(merged[has_benchmark]['actual_fpts'], merged[has_benchmark]['benchmark_pred']) if has_benchmark.any() else np.nan

    print(f'\n  Performance Metrics:')
    print(f'  {"="*40}')
    print(f'  Model Performance:')
    print(f'    MAPE: {model_mape:.2f}%')
    print(f'    RMSE: {model_rmse:.2f}')
    print(f'    MAE:  {model_mae:.2f}')
    print(f'    Correlation: {model_corr:.3f}')

    if not np.isnan(benchmark_mape):
        print(f'\n  Benchmark Performance:')
        print(f'    MAPE: {benchmark_mape:.2f}%')
        print(f'    RMSE: {benchmark_rmse:.2f}')
        print(f'\n  Improvement over Benchmark:')
        print(f'    MAPE: {benchmark_mape - model_mape:+.2f}% {"(Better)" if benchmark_mape > model_mape else "(Worse)"}')

    print(f'\n  Fantasy Points Summary:')
    print(f'    Mean Actual: {merged["actual_fpts"].mean():.2f}')
    print(f'    Mean Projected: {merged["projected_fpts"].mean():.2f}')
    print(f'    Mean Error: {(merged["projected_fpts"] - merged["actual_fpts"]).mean():+.2f}')

    # Analyze by salary tier for this date
    merged['salary'] = merged['salary'].astype(int)
    merged['salary_bin'] = pd.cut(merged['salary'], bins=SALARY_TIERS, labels=['Low', 'Mid', 'High', 'Elite'][:len(SALARY_TIERS)-1])

    print(f'\n  Performance by Salary Tier:')
    for tier in merged['salary_bin'].unique():
        if pd.isna(tier):
            continue
        tier_data = merged[merged['salary_bin'] == tier]
        if len(tier_data) > 0:
            tier_mape = mape_metric.calculate(tier_data['actual_fpts'], tier_data['projected_fpts'])
            print(f'    {tier}: MAPE={tier_mape:.1f}% (n={len(tier_data)})')

    # Save merged results with actuals for analysis
    if SAVE_MODELS:
        results_with_actuals_path = slate_models_dir / f"{test_date}_with_actuals.parquet"
        merged.to_parquet(results_with_actuals_path)

    results_list.append({
        'date': test_date,
        'num_players': len(merged),
        'models_trained': models_trained,
        'models_saved': models_saved if SAVE_MODELS else 0,
        'model_mape': model_mape,
        'model_rmse': model_rmse,
        'model_mae': model_mae,
        'model_corr': model_corr,
        'benchmark_mape': benchmark_mape,
        'benchmark_rmse': benchmark_rmse,
        'mean_actual': merged['actual_fpts'].mean(),
        'mean_projected': merged['projected_fpts'].mean(),
        'mean_benchmark': merged['benchmark_pred'].mean()
    })

    all_predictions.append(merged)

results_df = pd.DataFrame(results_list)
all_predictions_df = pd.concat(all_predictions, ignore_index=True) if all_predictions else pd.DataFrame()

print(f'\n{"="*60}')
print(f'Completed backtest for {len(results_df)} slates')
print(f'Total predictions: {len(all_predictions_df)}')
if SAVE_MODELS:
    print(f'Total per-player models saved: {results_df["models_saved"].sum():.0f}')
    print(f'Models directory: {MODELS_DIR}')
print(f'{"="*60}')

2025-10-13 03:54:48,053 - src.data.loaders.historical_loader - INFO - Found 1 slate dates from 20250205 to 20250206


Found 1 slates to backtest
Date range: 20250205 to 20250205


Backtesting slates:   0%|          | 0/1 [00:00<?, ?it/s]2025-10-13 03:54:48,056 - src.data.loaders.historical_loader - INFO - Loading slate data for 20250205



Processing Slate: 20250205


2025-10-13 03:54:48,086 - src.data.loaders.historical_loader - INFO - Loaded slate data: 1139 salaries, 11 games


  Found 1139 players with salaries
  Saving models to: c:\Users\antho\OneDrive\Documents\Repositories\delapan-fantasy\data\models\per_player\20250205
  Training slate-wide model with 33862 samples from 985 players


Backtesting slates: 100%|██████████| 1/1 [03:28<00:00, 208.78s/it]

  Saved slate-wide model to: c:\Users\antho\OneDrive\Documents\Repositories\delapan-fantasy\data\models\per_slate\20250205_slate_model.pkl
  Models trained: 985
  Per-player models saved: 985
  Players skipped (insufficient data): 154
  Generated 985 predictions
  Saved slate predictions to: c:\Users\antho\OneDrive\Documents\Repositories\delapan-fantasy\data\models\per_slate\20250205.parquet
  Matched 705 players with actual results

  Performance Metrics:
  Model Performance:
    MAPE: 92.85%
    RMSE: 12.35
    MAE:  9.68
    Correlation: 0.663

  Benchmark Performance:
    MAPE: 87.28%
    RMSE: 10.30

  Improvement over Benchmark:
    MAPE: -5.57% (Worse)

  Fantasy Points Summary:
    Mean Actual: 21.40
    Mean Projected: 21.14
    Mean Error: -0.26

  Performance by Salary Tier:
    Low: MAPE=102.3% (n=432)
    Elite: MAPE=37.5% (n=52)
    High: MAPE=54.9% (n=60)
    Mid: MAPE=100.8% (n=161)

Completed backtest for 1 slates
Total predictions: 705
Total per-player models saved: 9




## Overall Results Summary

In [17]:
print('='*80)
print('BACKTEST RESULTS SUMMARY')
print('='*80)
print(f'\nNumber of Slates: {len(results_df)}')
print(f'Date Range: {results_df["date"].min()} to {results_df["date"].max()}')
print(f'\nTotal Players Evaluated: {results_df["num_players"].sum():.0f}')
print(f'Average Players per Slate: {results_df["num_players"].mean():.1f}')
print(f'Total Models Trained: {results_df["models_trained"].sum():.0f}')
if 'models_saved' in results_df.columns:
    print(f'Total Per-Player Models Saved: {results_df["models_saved"].sum():.0f}')
print(f'\nModel Performance:')
print(f'  Mean MAPE: {results_df["model_mape"].mean():.2f}%')
print(f'  Median MAPE: {results_df["model_mape"].median():.2f}%')
print(f'  Std MAPE: {results_df["model_mape"].std():.2f}%')
print(f'  Mean RMSE: {results_df["model_rmse"].mean():.2f}')
print(f'  Std RMSE: {results_df["model_rmse"].std():.2f}')
if 'model_mae' in results_df.columns:
    print(f'  Mean MAE: {results_df["model_mae"].mean():.2f}')
print(f'  Mean Correlation: {results_df["model_corr"].mean():.3f}')
print(f'  Std Correlation: {results_df["model_corr"].std():.3f}')
print(f'\nBenchmark Performance:')
print(f'  Mean MAPE: {results_df["benchmark_mape"].mean():.2f}%')
print(f'  Median MAPE: {results_df["benchmark_mape"].median():.2f}%')
print(f'\nImprovement (Model vs Benchmark):')
mape_improvement = results_df["benchmark_mape"].mean() - results_df["model_mape"].mean()
print(f'  MAPE Improvement: {mape_improvement:+.2f}% {"(Model better)" if mape_improvement > 0 else "(Benchmark better)"}')

if SAVE_MODELS:
    print(f'\nModel Storage:')
    print(f'  Per-player models: {MODELS_DIR}/per_player/[date]/[playerID]_[name].pkl')
    print(f'  Slate-wide models: {MODELS_DIR}/per_slate/[date]_slate_model.pkl')
    print(f'  Predictions: {MODELS_DIR}/per_slate/[date].parquet')
    print(f'  Results with actuals: {MODELS_DIR}/per_slate/[date]_with_actuals.parquet')

print('\n' + '='*80)
print('DAILY PERFORMANCE LOG')
print('='*80)

# Check which columns are available for the daily log
has_mae = 'model_mae' in results_df.columns
has_saved = 'models_saved' in results_df.columns

if has_mae and has_saved:
    print(f'{"Date":12} {"Players":>8} {"Saved":>6} {"MAPE":>8} {"RMSE":>8} {"MAE":>8} {"Corr":>8} {"Bench":>8} {"Imp":>8}')
elif has_mae:
    print(f'{"Date":12} {"Players":>8} {"MAPE":>8} {"RMSE":>8} {"MAE":>8} {"Corr":>8} {"Bench MAPE":>11} {"Improvement":>12}')
elif has_saved:
    print(f'{"Date":12} {"Players":>8} {"Saved":>6} {"MAPE":>8} {"RMSE":>8} {"Corr":>8} {"Bench":>8} {"Imp":>8}')
else:
    print(f'{"Date":12} {"Players":>8} {"MAPE":>8} {"RMSE":>8} {"Corr":>8} {"Bench MAPE":>11} {"Improvement":>12}')
    
print('-'*80)
for _, row in results_df.iterrows():
    improvement = row["benchmark_mape"] - row["model_mape"] if not pd.isna(row["benchmark_mape"]) else 0
    bench_str = f'{row["benchmark_mape"]:.1f}%' if not pd.isna(row["benchmark_mape"]) else 'N/A'
    imp_str = f'{improvement:+.1f}%' if not pd.isna(row["benchmark_mape"]) else 'N/A'
    
    if has_mae and has_saved:
        print(f'{row["date"]:12} {row["num_players"]:>8} {row.get("models_saved", 0):>6} {row["model_mape"]:>7.1f}% {row["model_rmse"]:>8.2f} {row.get("model_mae", 0):>8.2f} {row["model_corr"]:>8.3f} {bench_str:>8} {imp_str:>8}')
    elif has_mae:
        print(f'{row["date"]:12} {row["num_players"]:>8} {row["model_mape"]:>7.1f}% {row["model_rmse"]:>8.2f} {row.get("model_mae", 0):>8.2f} {row["model_corr"]:>8.3f} {bench_str:>11} {imp_str:>12}')
    elif has_saved:
        print(f'{row["date"]:12} {row["num_players"]:>8} {row.get("models_saved", 0):>6} {row["model_mape"]:>7.1f}% {row["model_rmse"]:>8.2f} {row["model_corr"]:>8.3f} {bench_str:>8} {imp_str:>8}')
    else:
        print(f'{row["date"]:12} {row["num_players"]:>8} {row["model_mape"]:>7.1f}% {row["model_rmse"]:>8.2f} {row["model_corr"]:>8.3f} {bench_str:>11} {imp_str:>12}')
        
print('='*80)

BACKTEST RESULTS SUMMARY

Number of Slates: 1
Date Range: 20250205 to 20250205

Total Players Evaluated: 705
Average Players per Slate: 705.0
Total Models Trained: 985
Total Per-Player Models Saved: 985

Model Performance:
  Mean MAPE: 92.85%
  Median MAPE: 92.85%
  Std MAPE: nan%
  Mean RMSE: 12.35
  Std RMSE: nan
  Mean MAE: 9.68
  Mean Correlation: 0.663
  Std Correlation: nan

Benchmark Performance:
  Mean MAPE: 87.28%
  Median MAPE: 87.28%

Improvement (Model vs Benchmark):
  MAPE Improvement: -5.57% (Benchmark better)

Model Storage:
  Per-player models: c:\Users\antho\OneDrive\Documents\Repositories\delapan-fantasy\data\models/per_player/[date]/[playerID]_[name].pkl
  Slate-wide models: c:\Users\antho\OneDrive\Documents\Repositories\delapan-fantasy\data\models/per_slate/[date]_slate_model.pkl
  Predictions: c:\Users\antho\OneDrive\Documents\Repositories\delapan-fantasy\data\models/per_slate/[date].parquet
  Results with actuals: c:\Users\antho\OneDrive\Documents\Repositories\del

## Benchmark Comparison Analysis

In [18]:
comparison_df = all_predictions_df[(all_predictions_df['projected_fpts'] > 0) & (all_predictions_df['benchmark_pred'] > 0)].copy()

comparison_results = benchmark.compare_with_model(
    actual=comparison_df['actual_fpts'],
    model_pred=comparison_df['projected_fpts'],
    benchmark_pred=comparison_df['benchmark_pred']
)

print(comparison_results['summary'])


Benchmark Comparison

Model Performance:
  MAPE: 92.81%
  RMSE: 12.33
  MAE:  9.65

Benchmark Performance:
  MAPE: 87.46%
  RMSE: 10.31
  MAE:  8.02

Improvement (positive = model better):
  MAPE: -5.36% 
  RMSE: -2.02
  MAE:  -1.63



In [19]:
tier_data = comparison_df.copy()
tier_data = tier_data.rename(columns={
    'projected_fpts': 'model_pred',
    'actual_fpts': 'actual'
})

# Ensure salary column is numeric
tier_data['salary'] = pd.to_numeric(tier_data['salary'], errors='coerce')
tier_data = tier_data[tier_data['salary'].notna()]

tier_comparison = benchmark.compare_by_salary_tier(tier_data, SALARY_TIERS)

print('Performance by Salary Tier:')
print('='*80)
print(tier_comparison[['salary_tier', 'count', 'model_mape', 'benchmark_mape', 'mape_improvement']].to_string(index=False))
print('\nDetailed Breakdown:')
for _, row in tier_comparison.iterrows():
    improvement = row['mape_improvement']
    status = 'BETTER' if improvement > 0 else 'WORSE'
    symbol = '+' if improvement > 0 else ''
    
    print(f'{str(row["salary_tier"]):20} {symbol}{improvement:6.1f}% {status:8} '
          f'(Model: {row["model_mape"]:.1f}%, Benchmark: {row["benchmark_mape"]:.1f}%)')

Performance by Salary Tier:
salary_tier  count  model_mape  benchmark_mape  mape_improvement
        Low    429  102.320978       95.719187         -6.601791
      Elite     52   37.486085       27.334682        -10.151403
       High     60   54.861061       44.166952        -10.694110
        Mid    161  100.778741      102.156383          1.377642

Detailed Breakdown:
Low                    -6.6% WORSE    (Model: 102.3%, Benchmark: 95.7%)
Elite                 -10.2% WORSE    (Model: 37.5%, Benchmark: 27.3%)
High                  -10.7% WORSE    (Model: 54.9%, Benchmark: 44.2%)
Mid                  +   1.4% BETTER   (Model: 100.8%, Benchmark: 102.2%)


In [20]:
comparison_df['model_error'] = np.abs(comparison_df['projected_fpts'] - comparison_df['actual_fpts'])
comparison_df['benchmark_error'] = np.abs(comparison_df['benchmark_pred'] - comparison_df['actual_fpts'])

model_errors = comparison_df['model_error'].values
benchmark_errors = comparison_df['benchmark_error'].values

t_stat, p_value = stats.ttest_rel(model_errors, benchmark_errors)

print('Statistical Significance Test (Paired t-test):')
print(f'  t-statistic: {t_stat:.4f}')
print(f'  p-value: {p_value:.6f}')
print()
if p_value < 0.05:
    if t_stat < 0:
        print('  Result: Model is SIGNIFICANTLY BETTER than benchmark (p < 0.05)')
    else:
        print('  Result: Model is SIGNIFICANTLY WORSE than benchmark (p < 0.05)')
else:
    print('  Result: No significant difference between model and benchmark (p >= 0.05)')

cohens_d = (model_errors.mean() - benchmark_errors.mean()) / np.sqrt((model_errors.std()**2 + benchmark_errors.std()**2) / 2)
print(f'\n  Cohen\'s d: {cohens_d:.4f}')

if abs(cohens_d) < 0.2:
    effect_size = 'negligible'
elif abs(cohens_d) < 0.5:
    effect_size = 'small'
elif abs(cohens_d) < 0.8:
    effect_size = 'medium'
else:
    effect_size = 'large'
print(f'  Effect size: {effect_size}')

print(f'\nError Statistics:')
print(f'  Model MAE: {model_errors.mean():.2f}')
print(f'  Benchmark MAE: {benchmark_errors.mean():.2f}')
print(f'  Difference: {benchmark_errors.mean() - model_errors.mean():.2f} (positive = model better)')

Statistical Significance Test (Paired t-test):
  t-statistic: 6.2303
  p-value: 0.000000

  Result: Model is SIGNIFICANTLY WORSE than benchmark (p < 0.05)

  Cohen's d: 0.2295
  Effect size: small

Error Statistics:
  Model MAE: 9.65
  Benchmark MAE: 8.02
  Difference: -1.63 (positive = model better)


## Visualizations

In [27]:
# Use plotly dark theme and vibrant custom colors for all traces and lines.

import plotly.io as pio
pio.templates.default = "plotly_dark"

vibrant_colors = {
    "model": "#9AFF6E",        # Vibrant light green
    "benchmark": "#3ABEFF",    # Vibrant blue/cyan
    "model_mean": "#FFFF35",   # Lime Yellow
    "benchmark_mean": "#FD5A66", # Vibrant Red
    "rmse": "#00FFC2",         # Aqua
    "rmse_mean": "#EA00FF",    # Vibrant Magenta
    "corr": "#FBBF24",         # Vibrant Gold
    "corr_mean": "#FF7A00",    # Orange
    "players_bar": "#FF53A1",  # Hot Pink
    "players_mean": "#25FFF1", # Electric Cyan
}

fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=(
        '<b>MAPE Over Time</b>', 
        '<b>RMSE Over Time</b>', 
        '<b>Correlation Over Time</b>', 
        '<b>Players Evaluated Per Slate</b>'),
    vertical_spacing=0.15,
    horizontal_spacing=0.1
)

# MAPE Over Time
fig.add_trace(
    go.Scatter(
        x=results_df['date'], 
        y=results_df['model_mape'], 
        mode='lines+markers', 
        name='Model', 
        line=dict(width=2, color=vibrant_colors["model"]), 
        marker=dict(size=10, color=vibrant_colors["model"], symbol='circle', line=dict(width=1, color='black'))
    ),
    row=1, col=1
)
fig.add_trace(
    go.Scatter(
        x=results_df['date'], 
        y=results_df['benchmark_mape'], 
        mode='lines+markers', 
        name='Benchmark', 
        line=dict(width=2, color=vibrant_colors["benchmark"]),
        marker=dict(size=10, color=vibrant_colors["benchmark"], symbol='square', line=dict(width=1, color='black')),
        opacity=0.85
    ),
    row=1, col=1
)
fig.add_hline(
    y=results_df['model_mape'].mean(),
    line_dash="dash",
    line_color=vibrant_colors["model_mean"],
    opacity=0.7,
    row=1, col=1,
    annotation_text="Model Mean",
    annotation_position="right"
)
fig.add_hline(
    y=results_df['benchmark_mape'].mean(),
    line_dash="dash",
    line_color=vibrant_colors["benchmark_mean"],
    opacity=0.7,
    row=1, col=1,
    annotation_text="Benchmark Mean",
    annotation_position="right"
)

# RMSE Over Time  
fig.add_trace(
    go.Scatter(
        x=results_df['date'], 
        y=results_df['model_rmse'], 
        mode='lines+markers', 
        name='RMSE',
        line=dict(color=vibrant_colors["rmse"], width=2), 
        marker=dict(size=10, color=vibrant_colors["rmse"], symbol='diamond', line=dict(width=1, color='black'))
    ),
    row=1, col=2
)
fig.add_hline(
    y=results_df['model_rmse'].mean(), 
    line_dash="dash", 
    line_color=vibrant_colors["rmse_mean"], 
    row=1, col=2,
    annotation_text=f"Mean: {results_df['model_rmse'].mean():.2f}",
    annotation_position="right"
)

# Correlation Over Time
fig.add_trace(
    go.Scatter(
        x=results_df['date'], 
        y=results_df['model_corr'], 
        mode='lines+markers', 
        name='Correlation',
        line=dict(color=vibrant_colors["corr"], width=2), 
        marker=dict(size=10, color=vibrant_colors["corr"], symbol='cross', line=dict(width=1, color='black'))
    ),
    row=2, col=1
)
fig.add_hline(
    y=results_df['model_corr'].mean(), 
    line_dash="dash", 
    line_color=vibrant_colors["corr_mean"], 
    row=2, col=1,
    annotation_text=f"Mean: {results_df['model_corr'].mean():.3f}",
    annotation_position="right"
)

# Players Evaluated Per Slate
fig.add_trace(
    go.Bar(
        x=results_df['date'], 
        y=results_df['num_players'], 
        name='Players', 
        marker=dict(color=vibrant_colors["players_bar"], opacity=0.85, line=dict(color="white", width=0.5))
    ),
    row=2, col=2
)
fig.add_hline(
    y=results_df['num_players'].mean(), 
    line_dash="dash", 
    line_color=vibrant_colors["players_mean"], 
    row=2, col=2,
    annotation_text=f"Mean: {results_df['num_players'].mean():.1f}",
    annotation_position="right"
)

# Update layout for dark theme and axis/legend colors
axis_style = dict(color="white", showline=True, linewidth=1.5, linecolor='#666', zerolinecolor="#444")
fig.update_xaxes(title_text="<b>Date</b>", row=1, col=1, tickangle=45, **axis_style)
fig.update_xaxes(title_text="<b>Date</b>", row=1, col=2, tickangle=45, **axis_style)
fig.update_xaxes(title_text="<b>Date</b>", row=2, col=1, tickangle=45, **axis_style)
fig.update_xaxes(title_text="<b>Date</b>", row=2, col=2, tickangle=45, **axis_style)

fig.update_yaxes(title_text="<b>MAPE (%)</b>", row=1, col=1, **axis_style)
fig.update_yaxes(title_text="<b>RMSE</b>", row=1, col=2, **axis_style)
fig.update_yaxes(title_text="<b>Correlation</b>", row=2, col=1, **axis_style)
fig.update_yaxes(title_text="<b>Number of Players</b>", row=2, col=2, **axis_style)

fig.update_layout(
    height=800,
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.05,
        xanchor="center",
        x=0.5,
        font=dict(color="white", size=14)
    ),
    plot_bgcolor="#22272e",
    paper_bgcolor="#22272e",
    font=dict(family="Segoe UI, Roboto, Arial", size=14, color="white"),
    title_text="<b>Backtest Performance Metrics</b>",
    title_x=0.5,
    margin=dict(l=40, r=40, t=40, b=40)  # Reduced top margin to prevent title overlap
)

# Move the title lower by adding extra top padding to the subplot area, 
# so title does not overlap with subplot titles or legend.
fig.update_layout(
    margin=dict(t=110)  # Increase only top margin for extra spacing for title
)

fig.show()

In [30]:
# --- Dark theme and vibrant colors ---
vibrant_colors = {
    "scatter": "#00D7FF",        # Cyan
    "diagonal": "#FF0080",       # Pink/magenta
    "histogram": "#18FF6D",      # Vibrant green
    "histogram_border": "#22272e",
    "vline_zero": "#FF0080",     # Pink/magenta
    "vline_mean": "#FFD700",     # Gold/yellow
}

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=(
        '<span style="color:white"><b>Model vs Benchmark Error Comparison</b></span>', 
        '<span style="color:white"><b>Error Difference Distribution</b><br>(Positive = Model Better)</span>'
    ),
    horizontal_spacing=0.15
)

# Scatter plot: Model vs Benchmark Error
fig.add_trace(
    go.Scatter(
        x=comparison_df['benchmark_error'], y=comparison_df['model_error'],
        mode='markers',
        marker=dict(size=7, opacity=0.7, color=vibrant_colors["scatter"], line=dict(width=0)),
        name='Errors',
        showlegend=True
    ),
    row=1, col=1
)

# Add diagonal line for equal error
max_error = max(comparison_df['benchmark_error'].max(), comparison_df['model_error'].max()) * 1.03
fig.add_trace(
    go.Scatter(
        x=[0, max_error], y=[0, max_error],
        mode='lines',
        line=dict(color=vibrant_colors["diagonal"], dash='dash', width=2),
        name='Equal error',
        showlegend=True
    ),
    row=1, col=1
)

# Histogram of error differences
error_diff = comparison_df['benchmark_error'] - comparison_df['model_error']
fig.add_trace(
    go.Histogram(
        x=error_diff, nbinsx=30,
        marker=dict(color=vibrant_colors["histogram"], opacity=0.85, 
                    line=dict(color=vibrant_colors["histogram_border"], width=1.2)),
        name='Error Difference',
        showlegend=True
    ),
    row=1, col=2
)

# Add vertical lines for reference
fig.add_vline(
    x=0,
    line_dash="dash",
    line_color=vibrant_colors["vline_zero"],
    row=1, col=2,
    annotation_text="<b style='color:#FF0080'>No difference</b>",
    annotation_position="top"
)
fig.add_vline(
    x=error_diff.mean(),
    line_dash="dash",
    line_color=vibrant_colors["vline_mean"],
    row=1, col=2,
    annotation_text=f"<b style='color:#FFD700'>Mean: {error_diff.mean():.2f}</b>",
    annotation_position="top right"
)

# Update layout for dark theme and axis/legend colors
axis_style = dict(color="white", showline=True, linewidth=1.7, linecolor='#666', zerolinecolor="#444")
fig.update_xaxes(title_text="<b style='color:#75eaff'>Benchmark Error</b>", row=1, col=1, tickfont_color="white", **axis_style)
fig.update_yaxes(title_text="<b style='color:#FF0080'>Model Error</b>", row=1, col=1, tickfont_color="white", **axis_style)
fig.update_xaxes(title_text="<b style='color:#18FF6D'>Error Difference (Benchmark - Model)</b>", row=1, col=2, tickfont_color="white", **axis_style)
fig.update_yaxes(title_text="<b style='color:#FFD700'>Frequency</b>", row=1, col=2, tickfont_color="white", **axis_style)

fig.update_layout(
    height=800,  # Increased from 520 to 800 for bigger plot
    width=1400,  # Added width for more horizontal room
    showlegend=True,
    legend=dict(
        orientation="h",
        y=1.05,
        yanchor="bottom",
        xanchor="center",
        x=0.5,
        font=dict(color="white", size=13)
    ),
    plot_bgcolor="#22272e",
    paper_bgcolor="#22272e",
    font=dict(family="Segoe UI, Roboto, Arial", size=15, color="white"),
    title_text="<b>Model vs Benchmark Error Analysis</b>",
    title_x=0.5,
    margin=dict(l=60, r=60, t=80, b=55)  # Added more margin for clarity
)

fig.show()

In [31]:
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('MAPE by Salary Tier', 'Model Improvement Over Benchmark<br>(Positive = Model Better)'),
    horizontal_spacing=0.15
)

# MAPE by Salary Tier - Grouped Bar Chart
x_labels = tier_comparison['salary_tier'].astype(str)
x_pos = np.arange(len(x_labels))

fig.add_trace(
    go.Bar(x=x_labels, y=tier_comparison['model_mape'],
           name='Model', marker=dict(opacity=0.8)),
    row=1, col=1
)
fig.add_trace(
    go.Bar(x=x_labels, y=tier_comparison['benchmark_mape'],
           name='Benchmark', marker=dict(opacity=0.8)),
    row=1, col=1
)

# Improvement Bar Chart
colors = ['green' if x > 0 else 'red' for x in tier_comparison['mape_improvement']]
fig.add_trace(
    go.Bar(x=x_labels, y=tier_comparison['mape_improvement'],
           marker=dict(color=colors, opacity=0.7),
           showlegend=False),
    row=1, col=2
)

# Add horizontal line at 0 for improvement chart
fig.add_hline(y=0, line_color="black", line_width=0.8, row=1, col=2)

# Update layout
fig.update_xaxes(title_text="Salary Tier", row=1, col=1)
fig.update_xaxes(title_text="Salary Tier", row=1, col=2)
fig.update_yaxes(title_text="MAPE (%)", row=1, col=1)
fig.update_yaxes(title_text="MAPE Improvement (%)", row=1, col=2)

fig.update_layout(
    height=500,
    barmode='group',
    title_text="Performance Analysis by Salary Tier",
    title_x=0.5
)

fig.show()

In [32]:
if MODE == 'backtest' and 'error' not in results:
    print('='*80)
    print('BACKTEST RESULTS SUMMARY')
    print('='*80)
    print(f"\nNumber of Slates: {results['num_slates']}")
    print(f"Date Range: {results['date_range']}")
    print(f"\nTotal Players Evaluated: {results['total_players_evaluated']:.0f}")
    print(f"Average Players per Slate: {results['avg_players_per_slate']:.1f}")
    print(f"\nMean MAPE: {results['mean_mape']:.2f}%")
    print(f"Median MAPE: {results['median_mape']:.2f}%")
    print(f"Std MAPE: {results['std_mape']:.2f}%")
    print(f"\nMean RMSE: {results['mean_rmse']:.2f}")
    print(f"Std RMSE: {results['std_rmse']:.2f}")
    print(f"\nMean Correlation: {results['mean_correlation']:.3f}")
    print(f"Std Correlation: {results['std_correlation']:.3f}")
    print('='*80)
elif MODE == 'backtest':
    print(f"ERROR: {results.get('error', 'Unknown error')}")

NameError: name 'MODE' is not defined

In [None]:
if MODE == 'backtest' and 'error' not in results and 'daily_results' in results:
    daily_df = results['daily_results']
    
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('MAPE Over Time', 'RMSE Over Time', 'Correlation Over Time', 'Players Evaluated Per Slate'),
        vertical_spacing=0.15,
        horizontal_spacing=0.1
    )
    
    # MAPE Over Time
    fig.add_trace(
        go.Scatter(x=daily_df['date'], y=daily_df['mape'], 
                   mode='lines+markers', name='MAPE',
                   line=dict(width=2), marker=dict(size=8)),
        row=1, col=1
    )
    fig.add_hline(y=daily_df['mape'].mean(), line_dash="dash", 
                  line_color="red", row=1, col=1,
                  annotation_text=f"Mean: {daily_df['mape'].mean():.1f}%",
                  annotation_position="right")
    
    # RMSE Over Time
    fig.add_trace(
        go.Scatter(x=daily_df['date'], y=daily_df['rmse'], 
                   mode='lines+markers', name='RMSE',
                   line=dict(color='orange', width=2), marker=dict(size=8)),
        row=1, col=2
    )
    fig.add_hline(y=daily_df['rmse'].mean(), line_dash="dash", 
                  line_color="red", row=1, col=2,
                  annotation_text=f"Mean: {daily_df['rmse'].mean():.2f}",
                  annotation_position="right")
    
    # Correlation Over Time
    fig.add_trace(
        go.Scatter(x=daily_df['date'], y=daily_df['correlation'], 
                   mode='lines+markers', name='Correlation',
                   line=dict(color='green', width=2), marker=dict(size=8)),
        row=2, col=1
    )
    fig.add_hline(y=daily_df['correlation'].mean(), line_dash="dash", 
                  line_color="red", row=2, col=1,
                  annotation_text=f"Mean: {daily_df['correlation'].mean():.3f}",
                  annotation_position="right")
    
    # Players Evaluated Per Slate
    fig.add_trace(
        go.Bar(x=daily_df['date'], y=daily_df['num_players'], 
               name='Players', marker=dict(color='purple', opacity=0.7)),
        row=2, col=2
    )
    fig.add_hline(y=daily_df['num_players'].mean(), line_dash="dash", 
                  line_color="red", row=2, col=2,
                  annotation_text=f"Mean: {daily_df['num_players'].mean():.1f}",
                  annotation_position="right")
    
    # Update layout
    fig.update_xaxes(title_text="Date", row=1, col=1, tickangle=45)
    fig.update_xaxes(title_text="Date", row=1, col=2, tickangle=45)
    fig.update_xaxes(title_text="Date", row=2, col=1, tickangle=45)
    fig.update_xaxes(title_text="Date", row=2, col=2, tickangle=45)
    
    fig.update_yaxes(title_text="MAPE (%)", row=1, col=1)
    fig.update_yaxes(title_text="RMSE", row=1, col=2)
    fig.update_yaxes(title_text="Correlation", row=2, col=1)
    fig.update_yaxes(title_text="Number of Players", row=2, col=2)
    
    fig.update_layout(
        height=800,
        showlegend=True,
        title_text="Backtest Daily Performance Metrics",
        title_x=0.5
    )
    
    fig.show()

In [None]:
if MODE == 'inference':
    print(f'Running inference for {TEST_START}')
    
    storage = SQLiteStorage(str(DB_PATH))
    loader = HistoricalDataLoader(storage)
    feature_builder = FeatureBuilder()
    client = Tank01Client(api_key)
    
    print('\nFetching today\'s slate data...')
    
    salaries_resp = client.get_dfs_salaries(date=TEST_START, lineup_type='DraftKings')
    schedule_resp = client.get_schedule(date=TEST_START)
    
    if salaries_resp['statusCode'] != 200:
        print(f"ERROR fetching salaries: {salaries_resp}")
    else:
        salaries_df = pd.DataFrame(salaries_resp['body'])
        print(f"Found {len(salaries_df)} players on slate")
        
        if schedule_resp['statusCode'] == 200:
            schedule_df = pd.DataFrame(schedule_resp['body'])
            print(f"Found {len(schedule_df)} games")
        else:
            schedule_df = pd.DataFrame()
            print('No schedule data available')
        
        slate_data = {
            'dfs_salaries': salaries_df,
            'schedule': schedule_df,
            'date': TEST_START
        }
        
        print(f'\nLoading historical data from current and previous seasons...')
        training_data = loader.load_historical_player_logs(
            end_date=TEST_START
        )
        print(f"Loaded {len(training_data)} historical games")
        
        projections_list = []
        
        print(f'\nGenerating projections for {len(salaries_df)} players...')
        
        for idx, player_row in salaries_df.iterrows():
            player_id = player_row.get('playerID')
            player_name = player_row.get('longName') or player_row.get('playerName', '')
            
            player_training_data = training_data[training_data['playerID'] == player_id].copy()
            
            if len(player_training_data) < MIN_PLAYER_GAMES:
                continue
            
            try:
                X_train, y_train = feature_builder.build_training_features(
                    player_training_data,
                    window_sizes=ROLLING_WINDOW_SIZES
                )
                
                if X_train.empty or y_train.empty or len(X_train) < 3:
                    continue
                
                if MODEL_TYPE == 'xgboost':
                    import xgboost as xgb
                    model = xgb.XGBRegressor(**MODEL_PARAMS)
                    model.fit(X_train, y_train, verbose=False)
                else:
                    from sklearn.ensemble import RandomForestRegressor
                    model = RandomForestRegressor(
                        n_estimators=MODEL_PARAMS.get('n_estimators', 200),
                        max_depth=MODEL_PARAMS.get('max_depth', 6),
                        random_state=MODEL_PARAMS.get('random_state', 42),
                        n_jobs=-1
                    )
                    model.fit(X_train, y_train)
                
                slate_data_single = {
                    'dfs_salaries': salaries_df.iloc[[idx]],
                    'date': TEST_START
                }
                
                slate_features = feature_builder.build_slate_features(
                    slate_data_single,
                    player_training_data,
                    window_sizes=ROLLING_WINDOW_SIZES
                )
                
                if slate_features.empty:
                    continue
                
                metadata_cols = ['playerID', 'playerName', 'team', 'pos', 'salary']
                feature_cols = [col for col in slate_features.columns if col not in metadata_cols]
                
                X = slate_features[feature_cols].fillna(0)
                prediction = model.predict(X)[0]
                
                projection = {
                    'playerID': player_id,
                    'playerName': player_name,
                    'team': player_row.get('team', ''),
                    'pos': player_row.get('pos', ''),
                    'salary': player_row.get('salary', 0),
                    'projected_fpts': prediction,
                    'value': prediction / (player_row.get('salary', 1) / 1000)
                }
                
                projections_list.append(projection)
                
            except Exception as e:
                logger.warning(f"Error generating projection for {player_name}: {str(e)}")
                continue
        
        projections_df = pd.DataFrame(projections_list)
        projections_df = projections_df.sort_values('projected_fpts', ascending=False)
        
        print(f'\nGenerated {len(projections_df)} projections')
        
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        output_file = OUTPUT_DIR / f'projections_{TEST_START}.csv'
        projections_df.to_csv(output_file, index=False)
        print(f'\nProjections saved to: {output_file}')
        
        print('\nTop 20 Projections:')
        display(projections_df.head(20))
        
        print('\nTop 20 By Value:')
        display(projections_df.sort_values('value', ascending=False).head(20))

In [None]:
if MODE == 'backtest' and 'error' not in results and 'daily_results' in results:
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    
    daily_df = results['daily_results']
    csv_path = OUTPUT_DIR / f'backtest_results_{TEST_START}_to_{TEST_END}.csv'
    daily_df.to_csv(csv_path, index=False)
    print(f'Results exported to: {csv_path}')
    
    summary_path = OUTPUT_DIR / f'summary_{TEST_START}_to_{TEST_END}.txt'
    with open(summary_path, 'w') as f:
        f.write('='*80 + '\n')
        f.write('BACKTEST RESULTS SUMMARY\n')
        f.write('='*80 + '\n\n')
        f.write(f"Date Range: {results['date_range']}\n")
        f.write(f"Number of Slates: {results['num_slates']}\n")
        f.write(f"Total Players Evaluated: {results['total_players_evaluated']:.0f}\n")
        f.write(f"Average Players per Slate: {results['avg_players_per_slate']:.1f}\n\n")
        f.write(f"Mean MAPE: {results['mean_mape']:.2f}%\n")
        f.write(f"Median MAPE: {results['median_mape']:.2f}%\n")
        f.write(f"Std MAPE: {results['std_mape']:.2f}%\n\n")
        f.write(f"Mean RMSE: {results['mean_rmse']:.2f}\n")
        f.write(f"Std RMSE: {results['std_rmse']:.2f}\n\n")
        f.write(f"Mean Correlation: {results['mean_correlation']:.3f}\n")
        f.write(f"Std Correlation: {results['std_correlation']:.3f}\n")
    
    print(f'Summary exported to: {summary_path}')

Results exported to: c:\Users\antho\OneDrive\Documents\Repositories\delapan-fantasy\data\outputs\backtest_results_20250201_to_20250401.csv
Summary exported to: c:\Users\antho\OneDrive\Documents\Repositories\delapan-fantasy\data\outputs\summary_20250201_to_20250401.txt
