# Week 10 Diagnostics

Evaluate saved predictions vs actual results, inspect the largest misses, and pull feature diagnostics.

In [1]:
import sys
from pathlib import Path
# Resolve the parent of the working directory and make it importable so the
# project-level modules imported in the next cell can be found.
PROJ_ROOT = Path('..').resolve()
_proj_root_entry = str(PROJ_ROOT)
# Guard against duplicate entries when this cell is re-run in the same kernel.
if _proj_root_entry not in sys.path:
    sys.path.append(_proj_root_entry)


In [2]:
import pickle

import pandas as pd
import plotly.express as px

from evaluate_predictions import load_predictions, load_actual_results, evaluate
from src.data import nfl_fetcher
from src.models.predictor import GamePredictor


## 1. Load Predictions and Actual Results

In [3]:
# Evaluation target. SEASON and WEEK drive both the actual-results lookup and
# the predictions file, so changing WEEK re-points the whole notebook.
SEASON = 2025
WEEK = 10
# Derived from WEEK so the predictions file and the actual results cannot
# silently refer to different weeks.
# NOTE(review): assumes files are named week<N>.csv with no zero padding —
# confirm the naming for single-digit weeks.
PREDICTIONS_PATH = f'../data/predictions/week{WEEK}.csv'

preds = load_predictions(PREDICTIONS_PATH)
actuals = load_actual_results(SEASON, WEEK)
# evaluate() yields the per-game joined frame plus a dict of summary metrics.
merged, metrics = evaluate(preds, actuals)
metrics

{'games_evaluated': 13,
 'direction_accuracy': np.float64(0.6923076923076923),
 'mean_abs_spread_error': np.float64(10.655974987793396),
 'spread_rmse': np.float64(12.070922785505434),
 'brier_score': np.float64(0.2140158214432254)}

## 2. Biggest Misses

In [4]:
# Order games from the largest absolute spread error downwards and keep the
# five worst predictions for closer inspection.
ranked_by_error = merged.sort_values(by='abs_spread_error', ascending=False)
top_misses = ranked_by_error.head(5)

# Display only the columns needed to read each miss at a glance.
miss_columns = [
    'home_team',
    'away_team',
    'game_date',
    'predicted_spread',
    'actual_margin',
    'spread_error',
]
top_misses[miss_columns]

Unnamed: 0,home_team,away_team,game_date,predicted_spread,actual_margin,spread_error
11,WAS,DET,2025-11-09,-0.158054,-22.0,21.841946
5,MIA,BUF,2025-11-09,0.448009,17.0,-16.551991
10,SF,LA,2025-11-09,-0.342317,-16.0,15.657683
2,CAR,NO,2025-11-09,4.716448,-10.0,14.716448
9,SEA,ARI,2025-11-09,8.048971,22.0,-13.951029


## 3. Visualize Predicted vs Actual Margin

In [5]:
# One point per evaluated game: model spread on x, realised margin on y.
fig = px.scatter(
    merged,
    x='predicted_spread',
    y='actual_margin',
    hover_data=['home_team', 'away_team', 'game_date'],
)

# Dashed zero axes split the plot into quadrants; points in the off-diagonal
# quadrants have a predicted spread and actual margin of opposite sign.
zero_axis_style = dict(line_dash='dash', line_color='gray')
fig.add_hline(y=0, **zero_axis_style)
fig.add_vline(x=0, **zero_axis_style)

# Reference line y = x across the observed prediction range: a perfect
# prediction would sit exactly on it.
spread_lo = merged['predicted_spread'].min()
spread_hi = merged['predicted_spread'].max()
line_df = pd.DataFrame({
    'predicted_spread': [spread_lo, spread_hi],
    'actual_margin': [spread_lo, spread_hi],
})
identity_trace = px.line(line_df, x='predicted_spread', y='actual_margin').data[0]
fig.add_trace(identity_trace)

fig.update_layout(title='Predicted vs Actual Margin')
fig

## 4. Pull Feature Diagnostics For Top Misses

In [6]:
predictor = GamePredictor('NFL', 'v2')
schedule = nfl_fetcher.fetch_nfl_schedule(SEASON)
# The schedule may expose its date under either name; prefer 'gameday' when
# present and always publish a datetime 'game_date' column.
date_source = 'gameday' if 'gameday' in schedule.columns else 'game_date'
schedule['game_date'] = pd.to_datetime(schedule[date_source])


def _features_for_miss(miss):
    """Build the predictor's feature row for one missed game and tag it with an away@home label."""
    single_game = pd.DataFrame([{
        'home_team': miss['home_team'],
        'away_team': miss['away_team'],
        'game_date': miss['game_date'],
        'season': SEASON
    }])
    game_features = predictor.build_features_for_game(single_game, schedule)
    game_features['matchup'] = f"{miss['away_team']}@{miss['home_team']}"
    return game_features


# One feature frame per top miss, stacked into a single frame with a fresh index.
feature_dfs = [_features_for_miss(miss) for _, miss in top_misses.iterrows()]
diagnostic_features = pd.concat(feature_dfs, ignore_index=True)
diagnostic_features.head()


Unnamed: 0,home_team,away_team,game_date,season,rest_home,rest_away,b2b_home,b2b_away,opp_strength_home_season,opp_strength_away_season,...,rest_advantage_home,win_pct_differential,point_diff_differential,point_diff_gap,point_diff_gap_flag,opp_strength_differential,week_number,month,is_playoff,matchup
0,WAS,DET,2025-11-09,2025,7,7,False,False,0.42284,-0.564236,...,0,-0.291667,-11.513889,11.513889,1,0.987076,45,11,0,DET@WAS
1,MIA,BUF,2025-11-09,2025,10,7,False,False,0.746914,-2.413194,...,1,-0.527778,-15.5,15.5,1,3.160108,45,11,0,BUF@MIA
2,SF,LA,2025-11-09,2025,7,7,False,False,0.770062,-0.875,...,0,-0.083333,-9.583333,9.583333,1,1.645062,45,11,0,LA@SF
3,CAR,NO,2025-11-09,2025,7,7,False,False,0.180556,3.70216,...,0,0.444444,7.777778,-7.777778,0,-3.521605,45,11,0,NO@CAR
4,SEA,ARI,2025-11-09,2025,7,6,False,False,-0.704861,-0.414931,...,1,0.375,9.0,-9.0,0,-0.289931,45,11,0,ARI@SEA


Use `diagnostic_features` to inspect rest, strength, and form values for the largest misses. Feed these rows into `diagnose_key_games.py` or additional plots to pinpoint mis-specified features.

## 5. Join Diagnostics With Residuals

In [7]:
# Attach each miss's prediction and residual columns to its feature row,
# matching on the game identity (teams + date).
join_keys = ['home_team', 'away_team', 'game_date']
residual_columns = [
    'predicted_spread',
    'home_win_probability',
    'actual_margin',
    'spread_error',
    'abs_spread_error',
    'brier_component',
]
residuals = top_misses[join_keys + residual_columns]

# Left join keeps every feature row even if no residual row matches.
diagnostic_summary = diagnostic_features.merge(residuals, on=join_keys, how='left')

summary_view = ['matchup', 'predicted_spread', 'actual_margin', 'spread_error', 'abs_spread_error']
diagnostic_summary[summary_view].head()


Unnamed: 0,matchup,predicted_spread,actual_margin,spread_error,abs_spread_error
0,DET@WAS,-0.158054,-22.0,21.841946,21.841946
1,BUF@MIA,0.448009,17.0,-16.551991,16.551991
2,LA@SF,-0.342317,-16.0,15.657683,15.657683
3,NO@CAR,4.716448,-10.0,14.716448,14.716448
4,ARI@SEA,8.048971,22.0,-13.951029,13.951029


## 6. Feature vs Error Correlations

In [8]:
# Correlate every numeric diagnostic column with the signed spread error and
# rank by absolute strength.
numeric_cols = diagnostic_summary.select_dtypes('number')

# Columns with a single distinct value (e.g. week_number or month on a
# one-week slate) have zero variance, so their correlation is undefined and
# numpy emits "invalid value encountered in divide". Drop them up front: the
# original code discarded their NaN results via dropna() anyway, so the
# surviving correlations are unchanged and the warnings disappear.
varying_cols = numeric_cols.loc[:, numeric_cols.nunique() > 1]

# NOTE: diagnostic_summary only holds the top misses, so these values are
# descriptive of a handful of games, not statistically reliable. The ranking
# also includes outcome-derived columns (spread_error itself, actual_margin,
# abs_spread_error, brier_component), which are not model features.
corr = (
    varying_cols
    .corrwith(diagnostic_summary['spread_error'])
    .dropna()
    .sort_values(key=abs, ascending=False)
)
corr.head(15)


invalid value encountered in divide


invalid value encountered in divide


invalid value encountered in divide


invalid value encountered in divide



spread_error                 1.000000
rest_advantage_home         -0.987219
actual_margin               -0.985778
rest_differential           -0.856955
rest_home                   -0.644670
rest_away                    0.564422
abs_spread_error             0.490981
opp_strength_away_season     0.464639
brier_component              0.426603
predicted_spread            -0.425192
home_win_probability        -0.359040
opp_strength_home_season     0.349029
opp_strength_differential   -0.336254
home_team_point_diff        -0.288360
point_diff_gap_flag          0.199513
dtype: float64

## 7. Bucket Errors By Rest & Point Differential

In [9]:
# Bucket each miss by rest differential and by point-differential differential,
# then average the absolute spread error inside each bucket.
# Bins are right-closed intervals; values outside the outer edges become NaN
# in pd.cut and are therefore left out of the group means.
rest_bin_edges = [-10, -3, -1, 0, 1, 3, 10]
point_diff_bin_edges = [-30, -10, -5, 0, 5, 10, 30]

diagnostic_summary['rest_bin'] = pd.cut(diagnostic_summary['rest_differential'], bins=rest_bin_edges)
diagnostic_summary['point_diff_bin'] = pd.cut(diagnostic_summary['point_diff_differential'], bins=point_diff_bin_edges)

# observed=False is passed explicitly: it keeps empty bins in the result (shown
# as NaN) and avoids depending on the groupby default for categorical keys,
# which pandas has deprecated and plans to change.
rest_error = (
    diagnostic_summary
    .groupby('rest_bin', observed=False)['abs_spread_error']
    .mean()
    .to_frame('mean_abs_error')
)
point_error = (
    diagnostic_summary
    .groupby('point_diff_bin', observed=False)['abs_spread_error']
    .mean()
    .to_frame('mean_abs_error')
)
rest_error, point_error







(           mean_abs_error
 rest_bin                 
 (-10, -3]             NaN
 (-3, -1]              NaN
 (-1, 0]         17.405359
 (0, 1]          13.951029
 (1, 3]          16.551991
 (3, 10]               NaN,
                 mean_abs_error
 point_diff_bin                
 (-30, -10]           19.196968
 (-10, -5]            15.657683
 (-5, 0]                    NaN
 (0, 5]                     NaN
 (5, 10]              14.333739
 (10, 30]                   NaN)

## 8. Compare To Training Feature Medians

In [10]:
# Compare the mean feature values on the top misses with the medians saved at
# training time, to see whether the missed games sit far from typical inputs.

# Probe several relative locations for the medians file (presumably so the
# notebook works from different working directories) and take the first hit.
candidate_paths = [
    Path('models/feature_medians_nfl_v2.pkl'),
    Path('../models/feature_medians_nfl_v2.pkl'),
    Path('../../models/feature_medians_nfl_v2.pkl')
]
medians_path = next((path for path in candidate_paths if path.exists()), None)
if medians_path is None:
    raise FileNotFoundError('feature_medians_nfl_v2.pkl not found in expected locations')

# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only
# load artifacts produced by this project's own training run.
with open(medians_path, 'rb') as f:
    feature_medians = pickle.load(f)

features_to_compare = [
    'rest_differential',
    'point_diff_differential',
    'opp_strength_differential',
    'model_spread_feature',
]
median_diffs = []
for feature in features_to_compare:
    # Features the predictor did not emit for these games are skipped.
    if feature not in diagnostic_summary.columns:
        continue
    # A feature with no stored median yields NaN (and a NaN delta) instead of
    # a made-up 0, which would be indistinguishable from a genuine 0.0 median.
    median_val = feature_medians.get(feature, float('nan'))
    mean_val = diagnostic_summary[feature].mean()
    median_diffs.append({
        'feature': feature,
        'training_median': median_val,
        'mean_on_misses': mean_val,
        'delta': mean_val - median_val
    })
pd.DataFrame(median_diffs)


Unnamed: 0,feature,training_median,mean_on_misses,delta
0,rest_differential,0.0,0.8,0.8
1,point_diff_differential,0.0,-3.963889,-3.963889
2,opp_strength_differential,0.014286,0.396142,0.381856


## 9. Notes & Next Steps
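- Use the correlation and binning outputs to decide which features need scaling/capping. Both are computed on only the five largest misses, so treat them as hypotheses to confirm on the full set of evaluated games.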
- If certain bins show huge errors, consider adding interaction terms or retraining with those segments emphasized.