# NBA Spread Betting Strategy Exploration

Load backtest results from `backtest_nba_spread.py --output-csv` and explore:
- Threshold sweep (edge vs confidence)
- Calibration (model prob vs actual cover rate)
- EV by edge bucket
- Filtering for high-ROI strategies

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Plot styling and numeric display defaults used by the cells below.
sns.set_style("whitegrid")
pd.options.display.float_format = '{:.3f}'.format  # print floats with three decimals

## 1. Load Backtest Results

Run first: `python scripts/backtest_nba_spread.py --project <PROJECT> --season 2025 --start-date 2025-11-01 --end-date 2026-01-15 --output-csv notebooks/backtest_results.csv`

In [None]:
# Look for backtest_results.csv in the working directory first, then in the
# parent directory (../backtest_results.csv, i.e. the project root).
csv_name = 'backtest_results.csv'
search_paths = [csv_name, os.path.join(os.path.dirname(os.getcwd()), csv_name)]
for csv_path in search_paths:
    if os.path.exists(csv_path):
        break
else:
    # Neither location has the file: tell the user how to generate it.
    raise FileNotFoundError("Run: python scripts/backtest_nba_spread.py --project <PROJECT> --season 2025 --output-csv backtest_results.csv")

df = pd.read_csv(csv_path)
# Parse dates once so later cells can work with real timestamps.
df['game_date'] = pd.to_datetime(df['game_date'])
print(f"Loaded {len(df)} rows")
print(df.head())

In [None]:
# Ensure we have required columns and compute edge.
# `book_spread` is always needed (it defines which games have odds);
# `predicted_spread` is only needed when `edge` has to be derived here.
needed_cols = ['book_spread']
if 'edge' not in df.columns:
    needed_cols.append('predicted_spread')
missing_cols = [col for col in needed_cols if col not in df.columns]
if missing_cols:
    # Fail with an explicit message instead of an opaque KeyError further down.
    raise KeyError(f"Backtest CSV is missing required column(s): {missing_cols}")

if 'edge' not in df.columns:
    # Edge = model spread minus bookmaker spread, in points.
    df['edge'] = df['predicted_spread'] - df['book_spread']

# Only games that actually have a bookmaker line can be bet on.
df_with_odds = df[df['book_spread'].notna()].copy()
print(f"Games with odds: {len(df_with_odds)}")

## 2. Threshold Sweep

Vary edge_threshold and min_confidence; report ROI and sample size.

In [None]:
def sim_roi(data, edge_thresh, min_conf, unit=1.0):
    """Simulate flat-stake spread betting and return ``(roi, n_bets)``.

    A bet is placed on every row whose ``|edge|`` is strictly greater than
    ``edge_thresh`` and whose ``confidence`` is at least ``min_conf``: the
    home side when ``edge`` is below ``-edge_thresh``, the away side
    otherwise. Winning bets are paid at -110 odds; every other bet loses
    one stake.

    Args:
        data: DataFrame with ``edge``, ``confidence`` and ``home_cover``
            columns. ``home_cover`` may be boolean, 0/1 numeric, or contain
            missing values.
        edge_thresh: Minimum absolute edge (exclusive) required to bet.
        min_conf: Minimum confidence (inclusive) required to bet.
        unit: Stake per bet.

    Returns:
        Tuple ``(roi, n_bets)``: net profit divided by the total amount
        staked, and the number of bets placed. ``(0.0, 0)`` when no row
        qualifies.
    """
    qualifies = (data['edge'].abs() > edge_thresh) & (data['confidence'] >= min_conf)
    d = data[qualifies]
    if d.empty:
        return 0.0, 0

    # Normalise home_cover to a plain boolean array before negating it:
    # ``~`` on 0/1 integers is a bitwise NOT (-2/-1, both truthy) and it
    # raises outright on float/object columns that contain NaN.
    outcome = d['home_cover']
    known = outcome.notna().to_numpy()
    covered = np.zeros(len(d), dtype=bool)
    covered[known] = outcome.to_numpy()[known].astype(bool)

    # Home bets win when the home side covers, away bets when it does not.
    bet_home = (d['edge'] < -edge_thresh).to_numpy()
    # A bet with a missing outcome is never counted as a win.
    won = known & np.where(bet_home, covered, ~covered)

    payout = 100 / 110  # -110 default
    n_bets = len(d)
    n_won = won.sum()
    units_won = n_won * payout * unit
    units_lost = (n_bets - n_won) * unit
    roi = (units_won - units_lost) / (n_bets * unit)
    return roi, n_bets

# Evaluate every (edge threshold, minimum confidence) combination on the grid.
edge_grid = [0.5, 1.0, 1.5, 2.0]
conf_grid = [0.0, 0.2, 0.4]
grid = [(edge_t, conf_t) for edge_t in edge_grid for conf_t in conf_grid]
outcomes = [sim_roi(df_with_odds, edge_t, conf_t) for edge_t, conf_t in grid]
sweep = [
    {'edge_threshold': edge_t, 'min_confidence': conf_t, 'roi': roi_val, 'n_bets': n_val}
    for (edge_t, conf_t), (roi_val, n_val) in zip(grid, outcomes)
]

sweep_df = pd.DataFrame(sweep)
# One table per metric: rows are edge thresholds, columns are confidence floors.
pivot_roi, pivot_n = (
    sweep_df.pivot(index='edge_threshold', columns='min_confidence', values=metric)
    for metric in ('roi', 'n_bets')
)
print("ROI by (edge_threshold, min_confidence):")
print(pivot_roi.to_string())
print("\nSample size:")
print(pivot_n.to_string())

## 3. Calibration: Model Prob vs Actual Cover Rate

When the model says home_win_prob = X%, does the home team actually cover at that rate?

In [None]:
from sklearn.calibration import calibration_curve

# Keep only games where both the model probability and the realised outcome
# are present: astype(int) below cannot convert missing values.
has_inputs = df_with_odds['home_win_prob'].notna() & df_with_odds['home_cover'].notna()
d = df_with_odds[has_inputs].copy()
if len(d) >= 20:
    # Bin predictions into 10 buckets and compare the mean predicted
    # probability with the observed cover rate in each bucket.
    prob_true, prob_pred = calibration_curve(d['home_cover'].astype(int), d['home_win_prob'], n_bins=10)
    plt.figure(figsize=(6, 5))
    plt.plot(prob_pred, prob_true, marker='o', label='Model')
    # Diagonal = perfectly calibrated model.
    plt.plot([0, 1], [0, 1], linestyle='--', label='Perfect')
    plt.xlabel('Predicted Home Cover Prob')
    plt.ylabel('Actual Cover Rate')
    plt.title('Calibration: home_win_prob vs home_cover')
    plt.legend()
    plt.tight_layout()
    plt.show()
else:
    print("Not enough data for calibration curve")

## 4. EV by Edge Bucket

Group bets by |edge| bin; plot the realized ROI of each bin against the break-even line (ROI = 0).

In [None]:
d = df_with_odds.copy()
d['abs_edge'] = d['edge'].abs()
# Bet the home side on a sufficiently negative edge, the away side on a
# sufficiently positive one; anything within +/-0.5 pts is not bet.
d['bet_home'] = d['edge'] < -0.5
d['bet_away'] = d['edge'] > 0.5
d['bet_side'] = np.where(d['bet_home'], 'home', np.where(d['bet_away'], 'away', None))

# Normalise home_cover to a boolean array before negating it: ``~`` on 0/1
# integers is a bitwise NOT and it raises on columns that contain NaN.
known = d['home_cover'].notna().to_numpy()
covered = np.zeros(len(d), dtype=bool)
covered[known] = d['home_cover'].to_numpy()[known].astype(bool)
placed = (d['bet_home'] | d['bet_away']).to_numpy()
# 1.0 = bet won, 0.0 = bet lost, NaN = no bet placed or outcome unknown.
bet_won = np.where(d['bet_home'].to_numpy(), covered, ~covered).astype(float)
bet_won[~(placed & known)] = np.nan
d['bet_won'] = bet_won
bets = d[d['bet_side'].notna()].copy()

if not bets.empty:
    # The last bin is open-ended so that very large edges land in '3+'
    # instead of falling outside every bin and being dropped.
    bets['edge_bin'] = pd.cut(
        bets['abs_edge'],
        bins=[0.5, 1.0, 1.5, 2.0, 3.0, np.inf],
        labels=['0.5-1', '1-1.5', '1.5-2', '2-3', '3+'],
    )
    # 'count' and 'sum' both skip NaN, so bets with an unknown outcome
    # affect neither the sample size nor the win total.
    by_bin = bets.groupby('edge_bin', observed=True).agg(
        n=('bet_won', 'count'),
        wins=('bet_won', 'sum'),
    ).reset_index()
    by_bin['accuracy'] = by_bin['wins'] / by_bin['n']
    # Flat stake at -110: each win pays 100/110 units, each loss costs 1 unit.
    by_bin['roi'] = (by_bin['wins'] * (100/110) - (by_bin['n'] - by_bin['wins'])) / by_bin['n']
    print(by_bin)
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.bar(by_bin['edge_bin'].astype(str), by_bin['roi'], color='steelblue', alpha=0.8)
    ax.axhline(0, color='black', linewidth=0.5)  # break-even reference
    ax.set_xlabel('|Edge| (pts)')
    ax.set_ylabel('ROI')
    ax.set_title('ROI by Edge Bucket')
    plt.tight_layout()
    plt.show()
else:
    print("No bets to analyze")

## 5. Best Strategy Summary

Identify the threshold combo with highest ROI and sufficient sample size.

In [None]:
# Only threshold combinations backed by at least 30 simulated bets are eligible.
filtered = sweep_df.loc[sweep_df['n_bets'] >= 30]
if filtered.empty:
    print("No strategy with >= 30 bets")
else:
    # Pick the eligible row with the highest ROI.
    best_idx = filtered['roi'].idxmax()
    best = filtered.loc[best_idx]
    print(f"Best strategy (min 30 bets): edge_threshold={best['edge_threshold']}, min_confidence={best['min_confidence']}")
    print(f"  ROI: {best['roi']:.1%} | Bets: {int(best['n_bets'])}")