In [41]:
import sys, os, importlib, inspect
sys.path.append(os.path.abspath(".."))

import src.features.team as team_mod
importlib.reload(team_mod)

team_mod.__file__              # <- should point to /Users/ssjraz/NBA-Prediction-Model/src/features/team.py
[name for name, obj in inspect.getmembers(team_mod) if inspect.isfunction(obj)]


['add_four_factors',
 'add_home_away_rolling_pts',
 'add_rolling',
 'attach_opponent',
 'basic_context',
 'build_matchup_frame']

In [42]:
from src.features.team import basic_context, add_four_factors, attach_opponent, add_rolling, add_home_away_rolling_pts, build_matchup_frame
from src.models.baselines import predict_matchup_baseline, predict_matchup_baseline_homeaway


In [43]:
from src.data.nba_client import get_team_game_logs_season

df = get_team_game_logs_season("2024-25", ("Regular Season","Playoffs"))
df = basic_context(df)
df = add_four_factors(df)
df = attach_opponent(df)
df = add_rolling(df, window=10)
df = add_home_away_rolling_pts(df, window=10)

predict_matchup_baseline("Boston Celtics", "Dallas Mavericks", df, col="PTS_r10")
predict_matchup_baseline_homeaway("Boston Celtics", "Dallas Mavericks", df, window=10)


{'home': 'Boston Celtics',
 'away': 'Dallas Mavericks',
 'pred_home': 112.4,
 'pred_away': 108.9,
 'winner': 'Boston Celtics'}

In [44]:
matchups = build_matchup_frame(df, window=10)
matchups.head()



Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,HOME,days_rest,is_b2b,eFG_r10,FTr_r10,TOV_rate_r10,ORB_pct_r10,PTS_r10,eFG_allowed_r10_opp,TOV_rate_forced_r10_opp,PTS
0,22400064,1610612737,Atlanta Hawks,True,,0,,,,,,,,120
1,22400079,1610612737,Atlanta Hawks,True,2.0,0,,,,,,,,125
2,22400100,1610612737,Atlanta Hawks,False,2.0,0,0.520734,0.454272,0.158173,0.265538,116.333333,0.424251,0.188422,104
3,22400103,1610612737,Atlanta Hawks,True,1.0,1,0.534069,0.451815,0.156069,0.236653,117.0,0.590516,0.146849,119
4,22400121,1610612737,Atlanta Hawks,False,2.0,0,0.534623,0.405663,0.155979,0.245137,117.6,0.577098,0.149042,120


In [45]:
matchups_clean = matchups.dropna(subset=["eFG_r10","FTr_r10","TOV_rate_r10","ORB_pct_r10",
                                         "eFG_allowed_r10_opp","TOV_rate_forced_r10_opp"])
feature_cols = ["eFG_r10","FTr_r10","TOV_rate_r10","ORB_pct_r10",
                "eFG_allowed_r10_opp","TOV_rate_forced_r10_opp","HOME", "days_rest","is_b2b"]
X = matchups_clean[feature_cols]
y = matchups_clean["PTS"]

In [49]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error
import numpy as np
import pandas as pd

tscv = TimeSeriesSplit(n_splits=5)
train_idx, test_idx = list(tscv.split(X))[-1]

model = Ridge(alpha=5.0, fit_intercept=True)
model.fit(X.iloc[train_idx], y.iloc[train_idx])

y_pred = model.predict(X.iloc[test_idx])
print("Validation MAE (Ridge):", round(mean_absolute_error(y.iloc[test_idx], y_pred), 2))

# Optional: compare to naive baseline PTS_r10 if you have it in matchups_clean
if "PTS_r10" in matchups_clean.columns:
    base = matchups_clean.iloc[test_idx]["PTS_r10"].values
    print("Validation MAE (Baseline PTS_r10):", round(mean_absolute_error(y.iloc[test_idx], base), 2))

# Inspect learned weights
pd.Series(model.coef_, index=feature_cols).sort_values(ascending=False)


Validation MAE (Ridge): 9.8
Validation MAE (Baseline PTS_r10): 8.85


eFG_r10                    38.589057
eFG_allowed_r10_opp        28.317066
ORB_pct_r10                 6.063250
HOME                        2.050019
FTr_r10                     1.854960
days_rest                  -0.257353
is_b2b                     -1.493255
TOV_rate_r10              -15.435420
TOV_rate_forced_r10_opp   -15.458780
dtype: float64

In [None]:
def predict_matchup_model(model, df, home_team, away_team, window=10, feature_cols=None):
    m = build_matchup_frame(df, window=window)
    m = m.dropna(subset=[c for c in feature_cols if c not in ("days_rest","is_b2b")]).copy()
    # helper: latest valid row for a team
    def last_row(team):
        row = m[m["TEAM_NAME"] == team].dropna(subset=[c for c in feature_cols if c != "HOME"]).tail(1)
        for c in feature_cols:
            if c not in row.columns: row[c] = 0.0
        return row[feature_cols].fillna(0.0)

    home_X = last_row(home_team); away_X = last_row(away_team)
    pred_home = float(model.predict(home_X)[0])
    pred_away = float(model.predict(away_X)[0])

    return {
        "home": home_team, "away": away_team,
        "pred_home": round(pred_home, 1), "pred_away": round(pred_away, 1),
        "winner": home_team if pred_home > pred_away else away_team
    }

predict_matchup_model(model, df, "Boston Celtics", "Dallas Mavericks", window=10, feature_cols=feature_cols)


{'home': 'Golden State Warriors',
 'away': 'Los Angeles Lakers',
 'pred_home': 99.5,
 'pred_away': 109.8,
 'winner': 'Los Angeles Lakers'}