In [1]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

# Put the parent of the current working directory at the front of sys.path
# (unless it is already listed) so the `project_code` package can be imported.
PROJECT_ROOT = Path.cwd().resolve().parents[0]
if not any(entry == str(PROJECT_ROOT) for entry in sys.path):
    sys.path.insert(0, str(PROJECT_ROOT))

from project_code.functions import *

# Cap PyTorch's intra-op parallelism at 8 threads.
torch.set_num_threads(8)

# The inter-op thread count can only be set once per process, and only before
# any inter-op parallel work has started. Re-running this cell in a live kernel
# would otherwise raise RuntimeError, so report it and keep the current value.
try:
    torch.set_num_interop_threads(1)
except RuntimeError as exc:
    print(f"Inter-op thread count left unchanged: {exc}")

In [2]:
# One seed shared by NumPy's global RNG and PyTorch so both start from the
# same, explicitly named value.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED);  # trailing ';' keeps the Generator repr out of the cell output

<torch._C.Generator at 0x190f0d29d90>

In [3]:
# Fetch play-by-play for seasons 2017 through 2025 (the range end is exclusive).
seasons = range(2017, 2026)
print("Loading play-by-play for seasons:", seasons)

# The loader returns one DataFrame covering all requested seasons;
# downcast=False is passed through to it unchanged.
raw_pbp = nfl.import_pbp_data(seasons, downcast=False)

print("Rows loaded:", len(raw_pbp))
raw_pbp.head()

Loading play-by-play for seasons: range(2017, 2026)
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
2023 done.
2024 done.
2025 done.
Rows loaded: 436410


Unnamed: 0,play_id,game_id,old_game_id_x,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,route,defense_man_zone_type,defense_coverage_type,offense_names,defense_names,offense_positions,defense_positions,offense_numbers,defense_numbers,old_game_id
0,1.0,2017_01_ARI_DET,2017091004,DET,ARI,REG,1,,,,...,,,,,,,,,,
1,37.0,2017_01_ARI_DET,2017091004,DET,ARI,REG,1,ARI,away,DET,...,,,,,,,,,,
2,73.0,2017_01_ARI_DET,2017091004,DET,ARI,REG,1,ARI,away,DET,...,SCREEN,,,,,,,,,
3,97.0,2017_01_ARI_DET,2017091004,DET,ARI,REG,1,ARI,away,DET,...,,,,,,,,,,
4,118.0,2017_01_ARI_DET,2017091004,DET,ARI,REG,1,ARI,away,DET,...,ANGLE,,,,,,,,,


In [4]:
# Columns carried forward from the raw play-by-play frame. Order is preserved
# because it determines the column order of `pbp`.
cols_to_keep = [
    'play_type', 'season', 'home_wp_post', 'away_wp_post', 'weather',
    'yardline_100', 'ydstogo',
    'game_seconds_remaining', 'half_seconds_remaining',
    'posteam', 'defteam',
    'posteam_timeouts_remaining', 'defteam_timeouts_remaining',
    'kick_distance', 'touchback', 'return_yards',
    'first_down', 'touchdown',
    'game_id', 'score_differential',
    'home_team', 'away_team', 'home_score', 'away_score',
    'down', 'field_goal_result', 'penalty',
    'home_coach', 'away_coach',
    'spread_line', 'total_line',
]

# Independent copy so later column assignments never touch raw_pbp.
pbp = raw_pbp[cols_to_keep].copy()

In [5]:
# Collapse raw play types into the three decision categories used downstream.
# Run and pass both count as "go"; any other play type maps to NaN and is
# removed by the filter below.
action_to_col = {
    "punt": "punt",
    "field_goal": "field_goal",
    "run": "go",
    "pass": "go"
}

pbp["play_type_actual"] = pbp["play_type"].map(action_to_col)
# .copy() after each boolean filter: the result is then an independent frame,
# so the column assignments that follow (here and inside the helpers) do not
# write to a slice of the previous frame (pandas SettingWithCopy).
pbp = pbp[pbp.play_type_actual.isin(['punt', 'go', 'field_goal'])].copy()
pbp = deconstruct_weather(pbp)
# Keep only plays without a penalty.
pbp = pbp[pbp.penalty == 0].copy()
# 1 when the field goal result is "made", 0 otherwise (including non-FG plays).
pbp['fg_made'] = (pbp["field_goal_result"] == "made").astype(int)

# Name of the expected-WP column that corresponds to the action actually taken.
action_to_ewp_col = {
    "punt": "ewp_punt",
    "field_goal": "ewp_fg",
    "go": "ewp_go"
}
pbp["actual_ewp_col"] = pbp["play_type_actual"].map(action_to_ewp_col)

# Re-express home/away attributes from the possession team's point of view.
pbp["possession_coach"] = np.where(pbp["posteam"] == pbp["home_team"], pbp["home_coach"], pbp["away_coach"])
pbp["defending_coach"] = np.where(pbp["posteam"] == pbp["home_team"], pbp["away_coach"], pbp["home_coach"])
# spread_line is kept as-is when the home team has the ball and negated otherwise.
pbp["possession_spread_line"] = np.where(pbp["posteam"] == pbp["home_team"], pbp["spread_line"], -pbp["spread_line"])

# Adds the engineered feature columns (helper defined in project_code.functions).
pbp = create_features(pbp)

In [6]:
# Hold out the latest season present in the data as the test set; every other
# season is used for training.
seasons = pbp["season"].unique()
test_season = seasons.max()

is_test_season = pbp["season"] == test_season
pbp_train = pbp[~is_test_season]
pbp_test = pbp[is_test_season]

In [7]:
# --- Training rows for the WP model: both post-play WP columns must be present
wp_df = pbp_train.dropna(subset=["home_wp_post", "away_wp_post"]).copy()

# --- Full feature list; the column order here is the order the model sees
wp_features = [
    # raw game-state columns
    "yardline_100",
    "down",
    "ydstogo",
    "game_seconds_remaining",
    "half_seconds_remaining",
    "score_differential",
    "posteam_timeouts_remaining",
    "defteam_timeouts_remaining",
    "temp_F",
    "wind_mph",
    "possession_spread_line",
    "total_line",
    # engineered columns (listed again in `engineered` below)
    "is_redzone",
    "is_goal_to_go",
    "log_ydstogo",
    "log_game_seconds_remaining",
    "abs_score_differential",
    "score_time_ratio",
]

# Subset of wp_features that is derived rather than read from the raw data.
engineered = {
    "is_redzone",
    "is_goal_to_go",
    "log_ydstogo",
    "log_game_seconds_remaining",
    "abs_score_differential",
    "score_time_ratio",
}

# Everything in wp_features that is not engineered, in the original order.
wp_base_features = [name for name in wp_features if name not in engineered]

# --- Target: post-play WP of whichever side has possession
posteam_is_home = wp_df["posteam"] == wp_df["home_team"]
wp_df["wp_target"] = np.where(posteam_is_home, wp_df["home_wp_post"], wp_df["away_wp_post"])

# (Re)build the engineered columns on this frame, then renumber rows 0..n-1.
wp_df = create_features(wp_df)
wp_df = wp_df.reset_index(drop=True)

X_wp = wp_df[wp_features]
y_wp = wp_df["wp_target"]

# Keep the target strictly inside (0, 1).
epsilon = 1e-6
y_wp_clipped = y_wp.clip(lower=epsilon, upper=1 - epsilon).reset_index(drop=True)

# Monotonic direction per feature (+1 increasing, -1 decreasing); features not
# listed are unconstrained.
# NOTE(review): nflverse documents spread_line as positive when the home team
# is favored. If that holds for this data, possession_spread_line is positive
# when the offense is favored and -1 would force WP to fall as the offense
# becomes a bigger favorite -- confirm the intended sign.
monotone_constraints_dict = {
    "yardline_100": -1,
    "ydstogo": -1,
    "score_differential": 1,
    "possession_spread_line": -1,
}

# One entry per model column, in column order.
mono_tuple = tuple([monotone_constraints_dict.get(col, 0) for col in X_wp.columns])

wp_folds = make_temporal_folds(wp_df)

In [8]:
# SQLite file backing the study: <parent of cwd>/optuna/wp_study.db
wp_db_path = (Path.cwd().parent / "optuna" / "wp_study.db").resolve()
wp_storage = f"sqlite:///{wp_db_path}"

# load_if_exists=True: an existing study with this name is resumed, so each
# run of this cell appends more trials to it.
wp_study = optuna.create_study(
    study_name="wp_study_v9",
    storage=wp_storage,
    direction="minimize",
    load_if_exists=True,
)

# XGBoost settings held constant across all trials.
wp_fixed_params = dict(
    n_estimators=2000,
    eval_metric="rmse",
    tree_method="hist",
    early_stopping_rounds=100,
    max_bin=128,
    verbosity=0,
    n_jobs=14,
)


def objective(trial):
    """Adapter that hands the WP data and settings to `wp_objective` for one trial."""
    shared_inputs = dict(
        wp_fixed_params=wp_fixed_params,
        X_wp=X_wp,
        y_wp_clipped=y_wp_clipped,
        wp_folds=wp_folds,
        mono_tuple=mono_tuple,
    )
    return wp_objective(trial, **shared_inputs)


wp_study.optimize(objective, n_trials=50, n_jobs=1)

[32m[I 2026-02-02 21:21:31,789][0m Using an existing study with name 'wp_study_v9' instead of creating a new one.[0m
[32m[I 2026-02-02 21:22:27,310][0m Trial 50 finished with value: 0.05799957364797592 and parameters: {'learning_rate': 0.03682100398337882, 'max_depth': 6, 'subsample': 0.8969201900529045, 'colsample_bytree': 0.7786310509601547, 'min_child_weight': 3.463726821509415, 'reg_lambda': 20.834585385906056}. Best is trial 50 with value: 0.05799957364797592.[0m
[32m[I 2026-02-02 21:23:46,251][0m Trial 51 finished with value: 0.05809715390205383 and parameters: {'learning_rate': 0.030524504321558954, 'max_depth': 6, 'subsample': 0.8930800884269852, 'colsample_bytree': 0.777213582239772, 'min_child_weight': 4.846257437999018, 'reg_lambda': 23.986907835619395}. Best is trial 50 with value: 0.05799957364797592.[0m
[32m[I 2026-02-02 21:25:00,205][0m Trial 52 finished with value: 0.05801469087600708 and parameters: {'learning_rate': 0.030312478369655282, 'max_depth': 6, 'su

[32m[I 2026-02-02 21:54:12,637][0m Trial 73 finished with value: 0.058034610003232956 and parameters: {'learning_rate': 0.022124369355014786, 'max_depth': 6, 'subsample': 0.8657703615860732, 'colsample_bytree': 0.8127475540768607, 'min_child_weight': 8.160950279536397, 'reg_lambda': 21.564516747138715}. Best is trial 50 with value: 0.05799957364797592.[0m
[32m[I 2026-02-02 21:55:37,967][0m Trial 74 finished with value: 0.05801847577095032 and parameters: {'learning_rate': 0.021497403053897503, 'max_depth': 6, 'subsample': 0.873190660000237, 'colsample_bytree': 0.8038045274146703, 'min_child_weight': 3.690812862174062, 'reg_lambda': 16.35309829791442}. Best is trial 50 with value: 0.05799957364797592.[0m
[32m[I 2026-02-02 21:56:57,681][0m Trial 75 finished with value: 0.05802151560783386 and parameters: {'learning_rate': 0.025025410079871153, 'max_depth': 6, 'subsample': 0.8755877099064716, 'colsample_bytree': 0.8036421830541852, 'min_child_weight': 3.7770007640772905, 'reg_lamb

[32m[I 2026-02-02 22:27:16,369][0m Trial 96 finished with value: 0.0579969696700573 and parameters: {'learning_rate': 0.024456139196128455, 'max_depth': 6, 'subsample': 0.8327162369230732, 'colsample_bytree': 0.862821696176629, 'min_child_weight': 1.9719902035454941, 'reg_lambda': 3.8969213761320414}. Best is trial 96 with value: 0.0579969696700573.[0m
[32m[I 2026-02-02 22:28:37,575][0m Trial 97 finished with value: 0.0580013208091259 and parameters: {'learning_rate': 0.02621269624669856, 'max_depth': 6, 'subsample': 0.8272000315145113, 'colsample_bytree': 0.8630442729687039, 'min_child_weight': 1.6062395304844894, 'reg_lambda': 5.102046000802175}. Best is trial 96 with value: 0.0579969696700573.[0m
[32m[I 2026-02-02 22:30:03,795][0m Trial 98 finished with value: 0.057993821799755096 and parameters: {'learning_rate': 0.024498685128358556, 'max_depth': 6, 'subsample': 0.8320590406720438, 'colsample_bytree': 0.8494037081001251, 'min_child_weight': 1.3922479926179048, 'reg_lambda'

In [9]:
wp_best_score = wp_study.best_value
wp_best_params = wp_study.best_params.copy()

print("Best CV RMSE:", wp_best_score)
print()
print("Best params:", wp_best_params)

# Fixed settings take precedence: discard any tuned key that also appears in
# wp_fixed_params (can happen with trials stored by older study definitions).
wp_best_params = {
    name: value
    for name, value in wp_best_params.items()
    if name not in wp_fixed_params
}

# Early stopping needs an eval_set, which the full-data fit does not have, so
# that key is left out. All n_estimators rounds are therefore trained.
final_params = {
    name: value
    for name, value in {**wp_fixed_params, **wp_best_params}.items()
    if name != "early_stopping_rounds"
}
final_params["monotone_constraints"] = mono_tuple

wp_model = XGBRegressor(**final_params)
wp_model.fit(X_wp, y_wp_clipped)

Best CV RMSE: 0.057993821799755096

Best params: {'colsample_bytree': 0.8494037081001251, 'learning_rate': 0.024498685128358556, 'max_depth': 6, 'min_child_weight': 1.3922479926179048, 'reg_lambda': 4.977348059603375, 'subsample': 0.8320590406720438}


XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8494037081001251, device=None,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric='rmse', feature_types=None, gamma=None,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.024498685128358556,
             max_bin=128, max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=6, max_leaves=None,
             min_child_weight=1.3922479926179048, missing=nan,
             monotone_constraints=(-1, 0, -1, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, 0,
                                   0, 0, 0, 0),
             multi_strategy=None, n_estimators=2000, n_jobs=14,
             num_parallel_tree=None, random_state=None, ...)

In [10]:
# Pair each WP feature with the fitted model's importance score, highest first.
wp_importances = pd.DataFrame(
    {"feature": wp_features, "importance": wp_model.feature_importances_}
).sort_values(by="importance", ascending=False)

print(wp_importances.to_string(index=False))

                   feature  importance
        score_differential    0.894392
          score_time_ratio    0.053472
              yardline_100    0.018473
                is_redzone    0.010007
                      down    0.005125
posteam_timeouts_remaining    0.003293
    abs_score_differential    0.002775
    game_seconds_remaining    0.002366
defteam_timeouts_remaining    0.002309
                   ydstogo    0.001847
log_game_seconds_remaining    0.001840
             is_goal_to_go    0.001195
    half_seconds_remaining    0.000746
               log_ydstogo    0.000583
    possession_spread_line    0.000450
                total_line    0.000392
                    temp_F    0.000375
                  wind_mph    0.000362


In [11]:
# Punt training set: training-season punts that have both a kick distance and
# a return yardage recorded.
is_punt = pbp_train["play_type_actual"] == "punt"
punt_df = pbp_train[is_punt].dropna(subset=["kick_distance", "return_yards"]).copy()

# Net punt = kick distance minus return yards. Touchback rows are overwritten
# with yardline_100 - 20 (presumably because the ball comes out to the 20).
punt_df["net_punt"] = punt_df["kick_distance"] - punt_df["return_yards"]
is_touchback = punt_df["touchback"] == 1
punt_df.loc[is_touchback, "net_punt"] = punt_df.loc[is_touchback, "yardline_100"] - 20

# Renumber rows 0..n-1 before building folds and converting to arrays.
punt_df = punt_df.reset_index(drop=True)

punt_folds = make_temporal_folds(punt_df)

# Model inputs; the order here is the column order of X_punt.
punt_features = [
    "yardline_100",
    "game_seconds_remaining",
    "half_seconds_remaining",
    "score_differential",
    "posteam_timeouts_remaining",
    "defteam_timeouts_remaining",
    "temp_F",
    "wind_mph",
    "possession_spread_line",
    "total_line",
    "is_redzone",
    "is_goal_to_go",
    "log_ydstogo",
    "log_game_seconds_remaining",
    "abs_score_differential",
    "score_time_ratio",
]

# float32 arrays for the feature matrix and the target.
X_punt = punt_df[punt_features].to_numpy(dtype=np.float32, copy=False)
y_punt = punt_df["net_punt"].to_numpy(dtype=np.float32, copy=False)

In [12]:
# SQLite file backing the study: <parent of cwd>/optuna/punt_study.db
punt_db_path = (Path.cwd().parent / "optuna" / "punt_study.db").resolve()
punt_storage = f"sqlite:///{punt_db_path}"

# load_if_exists=True: an existing study with this name is resumed, so each
# run of this cell appends more trials to it.
punt_study = optuna.create_study(
    study_name="punt_study_xgb_v1",
    storage=punt_storage,
    direction="minimize",
    load_if_exists=True,
)


def objective(trial):
    """Adapter that hands the punt arrays and folds to `punt_objective` for one trial."""
    shared_inputs = dict(X_punt=X_punt, y_punt=y_punt, punt_folds=punt_folds)
    return punt_objective(trial, **shared_inputs)


punt_study.optimize(objective, n_trials=50, n_jobs=1)

[32m[I 2026-02-02 22:32:19,242][0m Using an existing study with name 'punt_study_xgb_v1' instead of creating a new one.[0m
[32m[I 2026-02-02 22:32:21,070][0m Trial 53 finished with value: 9.729333877563477 and parameters: {'max_depth': 3, 'learning_rate': 0.018137947031644557, 'subsample': 0.6327200449442985, 'colsample_bytree': 0.6579148962882254, 'min_child_weight': 40.31577327106333, 'reg_lambda': 15.116487316387236, 'reg_alpha': 1.1534600067334376e-05, 'gamma': 4.047164547646069}. Best is trial 53 with value: 9.729333877563477.[0m
[32m[I 2026-02-02 22:32:22,766][0m Trial 54 finished with value: 9.729665756225586 and parameters: {'max_depth': 3, 'learning_rate': 0.0202744682852997, 'subsample': 0.6272031466948709, 'colsample_bytree': 0.7955905212829146, 'min_child_weight': 30.336338475115298, 'reg_lambda': 42.70304806517147, 'reg_alpha': 1.528260007370466e-06, 'gamma': 4.546886014695731}. Best is trial 53 with value: 9.729333877563477.[0m
[32m[I 2026-02-02 22:32:24,643][0

[32m[I 2026-02-02 22:32:54,461][0m Trial 73 finished with value: 9.745199203491211 and parameters: {'max_depth': 5, 'learning_rate': 0.02115661696772175, 'subsample': 0.6525122411464009, 'colsample_bytree': 0.8753176540577898, 'min_child_weight': 1.8428454122200473, 'reg_lambda': 15.389047593202445, 'reg_alpha': 1.3037661570464065e-06, 'gamma': 4.433088655046191}. Best is trial 55 with value: 9.727398872375488.[0m
[32m[I 2026-02-02 22:32:55,669][0m Trial 74 finished with value: 9.75057601928711 and parameters: {'max_depth': 5, 'learning_rate': 0.06666098797162676, 'subsample': 0.6351980188801262, 'colsample_bytree': 0.8527919369532163, 'min_child_weight': 3.2941438773064764, 'reg_lambda': 10.20519200241485, 'reg_alpha': 2.8348131379547803e-06, 'gamma': 3.9983213905997133}. Best is trial 55 with value: 9.727398872375488.[0m
[32m[I 2026-02-02 22:32:56,807][0m Trial 75 finished with value: 9.76168155670166 and parameters: {'max_depth': 5, 'learning_rate': 0.08669853974718796, 'sub

[32m[I 2026-02-02 22:33:29,696][0m Trial 93 finished with value: 9.730398178100586 and parameters: {'max_depth': 4, 'learning_rate': 0.01735419190780674, 'subsample': 0.6003291409206575, 'colsample_bytree': 0.887768247002197, 'min_child_weight': 42.178593036732615, 'reg_lambda': 36.49901295402827, 'reg_alpha': 0.011421757740042917, 'gamma': 0.5321314749852715}. Best is trial 55 with value: 9.727398872375488.[0m
[32m[I 2026-02-02 22:33:31,761][0m Trial 94 finished with value: 9.730513572692871 and parameters: {'max_depth': 4, 'learning_rate': 0.016731044559201404, 'subsample': 0.6006371110534192, 'colsample_bytree': 0.8875440173146668, 'min_child_weight': 43.678560622470144, 'reg_lambda': 22.946897530298127, 'reg_alpha': 0.010497608693210665, 'gamma': 0.5685280693617925}. Best is trial 55 with value: 9.727398872375488.[0m
[32m[I 2026-02-02 22:33:33,915][0m Trial 95 finished with value: 9.733306884765625 and parameters: {'max_depth': 4, 'learning_rate': 0.016688916370720838, 'sub

In [13]:
punt_best_params = punt_study.best_params.copy()
punt_best_score = punt_study.best_value

print("Best CV RMSE (yards):", punt_best_score)
print()
print("Best params:", punt_best_params)

# ---- Train final model on full data using best params ----
punt_fixed_params = {
    "n_estimators": 5000,
    "objective": "reg:squarederror",
    "eval_metric": "rmse",
    "tree_method": "hist",
    "max_bin": 256,
    "verbosity": 0,
    "n_jobs": 14,
}

# Remove any tuned keys that collide with fixed keys (robust to old studies).
# Without this, a duplicated key makes XGBRegressor(**fixed, **best) raise
# TypeError; the WP and go-for-it cells apply the same guard.
for k in list(punt_best_params.keys()):
    if k in punt_fixed_params:
        punt_best_params.pop(k)

punt_model = XGBRegressor(**{**punt_fixed_params, **punt_best_params})
punt_model.fit(X_punt, y_punt, verbose=False)

Best CV RMSE (yards): 9.727398872375488

Best params: {'colsample_bytree': 0.8180273592915068, 'gamma': 4.993223792429942, 'learning_rate': 0.018492088542246463, 'max_depth': 3, 'min_child_weight': 1.6324838037060703, 'reg_alpha': 1.7387540225844615e-06, 'reg_lambda': 25.593595529384885, 'subsample': 0.6045353484751955}


XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.8180273592915068, device=None,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric='rmse', feature_types=None, gamma=4.993223792429942,
             grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.018492088542246463,
             max_bin=256, max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=3, max_leaves=None,
             min_child_weight=1.6324838037060703, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=5000,
             n_jobs=14, num_parallel_tree=None, random_state=None, ...)

In [14]:
# Pair each punt feature with the fitted model's importance score, highest first.
punt_importances = pd.DataFrame(
    {"feature": punt_features, "importance": punt_model.feature_importances_}
).sort_values(by="importance", ascending=False)

print(punt_importances.to_string(index=False))

                   feature  importance
              yardline_100    0.216947
               log_ydstogo    0.065309
          score_time_ratio    0.063279
log_game_seconds_remaining    0.062402
                    temp_F    0.062361
    half_seconds_remaining    0.061326
defteam_timeouts_remaining    0.060475
    abs_score_differential    0.060249
    game_seconds_remaining    0.060107
    possession_spread_line    0.058813
        score_differential    0.057828
                  wind_mph    0.057540
                total_line    0.057324
posteam_timeouts_remaining    0.056039
                is_redzone    0.000000
             is_goal_to_go    0.000000


In [15]:
# --- Filter to field goal attempts with a made / missed / blocked result ---
fg_df = pbp_train[pbp_train.play_type_actual == "field_goal"].dropna(subset=["field_goal_result"]).copy()
fg_df = fg_df[fg_df.field_goal_result.isin(['made', 'missed', 'blocked'])]

# Renumber rows 0..n-1 before building folds, as the WP / punt / go cells do.
# With a fresh RangeIndex, labels and positions coincide, so the `.loc`
# lookups below are valid for whichever kind of index the folds contain.
fg_df = fg_df.reset_index(drop=True)

# Field goal model inputs
fg_features = [
    "yardline_100",
    "game_seconds_remaining",
    "half_seconds_remaining",
    "score_differential",
    "temp_F",
    "wind_mph",
    "possession_spread_line",
    "total_line",
    "is_redzone",
    "is_goal_to_go",
    "log_ydstogo",
    "log_game_seconds_remaining",
    "abs_score_differential",
    "score_time_ratio"
]

X_fg = fg_df[fg_features]
y_fg = fg_df["fg_made"]

fg_folds = make_temporal_folds(fg_df)


def _make_fg_pipeline():
    """Return an unfitted StandardScaler -> LogisticRegression pipeline.

    Used for every CV fold and for the final fit so that all of them share
    exactly the same configuration.
    """
    return make_pipeline(
        StandardScaler(),
        LogisticRegression(
            solver="lbfgs",
            max_iter=5000,
            C=1.0,
        )
    )


# Out-of-fold make probabilities; rows never used for validation stay NaN.
fg_oof_pred = pd.Series(index=fg_df.index, dtype=float)

for train_idx, val_idx in fg_folds:
    X_train = X_fg.loc[train_idx]
    y_train = y_fg.loc[train_idx]
    X_val   = X_fg.loc[val_idx]

    # The scaler is fitted inside the pipeline, i.e. on the training fold only.
    fg_model_lr_fold = _make_fg_pipeline()
    fg_model_lr_fold.fit(X_train, y_train)
    fg_oof_pred.loc[val_idx] = fg_model_lr_fold.predict_proba(X_val)[:, 1]

# Score only the rows that received an out-of-fold prediction.
mask = fg_oof_pred.notna()
fg_oof_rmse = np.sqrt(np.mean((fg_oof_pred[mask] - y_fg[mask]) ** 2))
print("FG (LogReg) OOF RMSE:", fg_oof_rmse)

# final model on all data (scaled consistently)
fg_model = _make_fg_pipeline()
fg_model.fit(X_fg, y_fg)

FG (LogReg) OOF RMSE: 0.3363635719635666


Pipeline(steps=[('standardscaler', StandardScaler()),
                ('logisticregression', LogisticRegression(max_iter=5000))])

In [16]:
# Fitted logistic-regression step of the FG pipeline.
lr_ns = fg_model.named_steps["logisticregression"]

# Flatten the coefficient array to one value per feature. Because the pipeline
# standardises its inputs first, each value is per one standard deviation.
coef = lr_ns.coef_.reshape(-1)
importance = np.abs(coef)

# Rank features by absolute standardised coefficient.
imp_df = (
    pd.DataFrame({"feature": X_fg.columns, "coef_z": coef})
    .assign(abs_coef_z=importance)
    .sort_values(by="abs_coef_z", ascending=False)
)

imp_df

Unnamed: 0,feature,coef_z,abs_coef_z
0,yardline_100,-0.925598,0.925598
8,is_redzone,0.161346,0.161346
9,is_goal_to_go,0.123166,0.123166
5,wind_mph,-0.114537,0.114537
4,temp_F,0.059581,0.059581
11,log_game_seconds_remaining,0.052261,0.052261
1,game_seconds_remaining,-0.047896,0.047896
2,half_seconds_remaining,0.039483,0.039483
3,score_differential,0.031377,0.031377
12,abs_score_differential,0.017875,0.017875


In [17]:
# Training rows for the conversion model: 4th-down plays in the training
# seasons where the offense ran or passed, with a recorded first_down flag.
is_fourth_down_go = (pbp_train["down"] == 4) & (pbp_train["play_type_actual"] == "go")
go_df = pbp_train[is_fourth_down_go].copy().dropna(subset=["first_down"])

# Target: 1 when the play is flagged as a first down or as a touchdown.
# NOTE(review): if `touchdown` also flags defensive return scores, those plays
# would be labelled as successes here -- confirm against the data dictionary.
go_df["success"] = go_df[["first_down", "touchdown"]].eq(1).any(axis=1).astype(int)

# Renumber rows 0..n-1 before building folds and converting to arrays.
go_df = go_df.reset_index(drop=True)

# Temporal folds built from go_df.
go_folds = make_temporal_folds(go_df)

# Inputs of the go-for-it conversion model; order = column order of X_go.
go_features = [
    "yardline_100",
    "ydstogo",
    "game_seconds_remaining",
    "half_seconds_remaining",
    "score_differential",
    "posteam_timeouts_remaining",
    "defteam_timeouts_remaining",
    "temp_F",
    "wind_mph",
    "possession_spread_line",
    "total_line",
    "is_redzone",
    "is_goal_to_go",
    "log_ydstogo",
    "log_game_seconds_remaining",
    "abs_score_differential",
    "score_time_ratio",
]

X_go = go_df[go_features].to_numpy()
y_go = go_df["success"].to_numpy()

# Features forced to have a non-increasing effect on conversion probability;
# every other feature is unconstrained (0).
go_decreasing_features = {
    "yardline_100",  # farther → worse
    "ydstogo",       # longer → worse
    "log_ydstogo",
}
monotone_constraints = [-1 if name in go_decreasing_features else 0 for name in go_features]

mono_tuple_go = tuple(monotone_constraints)

In [18]:
# SQLite file backing the study: <parent of cwd>/optuna/go_study.db
go_db_path = (Path.cwd().parent / "optuna" / "go_study.db").resolve()
go_storage = f"sqlite:///{go_db_path}"

# Seeded TPE sampler with 10 startup trials.
sampler = optuna.samplers.TPESampler(seed=42, n_startup_trials=10)

# load_if_exists=True: an existing study with this name is resumed, so each
# run of this cell appends more trials to it.
go_study = optuna.create_study(
    study_name="go_study_v5",
    storage=go_storage,
    sampler=sampler,
    direction="minimize",
    load_if_exists=True,
)

# XGBoost settings held constant across all trials.
go_fixed_params = dict(
    n_estimators=1000,
    eval_metric="logloss",
    tree_method="hist",
    early_stopping_rounds=50,
    max_bin=128,
    verbosity=0,
    n_jobs=14,
    objective="binary:logistic",
)


def objective(trial):
    """Adapter that hands the go-for-it data and settings to `go_objective` for one trial."""
    shared_inputs = dict(
        go_fixed_params=go_fixed_params,
        X_go=X_go,
        y_go=y_go,
        go_folds=go_folds,
        mono_tuple_go=mono_tuple_go,
    )
    return go_objective(trial, **shared_inputs)


go_study.optimize(objective, n_trials=50, n_jobs=1)

[32m[I 2026-02-02 22:33:55,332][0m Using an existing study with name 'go_study_v5' instead of creating a new one.[0m
[32m[I 2026-02-02 22:34:00,425][0m Trial 50 finished with value: 0.6393473754030676 and parameters: {'max_depth': 4, 'learning_rate': 0.07555862295689228, 'subsample': 0.7298699314014634, 'colsample_bytree': 0.7430144588117322, 'min_child_weight': 36.69440327802611, 'reg_lambda': 0.1447855291776302, 'reg_alpha': 0.13031032735028197, 'gamma': 0.1410282085103153}. Best is trial 50 with value: 0.6393473754030676.[0m
[32m[I 2026-02-02 22:34:05,965][0m Trial 51 finished with value: 0.6397483344561377 and parameters: {'max_depth': 4, 'learning_rate': 0.07982687581857208, 'subsample': 0.7207580953182977, 'colsample_bytree': 0.7426202666593316, 'min_child_weight': 37.3659916383397, 'reg_lambda': 0.13447400633919276, 'reg_alpha': 0.2836106854633029, 'gamma': 0.14654490340229132}. Best is trial 50 with value: 0.6393473754030676.[0m
[32m[I 2026-02-02 22:34:11,107][0m Tri

[32m[I 2026-02-02 22:36:22,323][0m Trial 70 finished with value: 0.6382045414734094 and parameters: {'max_depth': 4, 'learning_rate': 0.02898638396255853, 'subsample': 0.7317325779919848, 'colsample_bytree': 0.7248120089768025, 'min_child_weight': 1.8963604791527786, 'reg_lambda': 0.01605837060005397, 'reg_alpha': 0.1206604272089512, 'gamma': 1.5315121831824154}. Best is trial 70 with value: 0.6382045414734094.[0m
[32m[I 2026-02-02 22:36:30,771][0m Trial 71 finished with value: 0.6381831347396578 and parameters: {'max_depth': 4, 'learning_rate': 0.02779603806860381, 'subsample': 0.7277937498500431, 'colsample_bytree': 0.7259227163972111, 'min_child_weight': 1.8587006568806712, 'reg_lambda': 0.015693461464697817, 'reg_alpha': 0.14981236114166885, 'gamma': 1.5537858344315982}. Best is trial 71 with value: 0.6381831347396578.[0m
[32m[I 2026-02-02 22:36:40,453][0m Trial 72 finished with value: 0.6387955882873145 and parameters: {'max_depth': 4, 'learning_rate': 0.024946294874171417

[32m[I 2026-02-02 22:38:59,922][0m Trial 90 finished with value: 0.6388719756839205 and parameters: {'max_depth': 4, 'learning_rate': 0.029507939704542192, 'subsample': 0.7557644256778099, 'colsample_bytree': 0.7567962108049838, 'min_child_weight': 1.5467428472614941, 'reg_lambda': 0.020549056985853853, 'reg_alpha': 0.034120270020338475, 'gamma': 1.5015805111099492}. Best is trial 71 with value: 0.6381831347396578.[0m
[32m[I 2026-02-02 22:39:06,786][0m Trial 91 finished with value: 0.6383069110968312 and parameters: {'max_depth': 4, 'learning_rate': 0.026758029043615777, 'subsample': 0.7465018184872529, 'colsample_bytree': 0.7819933795240942, 'min_child_weight': 1.1562050065101217, 'reg_lambda': 0.022650520628293973, 'reg_alpha': 0.015839965208173015, 'gamma': 1.801237486748954}. Best is trial 71 with value: 0.6381831347396578.[0m
[32m[I 2026-02-02 22:39:13,462][0m Trial 92 finished with value: 0.6381477600919089 and parameters: {'max_depth': 4, 'learning_rate': 0.0310868641992

In [19]:
go_best_score = go_study.best_value
go_best_params = go_study.best_params.copy()

print("Best CV logloss:", go_best_score)
print()
print("Best params:", go_best_params)

# Fixed settings take precedence: discard any tuned key that also appears in
# go_fixed_params (can happen with trials stored by older study definitions).
go_best_params = {
    name: value
    for name, value in go_best_params.items()
    if name not in go_fixed_params
}

# Early stopping needs an eval_set, which the full-data fit does not have, so
# that key is left out. All n_estimators rounds are therefore trained.
final_params = {
    name: value
    for name, value in {**go_fixed_params, **go_best_params}.items()
    if name != "early_stopping_rounds"
}
final_params["monotone_constraints"] = mono_tuple_go

go_model = XGBClassifier(**final_params)
go_model.fit(X_go, y_go)

Best CV logloss: 0.6378868089635779

Best params: {'colsample_bytree': 0.7914405574432154, 'gamma': 1.9029147637853292, 'learning_rate': 0.028611537731746264, 'max_depth': 3, 'min_child_weight': 1.4242312191768751, 'reg_alpha': 0.10654719305229235, 'reg_lambda': 0.01887249771533841, 'subsample': 0.7315710911681592}


XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=0.7914405574432154, device=None,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric='logloss', feature_types=None,
              gamma=1.9029147637853292, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.028611537731746264,
              max_bin=128, max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=None,
              min_child_weight=1.4242312191768751, missing=nan,
              monotone_constraints=(-1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1,
                                    0, 0, 0),
              multi_strategy=None, n_estimators=1000, n_jobs=14,
              num_parallel_tree=None, random_state=None, ...)

In [20]:
# Pair each go-for-it feature with the fitted model's importance score, highest first.
go_importances = pd.DataFrame(
    {"feature": go_features, "importance": go_model.feature_importances_}
).sort_values(by="importance", ascending=False)

print(go_importances.to_string(index=False))

                   feature  importance
                   ydstogo    0.201499
               log_ydstogo    0.195677
             is_goal_to_go    0.044430
    half_seconds_remaining    0.043779
                is_redzone    0.043308
                total_line    0.041988
              yardline_100    0.041527
          score_time_ratio    0.041086
log_game_seconds_remaining    0.040224
    game_seconds_remaining    0.039933
    abs_score_differential    0.039331
    possession_spread_line    0.039235
                    temp_F    0.039139
defteam_timeouts_remaining    0.038726
        score_differential    0.037934
                  wind_mph    0.036600
posteam_timeouts_remaining    0.035584


In [21]:
# Models and feature lists shared by the train and test calls; only the input
# frame differs between them.
ewp_inputs = dict(
    wp_model=wp_model,
    go_model=go_model,
    fg_model=fg_model,
    punt_model=punt_model,
    wp_features=wp_features,
    wp_base_features=wp_base_features,
    go_features=go_features,
    fg_features=fg_features,
    punt_features=punt_features,
)

# Each call returns a pair of frames, unpacked here as (pre, fourth).
pbp_pre_train, pbp_fourth_train = create_df_with_ewp(pbp_train, **ewp_inputs)
pbp_pre_test, pbp_fourth_test = create_df_with_ewp(pbp_test, **ewp_inputs)

In [23]:
# Export every artifact under ../exports, tagged with a minute-resolution
# timestamp (two runs within the same minute overwrite each other).
timestamp = datetime.now().strftime("%Y%m%d_%H%M")

# Make sure the target directory exists so the writes below do not fail on a
# fresh checkout.
Path("../exports").mkdir(parents=True, exist_ok=True)

# Variables
variable_exports = {
    "test_season": test_season,
    "wp_features": wp_features,
    "wp_base_features": wp_base_features,
    "go_features": go_features,
    "fg_features": fg_features,
    "punt_features": punt_features,
    "go_folds": go_folds,
}
for name, value in variable_exports.items():
    joblib.dump(value, f"../exports/{name}_{timestamp}.joblib")

# Dataframes
# NOTE(review): the first two file names have no "_" before the timestamp,
# unlike every other export. They are kept as-is here because a downstream
# loader may depend on them -- confirm before renaming.
raw_pbp.to_parquet(f"../exports/raw_pbp{timestamp}.parquet")
pbp.to_parquet(f"../exports/pbp{timestamp}.parquet")
pbp_fourth_train.to_parquet(f"../exports/pbp_fourth_train_{timestamp}.parquet")
pbp_fourth_test.to_parquet(f"../exports/pbp_fourth_test_{timestamp}.parquet")

# Models
model_exports = {
    "wp_model": wp_model,
    "go_model": go_model,
    "fg_model": fg_model,
    "punt_model": punt_model,
}
for name, model in model_exports.items():
    joblib.dump(model, f"../exports/{name}_{timestamp}.joblib")

['../exports/punt_model_20260202_2252.joblib']