In [1089]:
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.impute import SimpleImputer
from sklearn.linear_model import RidgeClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from datetime import datetime, timedelta
import warnings

warnings.filterwarnings('ignore')

rr = RidgeClassifier(alpha=1)
split = TimeSeriesSplit(n_splits=3)
sfs = SequentialFeatureSelector(rr, n_features_to_select=30, direction="forward", cv=split)
scaler = MinMaxScaler()

In [770]:
df = pd.read_csv("2024_nfl_games.csv", index_col=0)
p_df = pd.read_csv("2024_nfl_prediction_games.csv", index_col=0)

In [771]:
# Deleting empty columns (% stats, etc.)
df = df.drop(columns = ["pass1d%", "passiay/pa", "passcay/cmp", "passcay/pa", "passyac/cmp", 
                        "passdrop%", "passbad%", "passprss", "passprss%", "passyds/scr", "rushybc/att", 
                        "rushyac/att", "rushatt/br", "recybc/r", "recyac/r", "recadot", "recrec/br", 
                        "recdrop%", "recrat", "defcmp%", "defyds/cmp", "defyds/tgt", "defrat", "defdadot", 
                        "defmtkl%", "kickrety/rt", "puntrety/r", "punty/p", "pass1d%_opp", "passiay/pa_opp", 
                        "passcay/cmp_opp", "passcay/pa_opp", "passyac/cmp_opp", "passdrop%_opp", "passbad%_opp", 
                        "passprss_opp", "passprss%_opp", "passyds/scr_opp", "rushybc/att_opp", "rushyac/att_opp", 
                        "rushatt/br_opp", "recybc/r_opp", "recyac/r_opp", "recadot_opp", "recrec/br_opp", 
                        "recdrop%_opp", "recrat_opp", "defcmp%_opp", "defyds/cmp_opp", "defyds/tgt_opp", 
                        "defrat_opp", "defdadot_opp", "defmtkl%_opp", "kickrety/rt_opp", "puntrety/r_opp", "punty/p_opp"])

In [772]:
df["date"] = pd.to_datetime(df["date"])
p_df["date"] = pd.to_datetime(p_df["date"])
# Initialize variables
week_date = datetime.strptime("9/5/2024", "%m/%d/%Y")
week_number = 1

# Iterate and assign week numbers
for i in range(len(df)):
    if df.loc[i, "date"] <= week_date + timedelta(days=6):
        df.loc[i, "week"] = week_number
    else:
        # Increment week_date and week_number
        week_date += timedelta(days=7)
        week_number += 1
        df.loc[i, "week"] = week_number

week_date = datetime.strptime("12/19/2024", "%m/%d/%Y")
week_number = 16

for i in range(len(p_df)):
    game_date = p_df.loc[i, "date"]
    
    # Check if the game is on Christmas (12/25)
    if game_date == datetime.strptime("12/25/2024", "%m/%d/%Y"):
        p_df.loc[i, "week"] = 17
    elif game_date <= week_date + timedelta(days=6):
        # Assign the current week if within the 7-day range
        p_df.loc[i, "week"] = week_number
    else:
        # Increment week_date and week_number for the next week
        week_date += timedelta(days=7)
        week_number += 1
        p_df.loc[i, "week"] = week_number

In [773]:
df.to_csv("2024_nfl_games_cleaned.csv")
p_df.to_csv("2024_nfl_prediction_games_cleaned.csv")

In [774]:
def add_target(team):
    team["target"] = team["won"].shift(-1)
    return team

df = df.groupby("Tm", group_keys=False).apply(add_target)
# combined_df = combined_df.groupby("Tm", group_keys=False).apply(add_target)

In [775]:
# combined_df[combined_df["Tm"] == "BAL"]
df[df["Tm"] == "BAL"]

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,punty/p_max_opp,puntlng_max_opp,Tm_opp,Final_opp,home_opp,season,date,won,week,target
0,26,41,273,11,271,107,166,0,11,1,...,48.0,56,KAN,27,1,2024,2024-09-05,False,1.0,False
59,21,34,247,14,265,148,99,0,4,2,...,42.7,54,LVR,26,0,2024,2024-09-15,False,2.0,True
74,12,15,182,8,47,31,151,0,3,0,...,52.3,62,DAL,25,1,2024,2024-09-22,True,3.0,True
117,14,19,157,9,115,64,93,4,0,1,...,48.2,59,BUF,10,0,2024,2024-09-29,True,4.0,True
132,26,42,348,18,384,182,166,1,13,1,...,50.6,66,CIN,38,1,2024,2024-10-06,True,5.0,True
181,20,26,323,16,244,174,149,1,2,2,...,42.3,54,WAS,23,0,2024,2024-10-13,True,6.0,True
212,17,22,281,12,184,141,140,2,1,2,...,46.5,52,TAM,31,1,2024,2024-10-21,True,7.0,False
218,23,38,289,14,354,157,132,4,3,3,...,48.7,58,CLE,29,1,2024,2024-10-27,False,8.0,True
271,16,19,280,12,150,141,139,1,1,1,...,48.0,50,DEN,10,0,2024,2024-11-03,True,9.0,True
277,25,33,290,14,239,108,182,2,2,0,...,41.3,46,CIN,34,0,2024,2024-11-07,True,10.0,False


In [776]:
# combined_df["target"][pd.isnull(combined_df["target"])] = 2
df["target"][pd.isnull(df["target"])] = 2

In [777]:
# combined_df["target"] = combined_df["target"].astype(int, errors="ignore")
df["target"] = df["target"].astype(int, errors="ignore")

In [778]:
df

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,punty/p_max_opp,puntlng_max_opp,Tm_opp,Final_opp,home_opp,season,date,won,week,target
0,26,41,273,11,271,107,166,0,11,1,...,48.0,56,KAN,27,1,2024,2024-09-05,False,1.0,0
1,20,28,291,15,165,103,188,2,2,2,...,37.5,40,BAL,20,0,2024,2024-09-05,True,1.0,1
2,17,35,260,9,314,122,138,4,7,2,...,50.5,54,PHI,34,1,2024,2024-09-06,False,1.0,1
3,20,34,278,14,242,108,170,0,5,2,...,37.0,41,GNB,29,0,2024,2024-09-06,True,1.0,0
4,17,23,156,6,122,102,54,0,4,2,...,52.5,59,ATL,10,1,2024,2024-09-08,True,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,18,29,295,13,245,144,151,1,3,4,...,44.3,51,LVR,13,0,2024,2024-12-08,True,14.0,2
410,33,44,369,18,280,162,207,2,3,2,...,49.5,52,DAL,20,1,2024,2024-12-09,True,14.0,2
411,16,31,183,9,233,97,86,2,9,3,...,40.3,61,CIN,27,0,2024,2024-12-09,False,14.0,2
412,16,27,160,6,177,75,85,2,4,0,...,45.1,55,SFO,6,1,2024,2024-12-12,True,15.0,2


In [779]:
df["won"].value_counts()

won
False    207
True     207
Name: count, dtype: int64

In [780]:
df["target"].value_counts()

target
0    191
1    191
2     32
Name: count, dtype: int64

In [781]:
nulls = pd.isnull(df)
nulls = nulls.sum()
nulls = nulls[nulls > 0]
# nulls = nulls[nulls > 96]
nulls

passyds/scr_max         90
rushatt/br_max         125
recrec/br_max          147
kickret_max             20
kickretyds_max          20
kickrety/rt_max        108
kickrettd_max           20
kickretlng_max          20
puntret_max             20
puntretyds_max          20
puntrety/r_max          78
puntrettd_max           20
puntretlng_max          20
xpm_max                 45
xpa_max                 40
fgm_max                 72
fga_max                 42
punty/p_max              7
passyds/scr_max_opp     90
rushatt/br_max_opp     125
recrec/br_max_opp      147
kickret_max_opp         20
kickretyds_max_opp      20
kickrety/rt_max_opp    108
kickrettd_max_opp       20
kickretlng_max_opp      20
puntret_max_opp         20
puntretyds_max_opp      20
puntrety/r_max_opp      78
puntrettd_max_opp       20
puntretlng_max_opp      20
xpm_max_opp             45
xpa_max_opp             40
fgm_max_opp             72
fga_max_opp             42
punty/p_max_opp          7
dtype: int64

In [782]:
valid_cols = df.columns[~df.columns.isin(nulls.index)]

In [783]:
valid_cols

Index(['passcmp', 'passatt', 'passyds', 'pass1d', 'passiay', 'passcay',
       'passyac', 'passdrops', 'passbadth', 'passsk',
       ...
       'puntyds_max_opp', 'puntlng_max_opp', 'Tm_opp', 'Final_opp', 'home_opp',
       'season', 'date', 'won', 'week', 'target'],
      dtype='object', length=272)

In [784]:
df = df[valid_cols].copy()

In [785]:
df

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,puntyds_max_opp,puntlng_max_opp,Tm_opp,Final_opp,home_opp,season,date,won,week,target
0,26,41,273,11,271,107,166,0,11,1,...,144,56,KAN,27,1,2024,2024-09-05,False,1.0,0
1,20,28,291,15,165,103,188,2,2,2,...,75,40,BAL,20,0,2024,2024-09-05,True,1.0,1
2,17,35,260,9,314,122,138,4,7,2,...,101,54,PHI,34,1,2024,2024-09-06,False,1.0,1
3,20,34,278,14,242,108,170,0,5,2,...,74,41,GNB,29,0,2024,2024-09-06,True,1.0,0
4,17,23,156,6,122,102,54,0,4,2,...,210,59,ATL,10,1,2024,2024-09-08,True,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,18,29,295,13,245,144,151,1,3,4,...,177,51,LVR,13,0,2024,2024-12-08,True,14.0,2
410,33,44,369,18,280,162,207,2,3,2,...,198,52,DAL,20,1,2024,2024-12-09,True,14.0,2
411,16,31,183,9,233,97,86,2,9,3,...,121,61,CIN,27,0,2024,2024-12-09,False,14.0,2
412,16,27,160,6,177,75,85,2,4,0,...,316,55,SFO,6,1,2024,2024-12-12,True,15.0,2


In [786]:
df.to_csv("combined2.csv")

In [787]:
removed_cols = ["season", "date", "won", "target", "Tm", "Tm_opp", "week"]

In [788]:
selected_cols = df.columns[~df.columns.isin(removed_cols)]

In [789]:
df[selected_cols] = scaler.fit_transform(df[selected_cols])

In [790]:
df

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,puntyds_max_opp,puntlng_max_opp,Tm_opp,Final_opp,home_opp,season,date,won,week,target
0,0.515152,0.600000,0.474388,0.380952,0.439216,0.315603,0.573913,0.000000,0.846154,0.111111,...,0.278530,0.666667,KAN,0.489796,1.0,2024,2024-09-05,False,1.0,0
1,0.333333,0.311111,0.514477,0.571429,0.231373,0.301418,0.669565,0.333333,0.153846,0.222222,...,0.145068,0.476190,BAL,0.346939,0.0,2024,2024-09-05,True,1.0,1
2,0.242424,0.466667,0.445434,0.285714,0.523529,0.368794,0.452174,0.666667,0.538462,0.222222,...,0.195358,0.642857,PHI,0.632653,1.0,2024,2024-09-06,False,1.0,1
3,0.333333,0.444444,0.485523,0.523810,0.382353,0.319149,0.591304,0.000000,0.384615,0.222222,...,0.143133,0.488095,GNB,0.530612,0.0,2024,2024-09-06,True,1.0,0
4,0.242424,0.200000,0.213808,0.142857,0.147059,0.297872,0.086957,0.000000,0.307692,0.222222,...,0.406190,0.702381,ATL,0.142857,1.0,2024,2024-09-08,True,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,0.272727,0.333333,0.523385,0.476190,0.388235,0.446809,0.508696,0.166667,0.230769,0.444444,...,0.342360,0.607143,LVR,0.204082,0.0,2024,2024-12-08,True,14.0,2
410,0.727273,0.666667,0.688196,0.714286,0.456863,0.510638,0.752174,0.333333,0.230769,0.222222,...,0.382979,0.619048,DAL,0.346939,1.0,2024,2024-12-09,True,14.0,2
411,0.212121,0.377778,0.273942,0.285714,0.364706,0.280142,0.226087,0.333333,0.692308,0.333333,...,0.234043,0.726190,CIN,0.489796,0.0,2024,2024-12-09,False,14.0,2
412,0.212121,0.288889,0.222717,0.142857,0.254902,0.202128,0.221739,0.333333,0.307692,0.000000,...,0.611219,0.654762,SFO,0.061224,1.0,2024,2024-12-12,True,15.0,2


In [791]:
sfs.fit(df[selected_cols], df["target"])

In [792]:
predictors = list(selected_cols[sfs.get_support()])

In [793]:
predictors

['passatt',
 'passhits',
 'rectgt',
 'kickrettd',
 'kickretlng',
 'puntlng',
 'passcay/pa_max',
 'passbad%_max',
 'passhits_max',
 'passprss%_max',
 'rushatt_max',
 'recbrktkl_max',
 'defcmp%_max',
 'defbltz_max',
 'defhrry_max',
 'defsk_max',
 'puntlng_max',
 'deftgt_opp',
 'defyac_opp',
 'defqbkd_opp',
 'kickrettd_opp',
 'puntrettd_opp',
 'passcay/cmp_max_opp',
 'passcay/pa_max_opp',
 'recybc_max_opp',
 'deftgt_max_opp',
 'defcmp_max_opp',
 'defrat_max_opp',
 'defyac_max_opp',
 'defcomb_max_opp']

In [794]:
def backtest_single_season(data, model, predictors, start_week=3, step=1):
    all_predictions = []

    weeks = sorted(data["week"].unique())

    for i in range(start_week, len(weeks), step):
        current_week = weeks[i]

        train = data[data["week"] < current_week]
        test = data[data["week"] == current_week]

        model.fit(train[predictors], train["target"])

        preds = model.predict(test[predictors])
        preds = pd.Series(preds, index=test.index)

        combined = pd.concat([test["target"], preds], axis=1)
        combined.columns= ["actual", "prediction"]

        all_predictions.append(combined)

    return pd.concat(all_predictions)

In [795]:
predictions = backtest_single_season(df, rr, predictors)

In [796]:
predictions

Unnamed: 0,actual,prediction
96,1,0
97,1,1
98,0,1
99,1,1
100,0,1
...,...,...
409,2,1
410,2,0
411,2,0
412,2,0


In [797]:
accuracy_score(predictions["actual"], predictions["prediction"])

0.5754716981132075

In [798]:
predictions_no_2s = predictions[predictions["actual"] != 2]
accuracy_score(predictions_no_2s["actual"], predictions_no_2s["prediction"])

0.6398601398601399

In [799]:
df.groupby("home").apply(lambda x: x[x["won"] == 1].shape[0] / x.shape[0])

home
0.0    0.483092
1.0    0.516908
dtype: float64

In [800]:
df_rolling = df[list(selected_cols) + ["won", "Tm", "season"]]

In [801]:
df_rolling

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,defmtkl_max_opp,defmtkl%_max_opp,pnt_max_opp,puntyds_max_opp,puntlng_max_opp,Final_opp,home_opp,won,Tm,season
0,0.515152,0.600000,0.474388,0.380952,0.439216,0.315603,0.573913,0.000000,0.846154,0.111111,...,0.500000,1.000,0.272727,0.278530,0.666667,0.489796,1.0,False,BAL,2024
1,0.333333,0.311111,0.514477,0.571429,0.231373,0.301418,0.669565,0.333333,0.153846,0.222222,...,0.333333,0.500,0.181818,0.145068,0.476190,0.346939,0.0,True,KAN,2024
2,0.242424,0.466667,0.445434,0.285714,0.523529,0.368794,0.452174,0.666667,0.538462,0.222222,...,0.333333,0.667,0.181818,0.195358,0.642857,0.632653,1.0,False,GNB,2024
3,0.333333,0.444444,0.485523,0.523810,0.382353,0.319149,0.591304,0.000000,0.384615,0.222222,...,0.333333,1.000,0.181818,0.143133,0.488095,0.530612,0.0,True,PHI,2024
4,0.242424,0.200000,0.213808,0.142857,0.147059,0.297872,0.086957,0.000000,0.307692,0.222222,...,0.500000,0.500,0.363636,0.406190,0.702381,0.142857,1.0,True,PIT,2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,0.272727,0.333333,0.523385,0.476190,0.388235,0.446809,0.508696,0.166667,0.230769,0.444444,...,0.833333,1.000,0.363636,0.342360,0.607143,0.204082,0.0,True,TAM,2024
410,0.727273,0.666667,0.688196,0.714286,0.456863,0.510638,0.752174,0.333333,0.230769,0.222222,...,0.166667,1.000,0.363636,0.382979,0.619048,0.346939,1.0,True,CIN,2024
411,0.212121,0.377778,0.273942,0.285714,0.364706,0.280142,0.226087,0.333333,0.692308,0.333333,...,0.333333,0.333,0.272727,0.234043,0.726190,0.489796,0.0,False,DAL,2024
412,0.212121,0.288889,0.222717,0.142857,0.254902,0.202128,0.221739,0.333333,0.307692,0.000000,...,0.166667,0.111,0.636364,0.611219,0.654762,0.061224,1.0,True,LAR,2024


In [802]:
future_cols = ["Tm", "season", "home", "home_opp"]
p_df = p_df.loc[:, future_cols]

In [803]:
p_df

Unnamed: 0,Tm,season,home,home_opp
0,DEN,2024,0,1
1,LAC,2024,1,0
2,HOU,2024,0,1
3,KAN,2024,1,0
4,PIT,2024,0,1
...,...,...,...,...
91,LAR,2024,1,0
92,CLE,2024,0,1
93,BAL,2024,1,0
94,NOR,2024,0,1


In [804]:
# missing_df = [col for col in p_df.columns if col not in df_rolling.columns]
# missing_p_df = [col for col in df_rolling.columns if col not in p_df.columns]

# # Add missing columns to each DataFrame
# for col in missing_df:
#     df_rolling[col] = pd.NA

# for col in missing_p_df:
#     p_df[col] = pd.NA

# df2 = df_rolling[p_df.columns]

# combined_df = pd.concat([p_df, df2], ignore_index=False)
# combined_df = combined_df.sort_values(by="date").reset_index(drop=True)
combined_df = pd.concat([df_rolling, p_df], ignore_index=False).reset_index(drop=True)

combined_df

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,defmtkl_max_opp,defmtkl%_max_opp,pnt_max_opp,puntyds_max_opp,puntlng_max_opp,Final_opp,home_opp,won,Tm,season
0,0.515152,0.600000,0.474388,0.380952,0.439216,0.315603,0.573913,0.000000,0.846154,0.111111,...,0.500000,1.000,0.272727,0.278530,0.666667,0.489796,1.0,False,BAL,2024
1,0.333333,0.311111,0.514477,0.571429,0.231373,0.301418,0.669565,0.333333,0.153846,0.222222,...,0.333333,0.500,0.181818,0.145068,0.476190,0.346939,0.0,True,KAN,2024
2,0.242424,0.466667,0.445434,0.285714,0.523529,0.368794,0.452174,0.666667,0.538462,0.222222,...,0.333333,0.667,0.181818,0.195358,0.642857,0.632653,1.0,False,GNB,2024
3,0.333333,0.444444,0.485523,0.523810,0.382353,0.319149,0.591304,0.000000,0.384615,0.222222,...,0.333333,1.000,0.181818,0.143133,0.488095,0.530612,0.0,True,PHI,2024
4,0.242424,0.200000,0.213808,0.142857,0.147059,0.297872,0.086957,0.000000,0.307692,0.222222,...,0.500000,0.500,0.363636,0.406190,0.702381,0.142857,1.0,True,PIT,2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505,,,,,,,,,,,...,,,,,,,0.0,,LAR,2024
506,,,,,,,,,,,...,,,,,,,1.0,,CLE,2024
507,,,,,,,,,,,...,,,,,,,0.0,,BAL,2024
508,,,,,,,,,,,...,,,,,,,1.0,,NOR,2024


In [805]:
combined_df.to_csv("rolling.csv")

In [750]:
def find_team_averages(team):
    numerics = team.select_dtypes(include="number")
    rolling = numerics.rolling(5).mean()
    return rolling

In [806]:
def find_team_averages(team):
    numerics = team.select_dtypes(include="number")
    rolling = numerics.rolling(5, min_periods=5).mean()  # Adjust rolling for available data
    rolling = rolling.fillna(method='ffill')  # Forward-fill missing data for future weeks
    return rolling

In [807]:
# df_rolling = df_rolling.groupby(["Tm", "season"], group_keys=False).apply(find_team_averages)
combined_df = combined_df.groupby(["Tm", "season"], group_keys=False).apply(find_team_averages)

In [808]:
combined_df

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,defprss_max_opp,defcomb_max_opp,defmtkl_max_opp,defmtkl%_max_opp,pnt_max_opp,puntyds_max_opp,puntlng_max_opp,Final_opp,home_opp,season
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505,0.303030,0.328889,0.401336,0.390476,0.368235,0.417021,0.306957,0.100000,0.400000,0.155556,...,0.266667,0.414286,0.266667,0.4688,0.272727,0.260348,0.607143,0.432653,0.4,2024.0
506,0.527273,0.662222,0.560356,0.561905,0.666275,0.641844,0.341739,0.333333,0.630769,0.355556,...,0.333333,0.428571,0.300000,0.3392,0.400000,0.396518,0.557143,0.546939,0.8,2024.0
507,0.309091,0.324444,0.396882,0.352381,0.413333,0.360284,0.367826,0.300000,0.276923,0.155556,...,0.222222,0.357143,0.433333,0.6658,0.345455,0.355126,0.642857,0.383673,0.4,2024.0
508,0.333333,0.373333,0.389310,0.352381,0.370196,0.306383,0.419130,0.200000,0.369231,0.066667,...,0.355556,0.414286,0.266667,0.5666,0.436364,0.388008,0.602381,0.289796,0.6,2024.0


In [809]:
combined_df.to_csv("rolling2.csv")

In [810]:
# rolling_cols = [f"{col}_5" for col in df_rolling.columns]
rolling_cols = [f"{col}_5" for col in combined_df.columns]

In [811]:
# df_rolling.columns = rolling_cols
# df_rolling
combined_df.columns = rolling_cols
combined_df

Unnamed: 0,passcmp_5,passatt_5,passyds_5,pass1d_5,passiay_5,passcay_5,passyac_5,passdrops_5,passbadth_5,passsk_5,...,defprss_max_opp_5,defcomb_max_opp_5,defmtkl_max_opp_5,defmtkl%_max_opp_5,pnt_max_opp_5,puntyds_max_opp_5,puntlng_max_opp_5,Final_opp_5,home_opp_5,season_5
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505,0.303030,0.328889,0.401336,0.390476,0.368235,0.417021,0.306957,0.100000,0.400000,0.155556,...,0.266667,0.414286,0.266667,0.4688,0.272727,0.260348,0.607143,0.432653,0.4,2024.0
506,0.527273,0.662222,0.560356,0.561905,0.666275,0.641844,0.341739,0.333333,0.630769,0.355556,...,0.333333,0.428571,0.300000,0.3392,0.400000,0.396518,0.557143,0.546939,0.8,2024.0
507,0.309091,0.324444,0.396882,0.352381,0.413333,0.360284,0.367826,0.300000,0.276923,0.155556,...,0.222222,0.357143,0.433333,0.6658,0.345455,0.355126,0.642857,0.383673,0.4,2024.0
508,0.333333,0.373333,0.389310,0.352381,0.370196,0.306383,0.419130,0.200000,0.369231,0.066667,...,0.355556,0.414286,0.266667,0.5666,0.436364,0.388008,0.602381,0.289796,0.6,2024.0


In [928]:
# df = pd.concat([df, df_rolling], axis=1)
# df = pd.concat([df, combined_df], axis=1)
df_test = pd.concat([df, combined_df], axis=1)

In [929]:
df_test

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,defprss_max_opp_5,defcomb_max_opp_5,defmtkl_max_opp_5,defmtkl%_max_opp_5,pnt_max_opp_5,puntyds_max_opp_5,puntlng_max_opp_5,Final_opp_5,home_opp_5,season_5
0,0.515152,0.600000,0.474388,0.380952,0.439216,0.315603,0.573913,0.000000,0.846154,0.111111,...,,,,,,,,,,
1,0.333333,0.311111,0.514477,0.571429,0.231373,0.301418,0.669565,0.333333,0.153846,0.222222,...,,,,,,,,,,
2,0.242424,0.466667,0.445434,0.285714,0.523529,0.368794,0.452174,0.666667,0.538462,0.222222,...,,,,,,,,,,
3,0.333333,0.444444,0.485523,0.523810,0.382353,0.319149,0.591304,0.000000,0.384615,0.222222,...,,,,,,,,,,
4,0.242424,0.200000,0.213808,0.142857,0.147059,0.297872,0.086957,0.000000,0.307692,0.222222,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505,,,,,,,,,,,...,0.266667,0.414286,0.266667,0.4688,0.272727,0.260348,0.607143,0.432653,0.4,2024.0
506,,,,,,,,,,,...,0.333333,0.428571,0.300000,0.3392,0.400000,0.396518,0.557143,0.546939,0.8,2024.0
507,,,,,,,,,,,...,0.222222,0.357143,0.433333,0.6658,0.345455,0.355126,0.642857,0.383673,0.4,2024.0
508,,,,,,,,,,,...,0.355556,0.414286,0.266667,0.5666,0.436364,0.388008,0.602381,0.289796,0.6,2024.0


In [930]:
# df = df.dropna()
rolling_cols = [col for col in df_test.columns if col.endswith('_5')]
df_test = df_test.dropna(subset=rolling_cols)

In [931]:
df_test = df_test.reset_index(drop=True)
df_test

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,defprss_max_opp_5,defcomb_max_opp_5,defmtkl_max_opp_5,defmtkl%_max_opp_5,pnt_max_opp_5,puntyds_max_opp_5,puntlng_max_opp_5,Final_opp_5,home_opp_5,season_5
0,0.303030,0.222222,0.267261,0.428571,0.145098,0.177305,0.339130,0.333333,0.153846,0.111111,...,0.311111,0.257143,0.300000,0.4156,0.236364,0.239458,0.578571,0.404082,0.4,2024.0
1,1.000000,1.000000,1.000000,1.000000,0.776471,0.897163,0.886957,0.666667,0.000000,0.444444,...,0.333333,0.357143,0.333333,0.6400,0.218182,0.211992,0.609524,0.408163,0.2,2024.0
2,0.393939,0.466667,0.298441,0.285714,0.439216,0.361702,0.173913,0.166667,0.461538,0.444444,...,0.266667,0.242857,0.366667,0.4066,0.309091,0.313733,0.607143,0.612245,0.6,2024.0
3,0.333333,0.333333,0.543430,0.428571,0.315686,0.368794,0.643478,0.000000,0.461538,0.111111,...,0.311111,0.485714,0.233333,0.3972,0.381818,0.407350,0.721429,0.285714,0.4,2024.0
4,0.515152,0.622222,0.641425,0.714286,0.660784,0.581560,0.573913,0.166667,1.000000,0.111111,...,0.311111,0.428571,0.600000,0.6332,0.381818,0.395745,0.707143,0.453061,0.6,2024.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377,,,,,,,,,,,...,0.266667,0.414286,0.266667,0.4688,0.272727,0.260348,0.607143,0.432653,0.4,2024.0
378,,,,,,,,,,,...,0.333333,0.428571,0.300000,0.3392,0.400000,0.396518,0.557143,0.546939,0.8,2024.0
379,,,,,,,,,,,...,0.222222,0.357143,0.433333,0.6658,0.345455,0.355126,0.642857,0.383673,0.4,2024.0
380,,,,,,,,,,,...,0.355556,0.414286,0.266667,0.5666,0.436364,0.388008,0.602381,0.289796,0.6,2024.0


In [932]:
df_test.to_csv("otadsof.csv")

In [947]:
df_test.iloc[286]['Tm']
df_part = df_test.iloc[286:].reset_index(drop=True)
df_part

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,defprss_max_opp_5,defcomb_max_opp_5,defmtkl_max_opp_5,defmtkl%_max_opp_5,pnt_max_opp_5,puntyds_max_opp_5,puntlng_max_opp_5,Final_opp_5,home_opp_5,season_5
0,,,,,,,,,,,...,0.288889,0.357143,0.166667,0.4900,0.381818,0.411219,0.723810,0.404082,0.6,2024.0
1,,,,,,,,,,,...,0.266667,0.371429,0.266667,0.4686,0.327273,0.354352,0.680952,0.371429,0.4,2024.0
2,,,,,,,,,,,...,0.355556,0.257143,0.333333,0.4550,0.363636,0.381044,0.721429,0.383673,0.6,2024.0
3,,,,,,,,,,,...,0.555556,0.257143,0.366667,0.5950,0.327273,0.316441,0.647619,0.367347,0.4,2024.0
4,,,,,,,,,,,...,0.422222,0.442857,0.366667,0.3832,0.363636,0.368279,0.700000,0.424490,0.6,2024.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,,,,,,,,,,,...,0.266667,0.414286,0.266667,0.4688,0.272727,0.260348,0.607143,0.432653,0.4,2024.0
92,,,,,,,,,,,...,0.333333,0.428571,0.300000,0.3392,0.400000,0.396518,0.557143,0.546939,0.8,2024.0
93,,,,,,,,,,,...,0.222222,0.357143,0.433333,0.6658,0.345455,0.355126,0.642857,0.383673,0.4,2024.0
94,,,,,,,,,,,...,0.355556,0.414286,0.266667,0.5666,0.436364,0.388008,0.602381,0.289796,0.6,2024.0


In [948]:
df_part.to_csv("rolling_stats_future.csv")

In [975]:
p_df = pd.read_csv("2024_nfl_prediction_games.csv", index_col=0)
# future_cols = ["Tm","Tm_opp",  "home", "home_opp", "season", "date"]
p_df = p_df.loc[:, future_cols]

In [1007]:
work = df_part.copy()
x = work.fillna(p_df).sort_values(by='date').reset_index(drop=True)

In [1008]:
x

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,defprss_max_opp_5,defcomb_max_opp_5,defmtkl_max_opp_5,defmtkl%_max_opp_5,pnt_max_opp_5,puntyds_max_opp_5,puntlng_max_opp_5,Final_opp_5,home_opp_5,season_5
0,,,,,,,,,,,...,0.200000,0.300000,0.433333,0.5890,0.327273,0.287427,0.564286,0.330612,0.4,2024.0
1,,,,,,,,,,,...,0.244444,0.342857,0.400000,0.6666,0.290909,0.288588,0.628571,0.502041,0.4,2024.0
2,,,,,,,,,,,...,0.222222,0.271429,0.200000,0.4668,0.218182,0.217795,0.504762,0.432653,0.6,2024.0
3,,,,,,,,,,,...,0.244444,0.242857,0.266667,0.5732,0.254545,0.270406,0.633333,0.453061,0.4,2024.0
4,,,,,,,,,,,...,0.177778,0.328571,0.300000,0.5888,0.381818,0.415087,0.700000,0.420408,0.4,2024.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,,,,,,,,,,,...,0.244444,0.342857,0.400000,0.6666,0.290909,0.288588,0.628571,0.502041,0.6,2024.0
92,,,,,,,,,,,...,0.244444,0.528571,0.300000,0.3234,0.363636,0.343520,0.657143,0.465306,0.2,2024.0
93,,,,,,,,,,,...,0.222222,0.271429,0.200000,0.4668,0.218182,0.217795,0.504762,0.432653,0.6,2024.0
94,,,,,,,,,,,...,0.311111,0.400000,0.433333,0.3766,0.400000,0.451064,0.700000,0.220408,0.6,2024.0


In [1009]:
x.to_csv("future_games.csv")

In [1010]:
df_test2 = df_test.iloc[:286]
df_test2['date'] = pd.to_datetime(df_test2['date'], errors='coerce')

df_test2

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,defprss_max_opp_5,defcomb_max_opp_5,defmtkl_max_opp_5,defmtkl%_max_opp_5,pnt_max_opp_5,puntyds_max_opp_5,puntlng_max_opp_5,Final_opp_5,home_opp_5,season_5
0,0.303030,0.222222,0.267261,0.428571,0.145098,0.177305,0.339130,0.333333,0.153846,0.111111,...,0.311111,0.257143,0.300000,0.4156,0.236364,0.239458,0.578571,0.404082,0.4,2024.0
1,1.000000,1.000000,1.000000,1.000000,0.776471,0.897163,0.886957,0.666667,0.000000,0.444444,...,0.333333,0.357143,0.333333,0.6400,0.218182,0.211992,0.609524,0.408163,0.2,2024.0
2,0.393939,0.466667,0.298441,0.285714,0.439216,0.361702,0.173913,0.166667,0.461538,0.444444,...,0.266667,0.242857,0.366667,0.4066,0.309091,0.313733,0.607143,0.612245,0.6,2024.0
3,0.333333,0.333333,0.543430,0.428571,0.315686,0.368794,0.643478,0.000000,0.461538,0.111111,...,0.311111,0.485714,0.233333,0.3972,0.381818,0.407350,0.721429,0.285714,0.4,2024.0
4,0.515152,0.622222,0.641425,0.714286,0.660784,0.581560,0.573913,0.166667,1.000000,0.111111,...,0.311111,0.428571,0.600000,0.6332,0.381818,0.395745,0.707143,0.453061,0.6,2024.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,0.272727,0.333333,0.523385,0.476190,0.388235,0.446809,0.508696,0.166667,0.230769,0.444444,...,0.200000,0.300000,0.433333,0.5890,0.327273,0.287427,0.564286,0.330612,0.6,2024.0
282,0.727273,0.666667,0.688196,0.714286,0.456863,0.510638,0.752174,0.333333,0.230769,0.222222,...,0.555556,0.485714,0.233333,0.8334,0.381818,0.397292,0.647619,0.579592,0.6,2024.0
283,0.212121,0.377778,0.273942,0.285714,0.364706,0.280142,0.226087,0.333333,0.692308,0.333333,...,0.333333,0.300000,0.366667,0.3798,0.400000,0.392263,0.666667,0.514286,0.2,2024.0
284,0.212121,0.288889,0.222717,0.142857,0.254902,0.202128,0.221739,0.333333,0.307692,0.000000,...,0.266667,0.414286,0.266667,0.4688,0.272727,0.260348,0.607143,0.432653,0.6,2024.0


In [1063]:
x['date'] = pd.to_datetime(x['date'], errors='coerce')

y = pd.concat([df_test2, x]).reset_index(drop=True).sort_values(by='date').reset_index(drop=True)
y

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,defprss_max_opp_5,defcomb_max_opp_5,defmtkl_max_opp_5,defmtkl%_max_opp_5,pnt_max_opp_5,puntyds_max_opp_5,puntlng_max_opp_5,Final_opp_5,home_opp_5,season_5
0,0.303030,0.222222,0.267261,0.428571,0.145098,0.177305,0.339130,0.333333,0.153846,0.111111,...,0.311111,0.257143,0.300000,0.4156,0.236364,0.239458,0.578571,0.404082,0.4,2024.0
1,1.000000,1.000000,1.000000,1.000000,0.776471,0.897163,0.886957,0.666667,0.000000,0.444444,...,0.333333,0.357143,0.333333,0.6400,0.218182,0.211992,0.609524,0.408163,0.2,2024.0
2,0.181818,0.311111,0.391982,0.238095,0.513725,0.453901,0.243478,0.333333,0.384615,0.333333,...,0.200000,0.428571,0.300000,0.4666,0.254545,0.262282,0.535714,0.408163,0.6,2024.0
3,0.212121,0.333333,0.180401,0.190476,0.313725,0.152482,0.200000,0.333333,0.461538,0.777778,...,0.488889,0.371429,0.266667,0.4916,0.436364,0.477756,0.716667,0.432653,0.6,2024.0
4,0.303030,0.466667,0.409800,0.476190,0.496078,0.436170,0.300000,0.333333,0.384615,0.222222,...,0.266667,0.414286,0.333333,0.5400,0.272727,0.286267,0.678571,0.371429,0.4,2024.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377,,,,,,,,,,,...,0.333333,0.428571,0.300000,0.3392,0.400000,0.396518,0.557143,0.546939,0.8,2024.0
378,,,,,,,,,,,...,0.222222,0.357143,0.433333,0.6658,0.345455,0.355126,0.642857,0.383673,0.4,2024.0
379,,,,,,,,,,,...,0.377778,0.328571,0.366667,0.4532,0.363636,0.335397,0.583333,0.416327,0.6,2024.0
380,,,,,,,,,,,...,0.155556,0.428571,0.300000,0.5000,0.272727,0.282012,0.657143,0.367347,0.6,2024.0


In [1065]:
y = y.groupby("Tm", group_keys=False).apply(add_target)

y["target"][pd.isnull(y["target"])] = 2
y["target"] = y["target"].astype(int, errors="ignore")

In [1066]:
y["target"].value_counts()

target
0    128
2    128
1    126
Name: count, dtype: int64

In [1067]:
y.to_csv("all_games.csv")

In [1069]:
y2 = y.copy()

In [1068]:
def shift_col(team, col_name):
    next_col = team[col_name].shift(-1)
    return next_col

def add_col(df, col_name):
    return df.groupby("Tm", group_keys=False).apply(lambda x: shift_col(x, col_name))

In [1072]:
# df["home_next"] = add_col(df, "home")
# df["team_opp_next"] = add_col(df, "Tm_opp")
# df["date_next"] = add_col(df, "date")

y2["home_next"] = add_col(y2, "home")
y2["team_opp_next"] = add_col(y2, "Tm_opp")
y2["date_next"] = add_col(y2, "date")

y2

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,defmtkl%_max_opp_5,pnt_max_opp_5,puntyds_max_opp_5,puntlng_max_opp_5,Final_opp_5,home_opp_5,season_5,home_next,team_opp_next,date_next
0,0.303030,0.222222,0.267261,0.428571,0.145098,0.177305,0.339130,0.333333,0.153846,0.111111,...,0.4156,0.236364,0.239458,0.578571,0.404082,0.4,2024.0,0.0,NOR,2024-10-13
1,1.000000,1.000000,1.000000,1.000000,0.776471,0.897163,0.886957,0.666667,0.000000,0.444444,...,0.6400,0.218182,0.211992,0.609524,0.408163,0.2,2024.0,0.0,CAR,2024-10-13
2,0.181818,0.311111,0.391982,0.238095,0.513725,0.453901,0.243478,0.333333,0.384615,0.333333,...,0.4666,0.254545,0.262282,0.535714,0.408163,0.6,2024.0,0.0,BAL,2024-10-13
3,0.212121,0.333333,0.180401,0.190476,0.313725,0.152482,0.200000,0.333333,0.461538,0.777778,...,0.4916,0.436364,0.477756,0.716667,0.432653,0.6,2024.0,0.0,PHI,2024-10-13
4,0.303030,0.466667,0.409800,0.476190,0.496078,0.436170,0.300000,0.333333,0.384615,0.222222,...,0.5400,0.272727,0.286267,0.678571,0.371429,0.4,2024.0,0.0,SEA,2024-10-10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377,,,,,,,,,,,...,0.3392,0.400000,0.396518,0.557143,0.546939,0.8,2024.0,,,NaT
378,,,,,,,,,,,...,0.6658,0.345455,0.355126,0.642857,0.383673,0.4,2024.0,,,NaT
379,,,,,,,,,,,...,0.4532,0.363636,0.335397,0.583333,0.416327,0.6,2024.0,,,NaT
380,,,,,,,,,,,...,0.5000,0.272727,0.282012,0.657143,0.367347,0.6,2024.0,,,NaT


In [1073]:
y2.to_csv("combined.csv")

In [1074]:
full = y2.merge(y2[rolling_cols + ["team_opp_next", "date_next", "Tm"]], 
                left_on=["Tm", "date_next"], right_on=["team_opp_next", "date_next"])

In [1077]:
full

Unnamed: 0,passcmp,passatt,passyds,pass1d,passiay,passcay,passyac,passdrops,passbadth,passsk,...,defmtkl_max_opp_5_y,defmtkl%_max_opp_5_y,pnt_max_opp_5_y,puntyds_max_opp_5_y,puntlng_max_opp_5_y,Final_opp_5_y,home_opp_5_y,season_5_y,team_opp_next_y,Tm_y
0,0.303030,0.222222,0.267261,0.428571,0.145098,0.177305,0.339130,0.333333,0.153846,0.111111,...,0.133333,0.7000,0.200000,0.183752,0.597619,0.330612,0.6,2024.0,TAM,NOR
1,1.000000,1.000000,1.000000,1.000000,0.776471,0.897163,0.886957,0.666667,0.000000,0.444444,...,0.366667,0.4066,0.309091,0.313733,0.607143,0.612245,0.6,2024.0,ATL,CAR
2,0.181818,0.311111,0.391982,0.238095,0.513725,0.453901,0.243478,0.333333,0.384615,0.333333,...,0.600000,0.6332,0.381818,0.395745,0.707143,0.453061,0.6,2024.0,WAS,BAL
3,0.303030,0.466667,0.409800,0.476190,0.496078,0.436170,0.300000,0.333333,0.384615,0.222222,...,0.400000,0.7666,0.490909,0.516054,0.700000,0.404082,0.4,2024.0,SFO,SEA
4,0.303030,0.355556,0.300668,0.333333,0.343137,0.294326,0.260870,0.000000,0.384615,0.111111,...,0.366667,0.4992,0.309091,0.310638,0.652381,0.379592,0.6,2024.0,ARI,GNB
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
341,,,,,,,,,,,...,0.333333,0.4550,0.363636,0.381044,0.721429,0.383673,0.6,2024.0,TEN,HOU
342,,,,,,,,,,,...,0.200000,0.4668,0.218182,0.217795,0.504762,0.432653,0.6,2024.0,IND,JAX
343,,,,,,,,,,,...,0.433333,0.6658,0.345455,0.355126,0.642857,0.383673,0.6,2024.0,CLE,BAL
344,,,,,,,,,,,...,0.366667,0.6230,0.327273,0.349323,0.702381,0.367347,0.8,2024.0,SFO,ARI


In [1085]:
full.to_csv("full.csv")

In [1078]:
full[["Tm_x", "team_opp_next_x", "Tm_y", "team_opp_next_y", "date_next"]]

Unnamed: 0,Tm_x,team_opp_next_x,Tm_y,team_opp_next_y,date_next
0,TAM,NOR,NOR,TAM,2024-10-13
1,ATL,CAR,CAR,ATL,2024-10-13
2,WAS,BAL,BAL,WAS,2024-10-13
3,SFO,SEA,SEA,SFO,2024-10-10
4,ARI,GNB,GNB,ARI,2024-10-13
...,...,...,...,...,...
341,TEN,HOU,HOU,TEN,2025-01-05
342,IND,JAX,JAX,IND,2025-01-05
343,CLE,BAL,BAL,CLE,2025-01-05
344,SFO,ARI,ARI,SFO,2025-01-05


In [1079]:
removed_cols = list(full.columns[full.dtypes == "object"]) + removed_cols

In [1080]:
removed_cols

['Tm_x',
 'Tm_opp',
 'won',
 'team_opp_next_x',
 'team_opp_next_y',
 'Tm_y',
 'season',
 'date',
 'won',
 'target',
 'Tm',
 'Tm_opp',
 'week']

In [1081]:
selected_cols = full.columns[~full.columns.isin(removed_cols)]
numeric_cols = full[selected_cols].select_dtypes(include=["number"]).columns

In [1082]:
numeric_cols

Index(['passcmp', 'passatt', 'passyds', 'pass1d', 'passiay', 'passcay',
       'passyac', 'passdrops', 'passbadth', 'passsk',
       ...
       'defprss_max_opp_5_y', 'defcomb_max_opp_5_y', 'defmtkl_max_opp_5_y',
       'defmtkl%_max_opp_5_y', 'pnt_max_opp_5_y', 'puntyds_max_opp_5_y',
       'puntlng_max_opp_5_y', 'Final_opp_5_y', 'home_opp_5_y', 'season_5_y'],
      dtype='object', length=798)

In [1092]:
imputer = SimpleImputer(strategy='mean')
full[numeric_cols] = imputer.fit_transform(full[numeric_cols])

In [1093]:
sfs.fit(full[numeric_cols], full["target"])

In [1094]:
predictors = list(numeric_cols[sfs.get_support()])

In [1099]:
predictors

['defint',
 'defcmp',
 'defsk',
 'kickrettd',
 'passatt_max',
 'passdrop%_max',
 'rushyac_max',
 'defcmp%_max',
 'defdadot_max',
 'puntlng_max',
 'passcmp_opp',
 'passsk_opp',
 'rectgt_opp',
 'rec_opp',
 'kickrettd_opp',
 'puntrettd_opp',
 'fgm_opp',
 'fga_opp',
 'passcmp_max_opp',
 'passcay/pa_max_opp',
 'passsk_max_opp',
 'rushybc/att_max_opp',
 'defyds/cmp_max_opp',
 'rushbrktkl_5_x',
 'kickrettd_5_x',
 'defdadot_max_5_x',
 'rushbrktkl_opp_5_x',
 'deftd_opp_5_x',
 'kickretlng_opp_5_y',
 'defrat_max_opp_5_y']

In [1119]:
games_played = full[full["target"] != 2]
games_to_predict = full[full["target"] == 2]

# predictions = backtest_single_season(full, rr, predictors)
model = RidgeClassifier()

# Train the model
model.fit(games_played[predictors], games_played["target"])

# Make predictions for the remaining games
predictions = model.predict(games_to_predict[predictors])

# Store the predictions in the original DataFrame (e.g., in the 'target' column)
games_to_predict['target'] = predictions

In [1121]:
predictions

array([0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1,
       0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 1])

In [1156]:
games_to_predict_sorted = games_to_predict.sort_values(by=['date']).reset_index(drop=True)

cols = ["Tm_x", "Tm_opp", "date", "won", "target"] + [col for col in games_to_predict_sorted.columns if col not in 
                                                      ["Tm_x", "Tm_opp", "date", "won", "target"]]
games_to_predict_sorted = games_to_predict_sorted[cols]

In [1157]:
games_to_predict_sorted

Unnamed: 0,Tm_x,Tm_opp,date,won,target,passcmp,passatt,passyds,pass1d,passiay,...,defmtkl_max_opp_5_y,defmtkl%_max_opp_5_y,pnt_max_opp_5_y,puntyds_max_opp_5_y,puntlng_max_opp_5_y,Final_opp_5_y,home_opp_5_y,season_5_y,team_opp_next_y,Tm_y
0,GNB,MIA,2024-11-28,True,0,0.363636,0.311111,0.476615,0.380952,0.301961,...,0.266667,0.5666,0.436364,0.388008,0.602381,0.289796,0.4,2024.0,GNB,NOR
1,DET,CHI,2024-11-28,True,0,0.363636,0.444444,0.358575,0.428571,0.305882,...,0.233333,0.4566,0.254545,0.267311,0.657143,0.469388,0.4,2024.0,DET,CHI
2,BAL,PHI,2024-12-01,False,0,0.424242,0.488889,0.394209,0.380952,0.617647,...,0.366667,0.3832,0.363636,0.368279,0.700000,0.424490,0.6,2024.0,BAL,PIT
3,WAS,TEN,2024-12-01,True,1,0.484848,0.355556,0.325167,0.428571,0.221569,...,0.300000,0.7334,0.381818,0.373308,0.621429,0.261224,0.6,2024.0,WAS,PHI
4,NWE,IND,2024-12-01,False,1,0.454545,0.355556,0.396437,0.523810,0.296078,...,0.300000,0.4044,0.218182,0.215474,0.592857,0.436735,0.4,2024.0,NWE,BUF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,CAR,TAM,2024-12-29,,1,0.377391,0.426005,0.400780,0.408646,0.413016,...,0.400000,0.6666,0.290909,0.288588,0.628571,0.502041,0.6,2024.0,CAR,ATL
92,PHI,DAL,2024-12-29,,1,0.377391,0.426005,0.400780,0.408646,0.413016,...,0.200000,0.2650,0.418182,0.407737,0.628571,0.420408,0.4,2024.0,PHI,NYG
93,NYG,IND,2024-12-29,,0,0.377391,0.426005,0.400780,0.408646,0.413016,...,0.300000,0.7334,0.381818,0.373308,0.621429,0.261224,0.6,2024.0,NYG,PHI
94,SFO,DET,2024-12-30,,1,0.377391,0.426005,0.400780,0.408646,0.413016,...,0.366667,0.6230,0.327273,0.349323,0.702381,0.367347,0.8,2024.0,SFO,ARI


In [1159]:
games_to_predict_sorted.to_csv("predictions.csv")