In [1]:
# Import packages

import pandas as pd 
import numpy as np 
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from joblib import dump, load

# Show every column when a DataFrame is displayed (no column truncation)
pd.options.display.max_columns = None

In [2]:
# Load the value-model stats and the game schedule from CSV
stats_path = '../../data/value_models_combined_6_game_rolling.csv'
schedule_path = '../../data/schedule_final.csv'

stats = pd.read_csv(stats_path)
schedule = pd.read_csv(schedule_path)

In [3]:
# Preview the first five rows of the stats table
stats.head(n=5)

Unnamed: 0.1,Unnamed: 0,season,week,team,qb,passing_value,rushing_value,qb_rushing_value_pct,pass_def_value,rush_def_value,team_full
0,0,2014,1,ARI,C. Palmer,,,,,,Arizona Cardinals
1,1,2014,1,ATL,M. Ryan,,,,,,Atlanta Falcons
2,2,2014,1,BAL,J. Flacco,,,,,,Baltimore Ravens
3,3,2014,1,BUF,E. Manuel,,,,,,Buffalo Bills
4,4,2014,1,CAR,D. Anderson,,,,,,Carolina Panthers


In [4]:
# Preview the first five rows of the schedule table
schedule.head(n=5)

Unnamed: 0.1,Unnamed: 0,date,season,week,season_type,home,away,home_score,away_score,home_qb,away_qb,home_qb_abv,away_qb_abv
0,0,2014-09-04,2014,1,REG,Seattle Seahawks,Green Bay Packers,36,16,Russell Wilson,Aaron Rodgers,R. Wilson,A. Rodgers
1,1,2014-09-07,2014,1,REG,Denver Broncos,Indianapolis Colts,31,24,Peyton Manning,Andrew Luck,P. Manning,A. Luck
2,2,2014-09-07,2014,1,REG,Philadelphia Eagles,Jacksonville Jaguars,34,17,Nick Foles,Chad Henne,N. Foles,C. Henne
3,3,2014-09-07,2014,1,REG,Kansas City Chiefs,Tennessee Titans,10,26,Alex Smith,Jake Locker,A. Smith,J. Locker
4,4,2014-09-07,2014,1,REG,Los Angeles Rams,Minnesota Vikings,6,34,Shaun Hill,Matt Cassel,S. Hill,M. Cassel


In [5]:
# Row counts of the two inputs: stats first, then schedule
for frame in (stats, schedule):
    print(len(frame))

4745
2291


In [6]:
# Attach each side's stats to the schedule. The stats columns are suffixed
# with '_home' / '_away' and left-joined on (season, week, team name, QB).
nfl = schedule.copy()
for side in ('home', 'away'):
    nfl = nfl.merge(
        stats.add_suffix(f'_{side}'),
        how='left',
        left_on=['season', 'week', side, f'{side}_qb_abv'],
        right_on=[f'season_{side}', f'week_{side}', f'team_full_{side}', f'qb_{side}'],
    )

# Keep the game descriptors plus the five value columns for each side
# (home columns first, then away), dropping the merge keys and index columns.
game_cols = ['date', 'season', 'week', 'season_type', 'home', 'away',
             'home_score', 'away_score', 'home_qb', 'away_qb']
value_cols = ['passing_value', 'rushing_value', 'qb_rushing_value_pct',
              'pass_def_value', 'rush_def_value']
side_value_cols = [f'{col}_{team_side}' for team_side in ('home', 'away') for col in value_cols]

nfl = nfl[game_cols + side_value_cols]
nfl.head()

Unnamed: 0,date,season,week,season_type,home,away,home_score,away_score,home_qb,away_qb,passing_value_home,rushing_value_home,qb_rushing_value_pct_home,pass_def_value_home,rush_def_value_home,passing_value_away,rushing_value_away,qb_rushing_value_pct_away,pass_def_value_away,rush_def_value_away
0,2014-09-04,2014,1,REG,Seattle Seahawks,Green Bay Packers,36,16,Russell Wilson,Aaron Rodgers,,,,,,,,,,
1,2014-09-07,2014,1,REG,Denver Broncos,Indianapolis Colts,31,24,Peyton Manning,Andrew Luck,,,,,,,,,,
2,2014-09-07,2014,1,REG,Philadelphia Eagles,Jacksonville Jaguars,34,17,Nick Foles,Chad Henne,,,,,,,,,,
3,2014-09-07,2014,1,REG,Kansas City Chiefs,Tennessee Titans,10,26,Alex Smith,Jake Locker,,,,,,,,,,
4,2014-09-07,2014,1,REG,Los Angeles Rams,Minnesota Vikings,6,34,Shaun Hill,Matt Cassel,,,,,,,,,,


In [7]:
# Row count after the merges; compare with len(schedule) to spot rows
# duplicated by the joins.
print(nfl.shape[0])

2291


In [8]:
# Remove any ties, add binary label for home win

# A tie has no winner, so those games are dropped before labelling. The
# explicit .copy() makes nfl_cleaned an independent frame, so adding a column
# below cannot raise pandas' SettingWithCopyWarning.
is_tie = nfl.home_score == nfl.away_score
nfl_cleaned = nfl.loc[~is_tie].copy()

# Vectorised label: 1 when the home team outscored the away team, else 0.
# Unlike a row-wise apply(axis=1), this also works when no rows remain.
nfl_cleaned['home_win'] = (nfl_cleaned.home_score > nfl_cleaned.away_score).astype(int)
nfl_cleaned

Unnamed: 0,date,season,week,season_type,home,away,home_score,away_score,home_qb,away_qb,passing_value_home,rushing_value_home,qb_rushing_value_pct_home,pass_def_value_home,rush_def_value_home,passing_value_away,rushing_value_away,qb_rushing_value_pct_away,pass_def_value_away,rush_def_value_away,home_win
0,2014-09-04,2014,1,REG,Seattle Seahawks,Green Bay Packers,36,16,Russell Wilson,Aaron Rodgers,,,,,,,,,,,1
1,2014-09-07,2014,1,REG,Denver Broncos,Indianapolis Colts,31,24,Peyton Manning,Andrew Luck,,,,,,,,,,,1
2,2014-09-07,2014,1,REG,Philadelphia Eagles,Jacksonville Jaguars,34,17,Nick Foles,Chad Henne,,,,,,,,,,,1
3,2014-09-07,2014,1,REG,Kansas City Chiefs,Tennessee Titans,10,26,Alex Smith,Jake Locker,,,,,,,,,,,0
4,2014-09-07,2014,1,REG,Los Angeles Rams,Minnesota Vikings,6,34,Shaun Hill,Matt Cassel,,,,,,,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2286,2022-11-06,2022,9,REG,Arizona Cardinals,Seattle Seahawks,21,31,Kyler Murray,Geno Smith,0.376204,0.567298,0.337853,0.538769,0.495217,0.551992,0.576819,0.164394,0.483338,0.476432,0
2287,2022-11-06,2022,9,REG,Tampa Bay Buccaneers,Los Angeles Rams,16,13,Tom Brady,Matthew Stafford,0.519865,0.292861,0.007250,0.379830,0.365571,0.413164,0.379353,0.026539,0.542531,0.483446,1
2288,2022-11-06,2022,9,REG,Kansas City Chiefs,Tennessee Titans,20,17,Patrick Mahomes,Malik Willis,0.705017,0.638432,0.200027,0.400367,0.481490,0.002910,0.893241,0.025316,0.499462,0.608670,1
2289,2022-11-07,2022,9,REG,New Orleans Saints,Baltimore Ravens,13,27,Andy Dalton,Lamar Jackson,0.531013,0.672527,0.271606,0.520587,0.474631,0.285797,0.819492,0.387636,0.480265,0.356171,0


In [20]:
# Split data into training, validation, and testing sets

# Split by season/week: everything through 2021 week 8 is the training set,
# the remainder of 2021 is the validation set, and all of 2022 is the test set.
seasons = nfl_cleaned.season
weeks = nfl_cleaned.week

in_train = (seasons <= 2020) | ((seasons == 2021) & (weeks <= 8))
in_val = (seasons == 2021) & (weeks > 8)
in_test = seasons == 2022

# Rows with a missing value in any column are discarded from each split
train_df = nfl_cleaned[in_train].dropna()
val_df = nfl_cleaned[in_val].dropna()
test_df = nfl_cleaned[in_test].dropna()

In [21]:
# Prepare feature list

# Four value metrics per team: all home-side columns first, then away-side
feature_list = [
    f'{metric}_{side}'
    for side in ('home', 'away')
    for metric in ('passing_value', 'rushing_value', 'pass_def_value', 'rush_def_value')
]

In [32]:
# Get features, labels for train, val, and test sets

# Every split is converted to plain NumPy arrays in the same way: the feature
# matrix uses the columns in feature_list (in that order) and the label vector
# is the 0/1 home_win column.
train_x = train_df[feature_list].to_numpy()
train_y = train_df.home_win.to_numpy()

val_x = val_df[feature_list].to_numpy()
val_y = val_df.home_win.to_numpy()

# test_x was previously left as a DataFrame. Convert it like the other splits
# so all three feature matrices have the same type; a scikit-learn estimator
# fitted on an array warns about feature names when given a DataFrame.
test_x = test_df[feature_list].to_numpy()
test_y = test_df.home_win.to_numpy()