In [57]:
# Import packages

import pandas as pd 
import numpy as np 
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

import xgboost as xgb

from joblib import dump, load

pd.set_option('display.max_columns', None)

In [58]:
# Rolling team/QB value stats; rows containing any missing value are discarded right away
stats = pd.read_csv('../../data/adjusted_value_models_combined_6_game_rolling.csv').dropna()

# Game schedule (teams, scores, starting quarterbacks)
schedule = pd.read_csv('../../data/schedule_final.csv')

# Betting lines and returns, minus the 'Unnamed: 0' column (presumably a saved index -- confirm)
betting = (
    pd.read_csv('../../data/betting_data_cleaned_with_returns.csv')
    .drop(columns=['Unnamed: 0'])
)

In [59]:
# Preview the first rows of the NaN-filtered stats frame
stats.head()

Unnamed: 0.1,Unnamed: 0,season,week,team,qb,qb_adjusted_value,rushing_adjusted_value,qb_rushing_value_pct,qb_def_adjusted_value,rush_def_adjusted_value,special_teams_value,team_full
64,64,2014,3,ARI,D. Stanton,-1.157754,0.332257,0.0,-0.737241,0.919733,0.811753,Arizona Cardinals
65,65,2014,3,ATL,M. Ryan,-1.307454,0.874004,0.0,-0.879286,-0.600544,0.801254,Atlanta Falcons
66,66,2014,3,BAL,J. Flacco,0.652041,-0.098594,0.0,0.629275,-0.397593,0.040821,Baltimore Ravens
67,67,2014,3,BUF,E. Manuel,0.603547,-2.148207,0.0,1.100186,1.118419,1.432238,Buffalo Bills
68,68,2014,3,CAR,C. Newton,0.767364,-0.738487,0.0,0.730025,0.510499,0.963767,Carolina Panthers


In [60]:
# Display the stats frame (the notebook renders a truncated head/tail view)
stats

Unnamed: 0.1,Unnamed: 0,season,week,team,qb,qb_adjusted_value,rushing_adjusted_value,qb_rushing_value_pct,qb_def_adjusted_value,rush_def_adjusted_value,special_teams_value,team_full
64,64,2014,3,ARI,D. Stanton,-1.157754,0.332257,0.0,-0.737241,0.919733,0.811753,Arizona Cardinals
65,65,2014,3,ATL,M. Ryan,-1.307454,0.874004,0.0,-0.879286,-0.600544,0.801254,Atlanta Falcons
66,66,2014,3,BAL,J. Flacco,0.652041,-0.098594,0.0,0.629275,-0.397593,0.040821,Baltimore Ravens
67,67,2014,3,BUF,E. Manuel,0.603547,-2.148207,0.0,1.100186,1.118419,1.432238,Buffalo Bills
68,68,2014,3,CAR,C. Newton,0.767364,-0.738487,0.0,0.730025,0.510499,0.963767,Carolina Panthers
...,...,...,...,...,...,...,...,...,...,...,...,...
4856,4856,2022,21,CIN,J. Burrow,0.235993,0.222464,0.0,0.210351,0.832600,0.139439,Cincinnati Bengals
4857,4857,2022,21,KC,P. Mahomes,0.626274,0.390027,0.0,0.641969,-0.456237,-0.444345,Kansas City Chiefs
4858,4858,2022,21,PHI,J. Hurts,0.756473,2.287960,0.0,0.513945,-0.159588,0.134125,Philadelphia Eagles
4860,4860,2022,22,KC,P. Mahomes,0.666601,0.280515,0.0,0.579033,-0.464682,-0.538741,Kansas City Chiefs


In [61]:
# Preview the first rows of the schedule frame
schedule.head()

Unnamed: 0.1,Unnamed: 0,date,season,week,season_type,home,away,home_score,away_score,home_qb,away_qb,home_qb_abv,away_qb_abv
0,0,2014-09-04,2014,1,REG,Seattle Seahawks,Green Bay Packers,36,16,Russell Wilson,Aaron Rodgers,R. Wilson,A. Rodgers
1,1,2014-09-07,2014,1,REG,Baltimore Ravens,Cincinnati Bengals,16,23,Joe Flacco,Andy Dalton,J. Flacco,A. Dalton
2,2,2014-09-07,2014,1,REG,Houston Texans,Washington Football Team,17,6,Ryan Fitzpatrick,Robert Griffin,R. Fitzpatrick,R. Griffin
3,3,2014-09-07,2014,1,REG,Chicago Bears,Buffalo Bills,20,23,Jay Cutler,EJ Manuel,J. Cutler,E. Manuel
4,4,2014-09-07,2014,1,REG,Pittsburgh Steelers,Cleveland Browns,30,27,Ben Roethlisberger,Brian Hoyer,B. Roethlisberger,B. Hoyer


In [62]:
# Row counts of the NaN-filtered stats and of the schedule, for reference before merging
print(len(stats))
print(len(schedule))

4555
2438


In [63]:
# Attach each side's rolling stats to the schedule (matched on season, week, team and QB),
# then keep only the game columns plus the suffixed stat columns.
stat_cols = ['qb_adjusted_value', 'rushing_adjusted_value', 'qb_rushing_value_pct',
             'qb_def_adjusted_value', 'rush_def_adjusted_value', 'special_teams_value']
game_cols = ['date', 'season', 'week', 'season_type', 'home', 'away',
             'home_score', 'away_score', 'home_qb', 'away_qb']

nfl = schedule.copy()
for side in ('home', 'away'):
    # Left join keeps every scheduled game; unmatched games get NaN stats
    nfl = nfl.merge(
        stats.add_suffix('_' + side),
        how='left',
        left_on=['season', 'week', side, side + '_qb_abv'],
        right_on=['season_' + side, 'week_' + side, 'team_full_' + side, 'qb_' + side],
    )

nfl = nfl[game_cols
          + [col + '_home' for col in stat_cols]
          + [col + '_away' for col in stat_cols]]
nfl.head()

Unnamed: 0,date,season,week,season_type,home,away,home_score,away_score,home_qb,away_qb,qb_adjusted_value_home,rushing_adjusted_value_home,qb_rushing_value_pct_home,qb_def_adjusted_value_home,rush_def_adjusted_value_home,special_teams_value_home,qb_adjusted_value_away,rushing_adjusted_value_away,qb_rushing_value_pct_away,qb_def_adjusted_value_away,rush_def_adjusted_value_away,special_teams_value_away
0,2014-09-04,2014,1,REG,Seattle Seahawks,Green Bay Packers,36,16,Russell Wilson,Aaron Rodgers,,,,,,,,,,,,
1,2014-09-07,2014,1,REG,Baltimore Ravens,Cincinnati Bengals,16,23,Joe Flacco,Andy Dalton,,,,,,,,,,,,
2,2014-09-07,2014,1,REG,Houston Texans,Washington Football Team,17,6,Ryan Fitzpatrick,Robert Griffin,,,,,,,,,,,,
3,2014-09-07,2014,1,REG,Chicago Bears,Buffalo Bills,20,23,Jay Cutler,EJ Manuel,,,,,,,,,,,,
4,2014-09-07,2014,1,REG,Pittsburgh Steelers,Cleveland Browns,30,27,Ben Roethlisberger,Brian Hoyer,,,,,,,,,,,,


In [64]:
# Sanity check: the left merges must keep exactly one row per scheduled game.
# Enforced with an assertion instead of comparing printed numbers by eye.
assert len(nfl) == len(schedule), 'stats merge changed the number of schedule rows'
print(len(nfl))

2438


In [65]:
# Preview the first rows of the betting frame
betting.head()

Unnamed: 0,date,home_team,away_team,home_score,away_score,total_score,home_moneyline,away_moneyline,home_spread,away_spread,total_score.1,over_under_result,home_team_actual_line,away_team_actual_line,spread_cover_result,home_implied_prob,away_implied_prob,game_winner,favorite_return,underdog_return,home_return,away_return,favorite_spread_return,underdog_spread_return,home_spread_return,away_spread_return,over_return,under_return
0,2023-02-12,Philadelphia Eagles,Kansas City Chiefs,35,38,73,-120.0,100.0,-1.0,1.0,51.5,over,3,-3,away,0.545455,0.5,away,-100.0,100.0,-100.0,100.0,-100.0,90.91,-100.0,90.91,90.91,-100.0
1,2023-01-29,Kansas City Chiefs,Cincinnati Bengals,23,20,43,-130.0,110.0,-2.0,2.0,48.5,under,-3,3,home,0.565217,0.47619,home,76.92,-100.0,76.92,-100.0,90.91,-100.0,90.91,-100.0,-100.0,90.91
2,2023-01-29,Philadelphia Eagles,San Francisco 49ers,31,7,38,-155.0,135.0,-3.0,3.0,45.0,under,-24,24,home,0.607843,0.425532,home,64.52,-100.0,64.52,-100.0,90.91,-100.0,90.91,-100.0,-100.0,90.91
3,2023-01-22,San Francisco 49ers,Dallas Cowboys,19,12,31,-200.0,170.0,-3.5,3.5,46.5,under,-7,7,home,0.666667,0.37037,home,50.0,-100.0,50.0,-100.0,90.91,-100.0,90.91,-100.0,-100.0,90.91
4,2023-01-22,Buffalo Bills,Cincinnati Bengals,10,27,37,-260.0,215.0,-6.0,6.0,48.5,under,17,-17,away,0.722222,0.31746,away,-100.0,215.0,-100.0,215.0,-100.0,90.91,-100.0,90.91,-100.0,90.91


In [66]:
# Binary outcome label: 1 when the home team outscored the away team, else 0.
# The column appears in this cell's saved output and in the exported CSV, but no
# visible cell created it, so a fresh kernel run would silently lose it.
# NOTE(review): ties are labelled 0 here -- confirm that matches the original definition.
nfl = nfl.assign(home_win=(nfl.home_score > nfl.away_score).astype('int64'))
nfl.head()

Unnamed: 0,date,season,week,season_type,home,away,home_score,away_score,home_qb,away_qb,qb_adjusted_value_home,rushing_adjusted_value_home,qb_rushing_value_pct_home,qb_def_adjusted_value_home,rush_def_adjusted_value_home,special_teams_value_home,qb_adjusted_value_away,rushing_adjusted_value_away,qb_rushing_value_pct_away,qb_def_adjusted_value_away,rush_def_adjusted_value_away,special_teams_value_away,home_win
0,2014-09-04,2014,1,REG,Seattle Seahawks,Green Bay Packers,36,16,Russell Wilson,Aaron Rodgers,,,,,,,,,,,,,1
1,2014-09-07,2014,1,REG,Baltimore Ravens,Cincinnati Bengals,16,23,Joe Flacco,Andy Dalton,,,,,,,,,,,,,0
2,2014-09-07,2014,1,REG,Houston Texans,Washington Football Team,17,6,Ryan Fitzpatrick,Robert Griffin,,,,,,,,,,,,,1
3,2014-09-07,2014,1,REG,Chicago Bears,Buffalo Bills,20,23,Jay Cutler,EJ Manuel,,,,,,,,,,,,,0
4,2014-09-07,2014,1,REG,Pittsburgh Steelers,Cleveland Browns,30,27,Ben Roethlisberger,Brian Hoyer,,,,,,,,,,,,,1


In [67]:
# Join in betting data to get point spreads
# (inner join: games without a matching betting row on date/home/away are dropped)

nfl_cleaned_with_betting = pd.merge(
    nfl,
    betting,
    how='inner',
    left_on=['date', 'home', 'away'],
    right_on=['date', 'home_team', 'away_team'],
)

In [68]:
# Remove any pushes, add binary label for home cover
# (vectorized comparison instead of a row-wise apply; this also works on an empty frame)

is_push = nfl_cleaned_with_betting.spread_cover_result == 'push'

# Explicit copy so adding the label column never writes into a slice of the merged frame
nfl_cleaned_with_betting_final = nfl_cleaned_with_betting.loc[~is_push].copy()

# 1 when the home team covered the spread, 0 otherwise
nfl_cleaned_with_betting_final['home_cover'] = (
    nfl_cleaned_with_betting_final.spread_cover_result == 'home'
).astype('int64')
nfl_cleaned_with_betting_final

Unnamed: 0,date,season,week,season_type,home,away,home_score_x,away_score_x,home_qb,away_qb,qb_adjusted_value_home,rushing_adjusted_value_home,qb_rushing_value_pct_home,qb_def_adjusted_value_home,rush_def_adjusted_value_home,special_teams_value_home,qb_adjusted_value_away,rushing_adjusted_value_away,qb_rushing_value_pct_away,qb_def_adjusted_value_away,rush_def_adjusted_value_away,special_teams_value_away,home_win,home_team,away_team,home_score_y,away_score_y,total_score,home_moneyline,away_moneyline,home_spread,away_spread,total_score.1,over_under_result,home_team_actual_line,away_team_actual_line,spread_cover_result,home_implied_prob,away_implied_prob,game_winner,favorite_return,underdog_return,home_return,away_return,favorite_spread_return,underdog_spread_return,home_spread_return,away_spread_return,over_return,under_return,home_cover
0,2014-09-04,2014,1,REG,Seattle Seahawks,Green Bay Packers,36,16,Russell Wilson,Aaron Rodgers,,,,,,,,,,,,,1,Seattle Seahawks,Green Bay Packers,36,16,52,-230.0,205.0,-4.5,4.5,46.5,over,-20,20,home,0.696970,0.327869,home,43.48,-100.0,43.48,-100.0,90.91,-100.00,90.91,-100.00,90.91,-100.00,1
1,2014-09-07,2014,1,REG,Baltimore Ravens,Cincinnati Bengals,16,23,Joe Flacco,Andy Dalton,,,,,,,,,,,,,0,Baltimore Ravens,Cincinnati Bengals,16,23,39,-113.0,102.0,-1.0,1.0,43.5,under,7,-7,away,0.530516,0.495050,away,-100.00,102.0,-100.00,102.0,-100.00,90.91,-100.00,90.91,-100.00,90.91,0
2,2014-09-07,2014,1,REG,Houston Texans,Washington Football Team,17,6,Ryan Fitzpatrick,Robert Griffin,,,,,,,,,,,,,1,Houston Texans,Washington Football Team,17,6,23,-177.0,159.0,-3.0,3.0,43.5,under,-11,11,home,0.638989,0.386100,home,56.50,-100.0,56.50,-100.0,90.91,-100.00,90.91,-100.00,-100.00,90.91,1
3,2014-09-07,2014,1,REG,Chicago Bears,Buffalo Bills,20,23,Jay Cutler,EJ Manuel,,,,,,,,,,,,,0,Chicago Bears,Buffalo Bills,20,23,43,-270.0,239.0,-7.0,7.0,47.0,under,3,-3,away,0.729730,0.294985,away,-100.00,239.0,-100.00,239.0,-100.00,90.91,-100.00,90.91,-100.00,90.91,0
4,2014-09-07,2014,1,REG,Pittsburgh Steelers,Cleveland Browns,30,27,Ben Roethlisberger,Brian Hoyer,,,,,,,,,,,,,1,Pittsburgh Steelers,Cleveland Browns,30,27,57,-242.0,216.0,-5.5,5.5,41.5,over,-3,3,away,0.707602,0.316456,home,41.32,-100.0,41.32,-100.0,-100.00,90.91,-100.00,90.91,90.91,-100.00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2392,2023-01-22,2022,20,POST,Buffalo Bills,Cincinnati Bengals,10,27,Josh Allen,Joe Burrow,0.285539,0.625543,0.0,0.660321,0.512648,0.183350,0.310046,-0.006602,0.0,0.113476,0.482189,0.151149,0,Buffalo Bills,Cincinnati Bengals,10,27,37,-260.0,215.0,-6.0,6.0,48.5,under,17,-17,away,0.722222,0.317460,away,-100.00,215.0,-100.00,215.0,-100.00,90.91,-100.00,90.91,-100.00,90.91,0
2393,2023-01-22,2022,20,POST,San Francisco 49ers,Dallas Cowboys,19,12,Brock Purdy,Dak Prescott,0.764975,0.633274,0.0,0.686924,0.583874,0.178761,0.207053,-0.634150,0.0,-0.271663,1.501824,-0.517502,1,San Francisco 49ers,Dallas Cowboys,19,12,31,-200.0,170.0,-3.5,3.5,46.5,under,-7,7,home,0.666667,0.370370,home,50.00,-100.0,50.00,-100.0,90.91,-100.00,90.91,-100.00,-100.00,90.91,1
2394,2023-01-29,2022,21,POST,Philadelphia Eagles,San Francisco 49ers,31,7,Jalen Hurts,Brock Purdy,0.756473,2.287960,0.0,0.513945,-0.159588,0.134125,,,,,,,1,Philadelphia Eagles,San Francisco 49ers,31,7,38,-155.0,135.0,-3.0,3.0,45.0,under,-24,24,home,0.607843,0.425532,home,64.52,-100.0,64.52,-100.0,90.91,-100.00,90.91,-100.00,-100.00,90.91,1
2395,2023-01-29,2022,21,POST,Kansas City Chiefs,Cincinnati Bengals,23,20,Patrick Mahomes,Joe Burrow,0.626274,0.390027,0.0,0.641969,-0.456237,-0.444345,0.235993,0.222464,0.0,0.210351,0.832600,0.139439,1,Kansas City Chiefs,Cincinnati Bengals,23,20,43,-130.0,110.0,-2.0,2.0,48.5,under,-3,3,home,0.565217,0.476190,home,76.92,-100.0,76.92,-100.0,90.91,-100.00,90.91,-100.00,-100.00,90.91,1


In [69]:
# Persist the modeling base frame (pushes removed, home_cover label added).
# NOTE(review): written with pandas' default index=True, so the index is saved as an extra
# unnamed column -- confirm downstream readers expect that before changing it.
nfl_cleaned_with_betting_final.to_csv('../../data/adjusted_stats_point_spread_modeling_base_df.csv')

In [70]:
# Split data into train and test sets, cross validation for hyperparameter tuning
# Seasons through 2021 are used for training; the 2022 season is held out for testing.
# dropna() discards any game with a missing value in any column.

train_df = nfl_cleaned_with_betting_final.loc[lambda games: games.season <= 2021].dropna()

test_df = nfl_cleaned_with_betting_final.loc[lambda games: games.season == 2022].dropna()

In [12]:
# NOTE
# Not sure this data set needs balancing at all; point spread betting should be pretty close to 50/50.


# Balance training data set

# Note: Can't balance the data set and then do cross validation, because of data leakage
# (rows duplicated by resampling can end up in both the training and the validation folds).

# Fully balancing the data set doesn't work as well for prediction as leaving it unbalanced, likely because the 
# home team does actually have an important advantage. But maybe balancing the data set slightly will help.

# balance_n = int(np.round((sum(train_df.home_win==1) - sum(train_df.home_win==0))/1.5))
# to_add = train_df[train_df.home_win==0].sample(n=balance_n, random_state=57, replace=True)

# train_df_balanced = pd.concat([train_df, to_add])
# train_df_balanced

Unnamed: 0,date,season,week,season_type,home,away,home_score,away_score,home_qb,away_qb,qb_adjusted_value_home,rushing_adjusted_value_home,qb_rushing_value_pct_home,qb_def_adjusted_value_home,rush_def_adjusted_value_home,special_teams_value_home,qb_adjusted_value_away,rushing_adjusted_value_away,qb_rushing_value_pct_away,qb_def_adjusted_value_away,rush_def_adjusted_value_away,special_teams_value_away,home_win
33,2014-09-21,2014,3,REG,New England Patriots,Las Vegas Raiders,16,9,Tom Brady,Derek Carr,0.456221,-0.500777,0.0,2.010295,0.231263,0.937560,0.007547,0.969365,0.0,-0.691465,-0.992021,-0.444697,1
36,2014-09-21,2014,3,REG,Cleveland Browns,Baltimore Ravens,21,23,Brian Hoyer,Joe Flacco,-0.688197,0.124970,0.0,0.660149,-1.343275,-0.273793,0.652041,-0.098594,0.0,0.629275,-0.397593,0.040821,0
37,2014-09-21,2014,3,REG,Carolina Panthers,Pittsburgh Steelers,19,37,Cam Newton,Ben Roethlisberger,0.767364,-0.738487,0.0,0.730025,0.510499,0.963767,-0.690944,0.546799,0.0,-0.802556,-0.322210,0.152202,0
38,2014-09-21,2014,3,REG,Arizona Cardinals,San Francisco 49ers,23,14,Drew Stanton,Colin Kaepernick,-1.157754,0.332257,0.0,-0.737241,0.919733,0.811753,-0.824994,-1.032782,0.0,-0.144545,0.759720,0.278262,1
39,2014-09-21,2014,3,REG,Buffalo Bills,Los Angeles Chargers,10,22,EJ Manuel,Philip Rivers,0.603547,-2.148207,0.0,1.100186,1.118419,1.432238,1.482525,-0.731489,0.0,0.011487,-0.674026,1.042154,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1188,2018-11-04,2018,9,REG,Washington Football Team,Atlanta Falcons,14,38,Alex Smith,Matt Ryan,-0.264829,-0.084019,0.0,-0.104279,-0.165206,0.526713,1.093633,-0.270172,0.0,-1.020006,-0.785239,-0.209668,0
1925,2021-10-03,2021,4,REG,Philadelphia Eagles,Kansas City Chiefs,30,42,Jalen Hurts,Patrick Mahomes,-0.022404,-0.114117,0.0,0.122893,0.436458,-0.625807,0.988577,0.612736,0.0,-0.260635,-1.163442,-0.334241,0
1785,2020-12-06,2020,13,REG,Arizona Cardinals,Los Angeles Rams,28,38,Kyler Murray,Jared Goff,0.430281,0.508746,0.0,-0.082071,-0.485238,-0.556574,-0.733724,-0.467673,0.0,0.835681,1.244867,-0.866246,0
580,2016-10-02,2016,4,REG,Baltimore Ravens,Las Vegas Raiders,27,28,Joe Flacco,Derek Carr,-0.404237,-0.669535,0.0,0.362706,0.043280,0.880426,-0.247696,0.321605,0.0,-0.061886,-0.026805,0.212009,0


In [25]:
# Shuffle training set before cross validation
# (frac=1 keeps every row; the fixed random_state makes the row order reproducible)

train_df_shuffled = train_df.sample(frac=1, random_state=42)
train_df_shuffled.head()

Unnamed: 0,date,season,week,season_type,home,away,home_score_x,away_score_x,home_qb,away_qb,qb_adjusted_value_home,rushing_adjusted_value_home,qb_rushing_value_pct_home,qb_def_adjusted_value_home,rush_def_adjusted_value_home,special_teams_value_home,qb_adjusted_value_away,rushing_adjusted_value_away,qb_rushing_value_pct_away,qb_def_adjusted_value_away,rush_def_adjusted_value_away,special_teams_value_away,home_win,home_team,away_team,home_score_y,away_score_y,total_score,home_moneyline,away_moneyline,home_spread,away_spread,total_score.1,over_under_result,home_team_actual_line,away_team_actual_line,spread_cover_result,home_implied_prob,away_implied_prob,game_winner,favorite_return,underdog_return,home_return,away_return,favorite_spread_return,underdog_spread_return,home_spread_return,away_spread_return,over_return,under_return,home_cover
1166,2018-11-11,2018,10,REG,Cleveland Browns,Atlanta Falcons,28,16,Baker Mayfield,Matt Ryan,-0.941167,-0.012252,0.0,-0.190158,-0.750509,0.121375,1.112148,-0.11453,0.0,-0.908372,-0.671231,-0.115014,1,Cleveland Browns,Atlanta Falcons,28,16,44,215.0,-263.0,5.5,-5.5,49.5,under,-12,12,home,0.31746,0.724518,home,-100.0,215.0,215.0,-100.0,-100.0,90.91,90.91,-100.0,-100.0,90.91,1
358,2015-11-01,2015,8,REG,Los Angeles Rams,San Francisco 49ers,27,6,Nick Foles,Colin Kaepernick,-0.909849,-0.220933,0.0,0.350701,0.420085,-0.081438,-0.383072,-0.351282,0.0,-0.83758,-0.466877,0.125281,1,Los Angeles Rams,San Francisco 49ers,27,6,33,-400.0,346.0,-8.0,8.0,41.0,under,-21,21,home,0.8,0.224215,home,25.0,-100.0,25.0,-100.0,90.91,-100.0,90.91,-100.0,-100.0,90.91,1
516,2016-09-11,2016,1,REG,Kansas City Chiefs,Los Angeles Chargers,33,27,Alex Smith,Philip Rivers,0.156926,0.691923,0.0,0.659606,-0.024642,0.002125,0.167622,-0.621257,0.0,0.158414,0.178626,-0.042608,1,Kansas City Chiefs,Los Angeles Chargers,33,27,60,-275.0,244.0,-6.5,6.5,45.5,over,-6,6,away,0.733333,0.290698,home,36.36,-100.0,36.36,-100.0,-100.0,90.91,-100.0,90.91,90.91,-100.0,0
1154,2018-11-04,2018,9,REG,Washington Football Team,Atlanta Falcons,14,38,Alex Smith,Matt Ryan,-0.264829,-0.084019,0.0,-0.104279,-0.165206,0.526713,1.093633,-0.270172,0.0,-1.020006,-0.785239,-0.209668,0,Washington Football Team,Atlanta Falcons,14,38,52,-132.0,110.0,-2.0,2.0,47.0,over,24,-24,away,0.568966,0.47619,away,-100.0,110.0,-100.0,110.0,-100.0,90.91,-100.0,90.91,90.91,-100.0,0
641,2016-11-13,2016,10,REG,Jacksonville Jaguars,Houston Texans,21,24,Blake Bortles,Brock Osweiler,-0.268586,-0.292747,0.0,0.437538,-0.322575,-1.248456,-0.94351,0.052969,0.0,-0.017208,-0.165503,-0.400847,0,Jacksonville Jaguars,Houston Texans,21,24,45,-156.0,141.0,-3.0,3.0,42.0,over,3,-3,away,0.609375,0.414938,away,-100.0,141.0,-100.0,141.0,-100.0,90.91,-100.0,90.91,90.91,-100.0,0


In [26]:
# Prepare feature list

# Earlier feature sets, kept for reference:
# feature_list = ['passing_value_home', 'rushing_value_home', 'pass_def_value_home', 'rush_def_value_home',
#                    'passing_value_away', 'rushing_value_away', 'pass_def_value_away', 'rush_def_value_away']

# feature_list = ['qb_value_home', 'passing_value_home', 'rushing_value_home', 'qb_def_value_home', 'pass_def_value_home', 'rush_def_value_home', 'special_teams_value_home',
#                    'qb_value_away', 'passing_value_away', 'rushing_value_away', 'qb_def_value_away', 'pass_def_value_away', 'rush_def_value_away', 'special_teams_value_away']

# Active feature set: the same five adjusted-value stats for the home and the away side,
# followed by the home point spread (qb_rushing_value_pct_* is not included).
team_value_cols = ['qb_adjusted_value', 'rushing_adjusted_value', 'qb_def_adjusted_value',
                   'rush_def_adjusted_value', 'special_teams_value']

feature_list = (
    [col + '_home' for col in team_value_cols]
    + [col + '_away' for col in team_value_cols]
    + ['home_spread']
)

In [31]:
# Get features, labels for train and test sets (NumPy arrays throughout)

# train_x = train_df_balanced[feature_list].to_numpy()
# train_y = train_df_balanced.home_win.to_numpy()

train_x = train_df_shuffled[feature_list].to_numpy()
train_y = train_df_shuffled.home_cover.to_numpy()

# The test features are converted as well, so models fitted on plain arrays are
# evaluated on the same input type (previously test_x was left as a DataFrame).
test_x = test_df[feature_list].to_numpy()
test_y = test_df.home_cover.to_numpy()

In [32]:
# Inspect the week values of the shuffled training rows
train_df_shuffled.week

1166    10
358      8
516      1
1154     9
641     10
        ..
1352     4
1553    17
1038     1
1755    14
1347     4
Name: week, Length: 1770, dtype: int64

In [33]:
# Helper returning class predictions and probabilities for the train and test features

def get_preds(model, train_x, test_x):
    """Run a fitted model over the training and test features.

    Args:
        model: object exposing ``predict`` and ``predict_proba``.
        train_x: training features.
        test_x: test features.

    Returns:
        Tuple ``(train_preds, train_probs, test_preds, test_probs)``.
    """
    outputs = []
    # Train first, then test; for each set, predictions come before probabilities
    for features in (train_x, test_x):
        outputs.append(model.predict(features))
        outputs.append(model.predict_proba(features))
    return tuple(outputs)

# Function to get accuracy scores for train, val sets

def print_cv_results(model, train_x, train_y, cv_folds, verbose=True):
    """Cross-validate ``model`` and print its train/validation scores.

    Args:
        model: estimator accepted by ``cross_validate``.
        train_x: training features.
        train_y: training labels.
        cv_folds: forwarded to ``cross_validate`` as ``cv`` (e.g. the number of folds).
        verbose: when True, the per-fold scores are printed before the averages.

    Returns:
        Tuple ``(mean train score, mean validation score)``.
    """
    # Honour the caller's fold setting; this was previously hard-coded to 5,
    # which silently ignored the cv_folds argument.
    cv_results = cross_validate(model, train_x, train_y, cv=cv_folds, return_train_score=True)

    train_scores = cv_results['train_score']
    # cross_validate reports the held-out fold under the 'test_score' key
    val_scores = cv_results['test_score']

    if verbose:
        for fold, (train_score, val_score) in enumerate(zip(train_scores, val_scores), start=1):
            print('Fold {}, Train Accuracy: {}, Validation Accuracy: {}'.format(fold, train_score, val_score))
        print()

    mean_train = np.mean(train_scores)
    mean_val = np.mean(val_scores)

    # The averages are printed regardless of verbose
    print('Average Training Accuracy: {}'.format(mean_train))
    print('Average Validation Accuracy: {}'.format(mean_val))

    return mean_train, mean_val

In [34]:
# Simple Logistic Regression Model
# Baseline with default hyperparameters, scored via print_cv_results (5 is passed as cv_folds)

lr_model = LogisticRegression()

print_cv_results(lr_model, train_x, train_y, 5)

Fold 1, Train Accuracy: 0.5494350282485876, Validation Accuracy: 0.5
Fold 2, Train Accuracy: 0.538135593220339, Validation Accuracy: 0.536723163841808
Fold 3, Train Accuracy: 0.556497175141243, Validation Accuracy: 0.5225988700564972
Fold 4, Train Accuracy: 0.5416666666666666, Validation Accuracy: 0.5056497175141242
Fold 5, Train Accuracy: 0.5331920903954802, Validation Accuracy: 0.5423728813559322

Average Training Accuracy: 0.5437853107344632
Average Validation Accuracy: 0.5214689265536723


(0.5437853107344632, 0.5214689265536723)

In [35]:
# Default Random Forest Classifier with no hyperparameter tuning
# random_state is fixed so the cross-validation scores are reproducible between runs

rf_model = RandomForestClassifier(random_state=42)

print_cv_results(rf_model, train_x, train_y, 5)

Fold 1, Train Accuracy: 1.0, Validation Accuracy: 0.5084745762711864
Fold 2, Train Accuracy: 1.0, Validation Accuracy: 0.4774011299435028
Fold 3, Train Accuracy: 1.0, Validation Accuracy: 0.5141242937853108
Fold 4, Train Accuracy: 1.0, Validation Accuracy: 0.4830508474576271
Fold 5, Train Accuracy: 1.0, Validation Accuracy: 0.5480225988700564

Average Training Accuracy: 1.0
Average Validation Accuracy: 0.5062146892655367


(1.0, 0.5062146892655367)

In [36]:
# Default XGBoost Model with no hyperparameter tuning
# Scored the same way as the other baselines (5 is passed as cv_folds)

xgb_model = xgb.XGBClassifier()

print_cv_results(xgb_model, train_x, train_y, 5)



















Fold 1, Train Accuracy: 1.0, Validation Accuracy: 0.4915254237288136
Fold 2, Train Accuracy: 1.0, Validation Accuracy: 0.4887005649717514
Fold 3, Train Accuracy: 1.0, Validation Accuracy: 0.5423728813559322
Fold 4, Train Accuracy: 1.0, Validation Accuracy: 0.5141242937853108
Fold 5, Train Accuracy: 1.0, Validation Accuracy: 0.480225988700565

Average Training Accuracy: 1.0
Average Validation Accuracy: 0.5033898305084745


(1.0, 0.5033898305084745)

In [37]:
# Logistic Regression, with hyperparameter tuning

def logistic_regression_tuning(train_x, train_y, verbose=True):
    """Grid-search LogisticRegression hyperparameters and print the results.

    Args:
        train_x: training features.
        train_y: training labels.
        verbose: when True, print the train/validation score of every
            parameter combination in addition to the best one.

    Returns:
        None; results are printed.
    """
    lr_model = LogisticRegression()

    # Hyperparameters to tune
    c_values = [100, 10, 1.0, 0.1, 0.01]

    # One sub-grid per penalty, listing only solvers that support it. Crossing every
    # solver with every penalty (as before) made a large share of the fits fail.
    param_grid = [
        # No regularization: C has no effect here, so it is not searched.
        # NOTE(review): 'none' keeps the original spelling; newer scikit-learn releases
        # expect penalty=None instead -- confirm against the installed version.
        dict(penalty=['none'], solver=['newton-cg', 'lbfgs', 'sag', 'saga']),
        dict(penalty=['l2'], solver=['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'], C=c_values),
        dict(penalty=['l1'], solver=['liblinear', 'saga'], C=c_values),
        # elasticnet is only supported by saga and requires an l1_ratio
        dict(penalty=['elasticnet'], solver=['saga'], C=c_values, l1_ratio=[0.25, 0.5, 0.75]),
    ]

    clf = GridSearchCV(lr_model, param_grid, return_train_score=True).fit(train_x, train_y)

    print('Best Result: {}'.format(clf.best_score_))
    print('Best Parameters: {}'.format(clf.best_params_))
    print()

    train_scores = clf.cv_results_['mean_train_score']
    val_scores = clf.cv_results_['mean_test_score']
    param_list = clf.cv_results_['params']

    if verbose:
        print('Parameter Combinations and Results:')
        # 'combo' avoids shadowing the parameter grid defined above
        for train_score, val_score, combo in zip(train_scores, val_scores, param_list):
            print('Train Score: {}, Val Score: {}, Parameters: {}'.format(train_score, val_score, combo))

In [38]:
# Can uncomment and run below to see, but hyperparameter tuning didn't change much. Can probably just roll with the 
# default logistic regression

# logistic_regression_tuning(train_x, train_y)

In [42]:
# Save best logistic regression model

# dump() does not create missing directories, so make sure the target folder exists
os.makedirs('saved_models', exist_ok=True)

best_lr_model = LogisticRegression().fit(train_x, train_y)
dump(best_lr_model, 'saved_models/ps_logistic_regression_av.joblib')

['saved_models/ps_logistic_regression_av.joblib']

In [43]:
# Save best random forest model

# dump() does not create missing directories, so make sure the target folder exists
os.makedirs('saved_models', exist_ok=True)

# random_state is fixed so refitting reproduces the same saved forest
best_rf_model = RandomForestClassifier(random_state=42).fit(train_x, train_y)
dump(best_rf_model, 'saved_models/ps_random_forest_av.joblib')

['saved_models/ps_random_forest_av.joblib']

In [44]:
# Save best xg boost model

# dump() does not create missing directories, so make sure the target folder exists
os.makedirs('saved_models', exist_ok=True)

best_xgb_model = xgb.XGBClassifier().fit(train_x, train_y)
dump(best_xgb_model, 'saved_models/ps_xg_boost_av.joblib')





['saved_models/ps_xg_boost_av.joblib']