In [31]:
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [2]:
# Load the Elo ratings table (one row per team / season / week) and preview it.
elo_data = pd.read_csv(filepath_or_buffer='../../data/input/NFL_Team_Elo.csv')
elo_data.head(n=5)

Unnamed: 0,NFL_TEAM,NFL_YEAR,NFL_WEEK,ELO
0,Arizona Cardinals,91-92,1,1300.0
1,Atlanta Falcons,91-92,1,1300.0
2,Baltimore Ravens,91-92,1,1300.0
3,Buffalo Bills,91-92,1,1300.0
4,Carolina Panthers,91-92,1,1300.0


In [6]:
# Load the cleaned game log; the columns used below are GAME_DATE, YEAR,
# WEEK_NUM, WINNER and LOSER.
game_data = pd.read_csv('../../data/cleaned/NFL_wk_by_wk_cleaned.csv')

# Turn YEAR (season labels such as "91-92") into a categorical whose category
# order is chronological, so that sorting on YEAR follows season order rather
# than string order ("00-01" would otherwise sort before "91-92").
# One label is generated for every calendar year between the earliest and the
# latest game date; labels with no games are simply unused categories, while
# YEAR values that are not among the generated labels become NaN.
game_data['GAME_DATE'] = pd.to_datetime(game_data['GAME_DATE'])
game_years = game_data['GAME_DATE'].dt.year
first_year, last_year = int(game_years.min()), int(game_years.max())
sorted_years = [f"{str(yr)[-2:]}-{str(yr + 1)[-2:]}" for yr in range(first_year, last_year + 1)]
game_data['YEAR'] = pd.Categorical(game_data['YEAR'], sorted_years)

# The temporaries are only needed to build the category list.
del game_years, first_year, last_year, sorted_years

# Winner's perspective: TEAM_A is the winner, so RESULT is always 1.
game_data = (
    game_data.loc[:, ['WINNER', 'LOSER', 'YEAR', 'WEEK_NUM']]
    .rename(columns={'WINNER': 'TEAM_A', 'LOSER': 'TEAM_B'})
    .assign(RESULT=1)
)

# Loser's perspective: the same games with the two teams swapped and RESULT 0.
# Renaming by name (instead of overwriting .columns positionally) keeps the
# swap explicit and independent of column position.
game_data_flipped = (
    game_data.loc[:, ['TEAM_B', 'TEAM_A', 'YEAR', 'WEEK_NUM']]
    .rename(columns={'TEAM_B': 'TEAM_A', 'TEAM_A': 'TEAM_B'})
    .assign(RESULT=0)
)

game_data.head()

Unnamed: 0,TEAM_A,TEAM_B,YEAR,WEEK_NUM,RESULT
0,Kansas City Chiefs,Atlanta Falcons,91-92,1,1
1,Dallas Cowboys,Cleveland Browns,91-92,1,1
2,Philadelphia Eagles,Green Bay Packers,91-92,1,1
3,New Orleans Saints,Seattle Seahawks,91-92,1,1
4,New York Jets,Tampa Bay Buccaneers,91-92,1,1


In [8]:
# Stack the winner-perspective and loser-perspective rows into one frame and
# order it by season, week and TEAM_A. The original row labels are kept, so
# each game's two rows share the same index label.
game_data = (
    pd.concat(objs=[game_data, game_data_flipped], axis=0)
    .sort_values(by=['YEAR', 'WEEK_NUM', 'TEAM_A'], ascending=True)
)
game_data.head()

Unnamed: 0,TEAM_A,TEAM_B,YEAR,WEEK_NUM,RESULT
9,Arizona Cardinals,Los Angeles Rams,91-92,1,1
0,Atlanta Falcons,Kansas City Chiefs,91-92,1,0
5,Buffalo Bills,Miami Dolphins,91-92,1,1
6,Chicago Bears,Minnesota Vikings,91-92,1,1
7,Cincinnati Bengals,Denver Broncos,91-92,1,0


In [45]:
def _attach_elo(games, ratings, team_col):
    """Left-join the Elo rating of the team named in ``team_col`` onto ``games``.

    Rows are matched on team name, season and week
    (``team_col``/``YEAR``/``WEEK_NUM`` against
    ``NFL_TEAM``/``NFL_YEAR``/``NFL_WEEK``). The rating ends up in a new
    ``<team_col>_ELO`` column and the duplicated ``NFL_*`` key columns are
    discarded. Rows without a matching rating keep NaN in the new column.
    """
    joined = pd.merge(
        games,
        ratings,
        how='left',
        left_on=[team_col, 'YEAR', 'WEEK_NUM'],
        right_on=['NFL_TEAM', 'NFL_YEAR', 'NFL_WEEK'],
    )
    return joined.drop(columns=['NFL_TEAM', 'NFL_YEAR', 'NFL_WEEK']).rename(
        columns={'ELO': f'{team_col}_ELO'}
    )


# Add TEAM_A_ELO and TEAM_B_ELO for each game row.
model_data = _attach_elo(game_data, elo_data, 'TEAM_A')
model_data = _attach_elo(model_data, elo_data, 'TEAM_B')

# One-hot encode the season and drop the raw YEAR / WEEK_NUM columns.
# NOTE(review): WEEK_NUM is dropped without being encoded. This matches the
# previous behaviour of this cell, where WEEK_NUM was handed to get_dummies
# but passed through unencoded (no WEEK_NUM_* columns were produced) and was
# then dropped. Confirm whether week indicators were actually intended.
season_dummies = pd.get_dummies(model_data['YEAR'], prefix='YEAR')
model_data = pd.concat(
    [model_data.drop(columns=['YEAR', 'WEEK_NUM']), season_dummies], axis=1
)

model_data.head()

Unnamed: 0,TEAM_A,TEAM_B,RESULT,TEAM_A_ELO,TEAM_B_ELO,YEAR_00-01,YEAR_01-02,YEAR_02-03,YEAR_03-04,YEAR_04-05,...,YEAR_22-23,YEAR_91-92,YEAR_92-93,YEAR_93-94,YEAR_94-95,YEAR_95-96,YEAR_96-97,YEAR_97-98,YEAR_98-99,YEAR_99-00
0,Arizona Cardinals,Los Angeles Rams,1,1300.0,1300.0,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
1,Atlanta Falcons,Kansas City Chiefs,0,1300.0,1300.0,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
2,Buffalo Bills,Miami Dolphins,1,1300.0,1300.0,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
3,Chicago Bears,Minnesota Vikings,1,1300.0,1300.0,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
4,Cincinnati Bengals,Denver Broncos,0,1300.0,1300.0,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False


In [47]:
# Features: both teams' Elo ratings plus every one-hot season indicator.
# The YEAR_* columns are collected from model_data instead of being listed by
# hand, so a newly added season is picked up automatically.
feature_columns = ['TEAM_A_ELO', 'TEAM_B_ELO'] + [
    column for column in model_data.columns if str(column).startswith('YEAR_')
]

# NOTE(review): every game is present twice in model_data (once from each
# team's perspective), and this is a plain random row split, so the two
# mirrored rows of one game can end up on opposite sides of the split.
# Consider a group-aware split if that leakage matters for the evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    model_data.loc[:, feature_columns],
    model_data['RESULT'],
    test_size=0.25,      # hold out a quarter of the rows for evaluation
    random_state=123,    # fixed seed so the split is reproducible
)

In [48]:
## Create the LogisticRegression classifier with scikit-learn's default settings
logistic_regression = LogisticRegression()

In [49]:
## Fit the classifier on the training features and the training RESULT labels
logistic_regression.fit(X=x_train, y=y_train)

In [50]:
## Predicted RESULT label for every held-out row
predictions = logistic_regression.predict(X=x_test)

In [51]:
## Mean accuracy of the fitted classifier on the held-out rows
score = logistic_regression.score(X=x_test, y=y_test)
print(score)

0.6143915849868515
