In [42]:
# packages
import pandas as pd
import numpy as np
import datetime
import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=DeprecationWarning)
    import sklearn

# required machine learning packages
from sklearn import model_selection
from sklearn.feature_selection import RFE
from sklearn.metrics import brier_score_loss, roc_auc_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV as CCV

from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier
import xgboost as xgb

In [43]:
# Load Data
path = "./data/"
df = pd.read_csv(path + 'spreadspoke_scores.csv')
teams = pd.read_csv(path + "nfl_teams.csv")
games_elo = pd.read_csv(path + "nfl_games.csv")

In [44]:
# Clean data
# replacing blank strings with NaN
df = df.replace(r'^\s*$', np.nan, regex=True)

# removing rows from specific columns that have null values, resetting index and changing data types
df = df[(df.score_home.isnull() == False) & (df.team_favorite_id.isnull() == False) & (df.over_under_line.isnull() == False) &
        (df.schedule_season >= 1979)]

df.reset_index(drop=True, inplace=True)
df['over_under_line'] = df.over_under_line.astype(float)

# mapping team_id to the correct teams
df['team_home'] = df.team_home.map(teams.set_index('team_name')['team_id'].to_dict())
df['team_away'] = df.team_away.map(teams.set_index('team_name')['team_id'].to_dict())

# fix team_favorite_id for Colts in 1969 and 1971 SB
df.loc[(df.schedule_season == 1968) & (df.schedule_week == 'Superbowl'), 'team_favorite_id'] = 'IND'
df.loc[(df.schedule_season == 1970) & (df.schedule_week == 'Superbowl'), 'team_favorite_id'] = 'IND'

# creating home favorite and away favorite columns (fill na with 0's)
df.loc[df.team_favorite_id == df.team_home, 'home_favorite'] = 1
df.loc[df.team_favorite_id == df.team_away, 'away_favorite'] = 1
df.home_favorite.fillna(0, inplace=True)
df.away_favorite.fillna(0, inplace=True)

# creating over / under column (fill na with 0's)
df.loc[((df.score_home + df.score_away) > df.over_under_line), 'over'] = 1
df.over.fillna(0, inplace=True)

# stadium neutral and schedule playoff as boolean
df['stadium_neutral'] = df.stadium_neutral.astype(int)
df['schedule_playoff'] = df.schedule_playoff.astype(int)

# change data type of date columns
df['schedule_date'] = pd.to_datetime(df['schedule_date'])
games_elo['date'] = pd.to_datetime(games_elo['date'])


In [45]:
# fixing some schedule_week column errors and converting column to integer data type
df.loc[(df.schedule_week == '18'), 'schedule_week'] = '17'
df.loc[(df.schedule_week == 'Wildcard') | (df.schedule_week == 'WildCard'), 'schedule_week'] = '18'
df.loc[(df.schedule_week == 'Division'), 'schedule_week'] = '19'
df.loc[(df.schedule_week == 'Conference'), 'schedule_week'] = '20'
df.loc[(df.schedule_week == 'Superbowl') | (df.schedule_week == 'SuperBowl'), 'schedule_week'] = '21'
df['schedule_week'] = df.schedule_week.astype(int)

In [46]:
# removing extra columns that aren't necessary for analysis
df = df[['schedule_date', 'schedule_season', 'schedule_week', 'team_home',
       'team_away', 'team_favorite_id', 'spread_favorite',
       'over_under_line', 'weather_temperature',
       'weather_wind_mph', 'score_home', 'score_away',
       'stadium_neutral', 'home_favorite', 'away_favorite',
       'over']]

In [47]:
# Cleaning games_elo and df to merge correctly
wsh_map = {'WSH' : 'WAS'}
games_elo.loc[games_elo.team1 == 'WSH', 'team1'] = 'WAS' 
games_elo.loc[games_elo.team2 == 'WSH', 'team2'] = 'WAS'

# fix dates
df.loc[(df.schedule_date == '2016-09-19') & (df.team_home == 'MIN'), 'schedule_date'] = datetime.datetime(2016, 9, 18)
df.loc[(df.schedule_date == '2017-01-22') & (df.schedule_week == 21), 'schedule_date'] = datetime.datetime(2017, 2, 5)
df.loc[(df.schedule_date == '1990-01-27') & (df.schedule_week == 21), 'schedule_date'] = datetime.datetime(1990, 1, 28)
df.loc[(df.schedule_date == '1990-01-13'), 'schedule_date'] = datetime.datetime(1990, 1, 14)
games_elo.loc[(games_elo.date == '2016-01-09'), 'date'] = datetime.datetime(2016, 1, 10)
games_elo.loc[(games_elo.date == '2016-01-08'), 'date'] = datetime.datetime(2016, 1, 9)
games_elo.loc[(games_elo.date == '2016-01-16'), 'date'] = datetime.datetime(2016, 1, 17)
games_elo.loc[(games_elo.date == '2016-01-15'), 'date'] = datetime.datetime(2016, 1, 16)

In [48]:
# merge games_elo with df
df = df.merge(games_elo, left_on=['schedule_date', 'team_home', 'team_away'], right_on=['date', 'team1', 'team2'], how='left')

# merge to fix neutral games where team_home and team_away are switched
games_elo2 = games_elo.rename(columns={'team1' : 'team2', 'team2' : 'team1', 'elo1' : 'elo2', 'elo2' : 'elo1'})
games_elo2['elo_prob1'] = 1 - games_elo2.elo_prob1
df = df.merge(games_elo2, left_on=['schedule_date', 'team_home', 'team_away'], right_on=['date', 'team1', 'team2'], how='left')
df = df.drop(columns=['qbelo1_pre_x', 'qbelo2_pre_x', 'qb1_x', 'qb2_x',
       'qb1_value_pre_x', 'qb2_value_pre_x', 'qb1_adj_x', 'qb2_adj_x',
       'qbelo_prob1_x', 'qbelo_prob2_x', 'qb1_game_value_x',
       'qb2_game_value_x', 'qb1_value_post_x', 'qb2_value_post_x',
       'qbelo1_post_x', 'qbelo2_post_x', 'qbelo1_pre_y', 'qbelo2_pre_y', 'qb1_y', 'qb2_y',
       'qb1_value_pre_y', 'qb2_value_pre_y', 'qb1_adj_y', 'qb2_adj_y',
       'qbelo_prob1_y', 'qbelo_prob2_y', 'qb1_game_value_y',
       'qb2_game_value_y', 'qb1_value_post_y', 'qb2_value_post_y',
       'qbelo1_post_y', 'qbelo2_post_y', 'elo1_post_x',
       'elo2_post_x', 'elo1_post_y',
       'elo2_post_y'])

In [49]:
# separating merged columns into x and y cols
x_cols = ['date_x', 'season_x', 'neutral_x', 'playoff_x', 'team1_x', 'team2_x', 'elo1_pre_x', 'elo2_pre_x', 'elo_prob1_x', 'elo_prob2_x', 'score1_x','score2_x']
y_cols = ['date_y', 'season_y','neutral_y', 'playoff_y', 'team2_y', 'team1_y', 'elo1_pre_y','elo2_pre_y', 'elo_prob1_y', 'elo_prob2_y', 'score1_y', 'score2_y']

# filling null values for games_elo merged cols
for x, y in zip(x_cols, y_cols):
    df[x] = df[x].fillna(df[y]) 

# removing y_cols from dataframe    
df = df[['schedule_date', 'schedule_season', 'schedule_week', 'team_home',
       'team_away', 'team_favorite_id', 'spread_favorite', 'over_under_line',
       'weather_temperature', 'weather_wind_mph', 'score_home', 'score_away',
       'stadium_neutral', 'home_favorite', 'away_favorite', 'over', 'neutral_x', 'playoff_x',
         'elo1_pre_x', 'elo2_pre_x', 'elo_prob1_x', 'elo_prob2_x']]

# remove _x ending from column names
df.columns = df.columns.str.replace('_x', '')
# creating result column df.loc[(df.score_home > df.score_away), 'result'
df['result'] = (df.score_home > df.score_away).astype(int)

In [50]:
# Feature Engineering
# creating 2 separate dataframes with the home teams / scores and the away teams / scores
score = df.groupby(['schedule_season', 'schedule_week', 'team_home']).mean()[['score_home', 'score_away']].reset_index()
aw_score = df.groupby(['schedule_season', 'schedule_week', 'team_away']).mean()[['score_home', 'score_away']].reset_index()

# create total pts column
score['point_diff'] = score.score_home - score.score_away
aw_score['point_diff'] = aw_score.score_away - aw_score.score_home

# append the two dataframes
score = score.append(aw_score, ignore_index=True, sort=True)

# fill null values
score.team_home.fillna(score.team_away, inplace=True)

# sort by season and week 
score.sort_values(['schedule_season', 'schedule_week'], ascending = [True, True], inplace=True)

# removing unneeded columns & changing column name 
score = score[['schedule_season', 'schedule_week', 'team_home', 'point_diff']]
score.rename(columns={'team_home' : 'team'}, inplace=True)

  score = df.groupby(['schedule_season', 'schedule_week', 'team_home']).mean()[['score_home', 'score_away']].reset_index()
  aw_score = df.groupby(['schedule_season', 'schedule_week', 'team_away']).mean()[['score_home', 'score_away']].reset_index()
  score = score.append(aw_score, ignore_index=True, sort=True)


In [51]:
# dictionary of dataframes - separate dataframe for each team
tm_dict = {}
for key in score.team.unique():
    tm_dict[key] = score[score.team == key].reset_index(drop=True)

In [52]:
# dataframe to populate
pts_diff = pd.DataFrame()

# for loop to create a rolling average of the previous games for each season
for yr in score.schedule_season.unique():
    for tm in score.team.unique():
        data = tm_dict[tm].copy()
        data = data[data.schedule_season == yr]
        
        data.loc[:, 'avg_pts_diff'] = data.point_diff.shift().expanding().mean()
        
        pts_diff = pts_diff.append(data)

  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_diff.append(data)
  pts_diff = pts_dif

In [53]:
# merging to df and changing column names
df = df.merge(pts_diff[['schedule_season', 'schedule_week', 'team', 'avg_pts_diff']], 
              left_on=['schedule_season', 'schedule_week', 'team_home'], right_on=['schedule_season', 'schedule_week', 'team'],
              how='left')

df.rename(columns={'avg_pts_diff' : 'hm_avg_pts_diff'}, inplace=True)

df = df.merge(pts_diff[['schedule_season', 'schedule_week', 'team', 'avg_pts_diff']], 
              left_on=['schedule_season', 'schedule_week', 'team_away'], right_on=['schedule_season', 'schedule_week', 'team'],
              how='left')

df.rename(columns={'avg_pts_diff' : 'aw_avg_pts_diff'}, inplace=True)

In [54]:
# average point differential over entire season
total_season = pts_diff.groupby(['schedule_season', 'team']).mean()['point_diff'].reset_index()

In [55]:
# adding schedule week for merge and adding one to the season for predictions
total_season['schedule_week'] = 1
total_season['schedule_season'] += 1
df.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,team_home,team_away,team_favorite_id,spread_favorite,over_under_line,weather_temperature,weather_wind_mph,...,playoff,elo1_pre,elo2_pre,elo_prob1,elo_prob2,result,team_x,hm_avg_pts_diff,team_y,aw_avg_pts_diff
0,1979-09-01,1979,1,TB,DET,TB,-3.0,30.0,79.0,9.0,...,,1385.204,1487.039,0.447188,0.552812,1,TB,,DET,
1,1979-09-02,1979,1,BUF,MIA,MIA,-5.0,39.0,74.0,15.0,...,,1422.82,1573.513,0.379119,0.620881,0,BUF,,MIA,
2,1979-09-02,1979,1,CHI,GB,CHI,-3.0,31.0,78.0,11.0,...,,1485.806,1462.502,0.624412,0.375588,1,CHI,,GB,
3,1979-09-02,1979,1,DEN,CIN,DEN,-3.0,31.5,69.0,6.0,...,,1579.121,1491.38,0.706666,0.293334,1,DEN,,CIN,
4,1979-09-02,1979,1,KC,IND,KC,-1.0,37.0,76.0,8.0,...,,1408.082,1420.16,0.575577,0.424423,1,KC,,IND,


In [56]:
# cleaning of columns
df = df[['schedule_date', 'schedule_season', 'schedule_week', 'team_home',
       'team_away', 'team_favorite_id', 'spread_favorite', 'over_under_line',
       'weather_temperature', 'weather_wind_mph', 'score_home', 'score_away', 'stadium_neutral', 'home_favorite',
       'away_favorite', 'hm_avg_pts_diff','aw_avg_pts_diff', 'elo1_pre', 'elo2_pre', 'elo_prob1', 'over', 'result']]

df.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,team_home,team_away,team_favorite_id,spread_favorite,over_under_line,weather_temperature,weather_wind_mph,...,stadium_neutral,home_favorite,away_favorite,hm_avg_pts_diff,aw_avg_pts_diff,elo1_pre,elo2_pre,elo_prob1,over,result
0,1979-09-01,1979,1,TB,DET,TB,-3.0,30.0,79.0,9.0,...,0,1.0,0.0,,,1385.204,1487.039,0.447188,1.0,1
1,1979-09-02,1979,1,BUF,MIA,MIA,-5.0,39.0,74.0,15.0,...,0,0.0,1.0,,,1422.82,1573.513,0.379119,0.0,0
2,1979-09-02,1979,1,CHI,GB,CHI,-3.0,31.0,78.0,11.0,...,0,1.0,0.0,,,1485.806,1462.502,0.624412,0.0,1
3,1979-09-02,1979,1,DEN,CIN,DEN,-3.0,31.5,69.0,6.0,...,0,1.0,0.0,,,1579.121,1491.38,0.706666,0.0,1
4,1979-09-02,1979,1,KC,IND,KC,-1.0,37.0,76.0,8.0,...,0,1.0,0.0,,,1408.082,1420.16,0.575577,0.0,1


In [57]:
# merge to have previous seasons average point differential
df = df.merge(total_season[['schedule_season', 'schedule_week', 'team', 'point_diff']], 
              left_on=['schedule_season', 'schedule_week', 'team_home'], right_on=['schedule_season', 'schedule_week', 'team'],
              how='left')

df.rename(columns={'point_diff' : 'hm_avg_diff'}, inplace=True)

df = df.merge(total_season[['schedule_season', 'schedule_week', 'team', 'point_diff']], 
              left_on=['schedule_season', 'schedule_week', 'team_away'], right_on=['schedule_season', 'schedule_week', 'team'],
              how='left')

df.rename(columns={'point_diff' : 'aw_avg_diff'}, inplace=True)

# fill null values
df.hm_avg_pts_diff.fillna(df.hm_avg_diff, inplace=True)
df.aw_avg_pts_diff.fillna(df.aw_avg_diff, inplace=True)

In [58]:
# cleaning of columns
df = df[['schedule_date', 'schedule_season', 'schedule_week', 'team_home',
       'team_away', 'team_favorite_id', 'spread_favorite', 'over_under_line',
       'weather_temperature', 'weather_wind_mph', 'score_home', 'score_away', 'stadium_neutral', 'home_favorite',
       'away_favorite', 'hm_avg_pts_diff','aw_avg_pts_diff', 'elo1_pre', 'elo2_pre', 'elo_prob1', 'over', 'result']]

In [59]:
# removing all rows with null values
df = df.dropna(how='any',axis=0) 

### FEATURE AND MODEL TESTING

In [60]:
# initial features possible for model
X = df[['schedule_season', 'schedule_week', 'over_under_line', 'spread_favorite', 'weather_temperature', 'weather_wind_mph',
        'home_favorite', 'hm_avg_pts_diff','aw_avg_pts_diff', 'elo1_pre', 'elo2_pre', 'elo_prob1']]

y = df['result']

In [61]:
# base model
base = LDA()

# choose 5 best features
rfe = RFE(estimator=base, n_features_to_select=5, step=1)
rfe = rfe.fit(X, y)

# features
print(rfe.support_)
print(rfe.ranking_)

[False False False  True False False  True  True  True False False  True]
[3 8 7 1 2 6 1 1 1 5 4 1]


In [62]:
final_x = rfe.transform(X)

In [63]:
# prepare models
models = []

models.append(('LRG', LogisticRegression(solver='liblinear')))
models.append(('KNB', KNeighborsClassifier()))
models.append(('GNB', GaussianNB()))
models.append(('XGB', xgb.XGBClassifier(random_state=26)))
models.append(('RFC', RandomForestClassifier(random_state=26, n_estimators=100)))
models.append(('DTC', DecisionTreeClassifier(random_state=26, criterion='entropy', max_depth=5)))

# evaluate each model by average and standard deviations of roc auc 
results = []
names = []

for name, m in models:
    kfold = model_selection.KFold(n_splits=5)
    cv_results = model_selection.cross_val_score(m, final_x, y, cv=kfold, scoring = 'roc_auc')
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

LRG: 0.688545 (0.013456)
KNB: 0.618345 (0.014315)
GNB: 0.687603 (0.014987)
XGB: 0.656877 (0.010772)
RFC: 0.654303 (0.009482)
DTC: 0.689185 (0.016106)


In [64]:
# training and testing data (2017 and 2018)
train = df.copy()
test = df.copy()
train = train.loc[train['schedule_season'] < 2017]
test = test.loc[test['schedule_season'] > 2016]
X_train = train[['over_under_line', 'spread_favorite', 'home_favorite', 'hm_avg_pts_diff', 'elo_prob1']]
y_train = train['result']
X_test = test[['over_under_line', 'spread_favorite', 'home_favorite', 'hm_avg_pts_diff', 'elo_prob1']]
y_test = test['result']
train.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,team_home,team_away,team_favorite_id,spread_favorite,over_under_line,weather_temperature,weather_wind_mph,...,stadium_neutral,home_favorite,away_favorite,hm_avg_pts_diff,aw_avg_pts_diff,elo1_pre,elo2_pre,elo_prob1,over,result
14,1979-09-06,1979,2,DEN,LAR,DEN,-2.0,31.5,67.0,8.0,...,0,1.0,0.0,10.0,-7.0,1592.275,1544.396,0.656962,0.0,0
15,1979-09-09,1979,2,IND,TB,PICK,0.0,33.0,64.0,8.0,...,0,0.0,0.0,-14.0,15.0,1397.713,1416.381,0.566285,1.0,0
16,1979-09-09,1979,2,BUF,CIN,CIN,-3.0,34.0,54.0,7.0,...,0,0.0,1.0,-2.0,-10.0,1414.802,1478.226,0.502268,1.0,1
17,1979-09-09,1979,2,CHI,MIN,CHI,-6.0,31.5,60.0,11.0,...,0,1.0,0.0,3.0,6.0,1495.817,1510.094,0.572482,1.0,1
18,1979-09-09,1979,2,DET,WAS,PICK,0.0,35.5,72.0,0.0,...,0,0.0,0.0,-15.0,-2.0,1455.863,1459.567,0.587308,1.0,0


In [65]:
# calibrate probabilities and fit model to training data
boost = xgb.XGBClassifier()
dtc = DecisionTreeClassifier(max_depth=5, criterion='entropy')
lrg = LogisticRegression(solver='liblinear')
vote = VotingClassifier(estimators=[('boost', boost), ('dtc', dtc), ('lrg', lrg)], voting='soft')

model = CCV(vote, method='isotonic', cv=3)
model.fit(X_train, y_train)

In [66]:
# predict probabilities
predicted = model.predict_proba(X_test)[:,1]

In [67]:
# ROC AUC Score higher is better while Brier Score the lower the better
print("Metrics" + "\t\t" + "My Model" + "\t" + "Elo Results")
print("ROC AUC Score: " +  "\t" + "{:.4f}".format(roc_auc_score(y_test, predicted)) + "\t\t" + "{:.4f}".format(roc_auc_score(test.result, test.elo_prob1)))
print("Brier Score: " + "\t" + "{:.4f}".format(brier_score_loss(y_test, predicted)) + "\t\t" + "{:.4f}".format(brier_score_loss(test.result, test.elo_prob1)))

Metrics		My Model	Elo Results
ROC AUC Score: 	0.7009		0.6713
Brier Score: 	0.2142		0.2225


In [68]:
# creating a column with the models probabilities to analyze vs elo fivethirtyeight
test.loc[:,'hm_prob'] = predicted
test = test[['schedule_season', 'schedule_week', 'team_home', 'team_away', 'elo_prob1', 'hm_prob', 'result']]

In [69]:
# calulate bets won (only make a bet when probability is greater than / equal to 60% or less than / equal to 40%)
test['my_bet_won'] = (((test.hm_prob >= 0.60) & (test.result == 1)) | ((test.hm_prob <= 0.40) & (test.result == 0))).astype(int)
test['elo_bet_won'] = (((test.elo_prob1 >= 0.60) & (test.result == 1)) | ((test.elo_prob1 <= 0.40) & (test.result == 0))).astype(int)

# calulate bets lost (only make a bet when probability is greater than / equal to 60% or less than / equal to 40%)
test['my_bet_lost'] = (((test.hm_prob >= 0.60) & (test.result == 0)) | ((test.hm_prob <= 0.40) & (test.result == 1))).astype(int)
test['elo_bet_lost'] = (((test.elo_prob1 >= 0.60) & (test.result == 0)) | ((test.elo_prob1 <= 0.40) & (test.result == 1))).astype(int)

In [70]:
# printing some quick overall results for my model
print("My Model Win Percentage: " + "{:.4f}".format(test.my_bet_won.sum() / (test.my_bet_lost.sum() + test.my_bet_won.sum())))
print("Total Number of Bets Won: " + str(test.my_bet_won.sum()))
print("Total Number of Bets Made: " + str((test.my_bet_lost.sum() + test.my_bet_won.sum())))
print("Possible Games: " + str(len(test)))

My Model Win Percentage: 0.7168
Total Number of Bets Won: 281
Total Number of Bets Made: 392
Possible Games: 569


In [71]:
# printing some quick overall results for fivethirtyeight's ELO model
print("ELO Model Win Percentage: " + "{:.4f}".format(test.elo_bet_won.sum()/(test.elo_bet_lost.sum() + test.elo_bet_won.sum())))
print("Total Number of Bets Won: " + str(test.elo_bet_won.sum()))
print("Total Number of Bets Made: " + str((test.elo_bet_lost.sum() + test.elo_bet_won.sum())))
print("Possible Games: " + str(len(test)))

ELO Model Win Percentage: 0.6737
Total Number of Bets Won: 256
Total Number of Bets Made: 380
Possible Games: 569


In [72]:
# creating week by week results
results_df = test.groupby(['schedule_season', 'schedule_week']).agg({'team_home' : 'count', 'my_bet_won' : 'sum', 
'elo_bet_won' : 'sum', 'my_bet_lost' : 'sum', 'elo_bet_lost' : 'sum'}).reset_index().rename(columns=
                                                                                            {'team_home' : 'total_games'})

# counting total bets for my model and the ELO model (prob >= 60% or prob <= 40%)
results_df['total_bets'] = results_df.my_bet_won + results_df.my_bet_lost
results_df['elo_total_bets'] = results_df.elo_bet_won + results_df.elo_bet_lost

# creating accuracy columns based on bets made not on total games
results_df['bet_accuracy'] = round((results_df.my_bet_won / results_df.total_bets) * 100, 2)
results_df['elo_bet_accuracy'] = round((results_df.elo_bet_won / results_df.elo_total_bets) * 100, 2)
results_df = results_df[['schedule_season', 'schedule_week', 'bet_accuracy', 'elo_bet_accuracy',
                         'total_bets', 'elo_total_bets', 'total_games']]

results_df.head()

Unnamed: 0,schedule_season,schedule_week,bet_accuracy,elo_bet_accuracy,total_bets,elo_total_bets,total_games
0,2017,1,66.67,70.0,6,10,14
1,2017,2,75.0,87.5,8,8,13
2,2017,3,54.55,62.5,11,8,15
3,2017,4,55.56,62.5,9,8,15
4,2017,5,57.14,62.5,7,8,13


# WEEK 1