In [1]:
# Dependencies
import pandas as pd

In [2]:
# Import the clean data set of all games since 2002 (outside of current season).
# The relative path uses forward slashes so it resolves on every OS; a
# backslash is only a path separator on Windows.
data_df = (
    pd.read_csv('data/clean_data_set.csv', index_col='datetime')
    # presumably a leftover index column written by an earlier export -- TODO confirm
    .drop(columns='Unnamed: 0')
    # 'datetime' is the index name; the ELO loop further down walks rows in this order
    .sort_values('datetime')
)

In [3]:
# Lookup from lowercase team codes to the upper-case abbreviations used as
# rating keys. Only the values are needed here; this older mapping is reused
# because typing out all 32 names again is tedious.
teams = {
    'crd': 'ARI', 'atl': 'ATL', 'rav': 'BAL', 'buf': 'BUF',
    'car': 'CAR', 'chi': 'CHI', 'cin': 'CIN', 'cle': 'CLE',
    'dal': 'DAL', 'den': 'DEN', 'det': 'DET', 'gnb': 'GNB',
    'htx': 'HOU', 'clt': 'IND', 'jax': 'JAX', 'kan': 'KAN',
    'sdg': 'LAC', 'ram': 'LAR', 'mia': 'MIA', 'min': 'MIN',
    'nwe': 'NWE', 'nor': 'NOR', 'nyg': 'NYG', 'nyj': 'NYJ',
    'rai': 'LVR', 'phi': 'PHI', 'pit': 'PIT', 'sfo': 'SFO',
    'sea': 'SEA', 'tam': 'TAM', 'oti': 'TEN', 'was': 'WAS',
}

# Rating parameters: every team starts at elo_init_value, and k scales the
# size of each rating change in elo_update.
elo_init_value = 1300
k = 20

# Dictionary of form {team : current elo score}, seeded with the initial value.
elo_dict = {abbreviation: elo_init_value for abbreviation in teams.values()}

In [4]:
# ELO Functions
def elo_expected(teamA, teamB):
    """Return the expected scores of two teams as [expected_A, expected_B].

    Current ratings are looked up in the module-level ``elo_dict`` by team
    name. Each expectation is the logistic curve on a 400-point scale:
    1 / (1 + 10 ** ((opponent_rating - own_rating) / 400)).

    Raises:
        KeyError: if either team name is missing from ``elo_dict``.
    """
    own_a = elo_dict[teamA]
    own_b = elo_dict[teamB]
    # Same formula for both sides, with the roles of the two ratings swapped.
    return [
        1 / (1 + 10 ** ((opponent - own) / 400))
        for own, opponent in ((own_a, own_b), (own_b, own_a))
    ]

def elo_update(teamA, teamB, winner):
    """Return the post-game ratings as [teamA_rating, teamB_rating].

    ``winner`` is compared against 1 (teamA won) and 0 (teamB won). Any other
    value is treated as a tie and both current ratings are returned unchanged.
    Ratings are read from the module-level ``elo_dict`` and scaled by the
    module-level ``k``; ``elo_dict`` itself is not modified here, the caller
    stores the returned values.
    """
    expected_a, expected_b = elo_expected(teamA, teamB)

    if winner == 1:
        actual_a, actual_b = 1, 0
    elif winner == 0:
        actual_a, actual_b = 0, 1
    else:
        # Tie game: hand back the original ELO scores untouched.
        return [elo_dict[teamA], elo_dict[teamB]]

    # Each side moves by k times (actual result - expected result).
    return [
        elo_dict[teamA] + k * (actual_a - expected_a),
        elo_dict[teamB] + k * (actual_b - expected_b),
    ]

In [5]:
# Show the first five rows of data_df.
data_df.head()

Unnamed: 0_level_0,team,season,opp,day_week,week,W/L,ot,record,home/away,team_score,...,team_rush_yards,team_turnover,1D_allowed,opp_total_yards,opp_pass_yards,opp_rush_yards,opp_turnover,offense_expected_points,defense_expected_points,sp_expected_points
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-09-05 20:38:00,NYG,2002,SFO,Thu,1,0,0.0,0-1,HOME,13.0,...,43.0,3.0,13.0,279.0,166.0,113.0,1.0,-4.72,-0.79,3.46
2002-09-08 13:04:00,CAR,2002,BAL,Sun,1,1,0.0,1-0,HOME,10.0,...,145.0,0.0,15.0,289.0,212.0,77.0,1.0,-4.38,6.68,4.2
2002-09-08 13:04:00,GNB,2002,ATL,Sun,1,1,1.0,1-0,HOME,37.0,...,211.0,1.0,22.0,374.0,194.0,180.0,1.0,22.78,-16.03,-3.75
2002-09-08 13:04:00,CIN,2002,LAC,Sun,1,0,0.0,0-1,HOME,6.0,...,36.0,1.0,27.0,401.0,160.0,241.0,0.0,-7.0,-22.74,3.17
2002-09-08 13:04:00,BUF,2002,NYJ,Sun,1,0,1.0,0-1,HOME,31.0,...,142.0,3.0,18.0,266.0,193.0,73.0,1.0,3.16,0.59,-7.81


In [14]:
# Adds columns for each team's ELO before and after every game.

# Start from the initial rating on every run so that re-executing this cell
# does not continue from ratings left in elo_dict by a previous execution.
# The dict is updated in place because elo_update reads the same object.
for team_name in elo_dict:
    elo_dict[team_name] = elo_init_value

team_1_elo_before = []
team_1_elo_after = []
team_2_elo_before = []
team_2_elo_after = []

# Walk the games in row order, reading the column values directly. The frame
# is indexed by 'datetime', so integer lookups such as data_df['team'][0]
# depend on a deprecated positional fallback in Series.__getitem__.
for team, opp, result in zip(data_df['team'], data_df['opp'], data_df['W/L']):
    team_1_elo_before.append(elo_dict[team])
    team_2_elo_before.append(elo_dict[opp])
    updated_elo = elo_update(team, opp, result)
    team_1_elo_after.append(updated_elo[0])
    team_2_elo_after.append(updated_elo[1])
    # Store the new ratings so the next game involving either team sees them.
    elo_dict[team] = updated_elo[0]
    elo_dict[opp] = updated_elo[1]

data_df['team_elo_before'] = team_1_elo_before
data_df['team_elo_after'] = team_1_elo_after
data_df['opp_elo_before'] = team_2_elo_before
data_df['opp_elo_after'] = team_2_elo_after

In [15]:
# Show the last five rows of data_df, which now carry the four ELO columns
# assigned in the previous cell.
data_df.tail()

Unnamed: 0_level_0,team,season,opp,day_week,week,W/L,ot,record,home/away,team_score,...,opp_pass_yards,opp_rush_yards,opp_turnover,offense_expected_points,defense_expected_points,sp_expected_points,team_elo_before,team_elo_after,opp_elo_before,opp_elo_after
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-12 15:05:00,KAN,2019,HOU,Sun,19,1,0.0,13-4,HOME,51.0,...,348.0,94.0,1.0,28.73,-5.46,-6.77,1425.985013,1433.031611,1320.223273,1313.176675
2020-01-12 18:40:00,GNB,2019,SEA,Sun,19,1,0.0,14-3,HOME,28.0,...,265.0,110.0,0.0,18.63,-15.53,1.8,1365.587364,1376.348881,1392.096482,1381.334966
2020-01-19 15:05:00,KAN,2019,TEN,Sun,20,1,0.0,14-4,HOME,35.0,...,210.0,85.0,0.0,29.52,-5.92,-9.99,1433.031611,1439.709035,1313.039937,1306.362513
2020-01-19 18:40:00,SFO,2019,GNB,Sun,20,1,0.0,15-3,HOME,37.0,...,296.0,62.0,3.0,18.03,-5.47,2.28,1308.221318,1320.157435,1376.348881,1364.412763
2020-02-02 18:30:00,SFO,2019,KAN,Sun,21,0,0.0,15-4,NEUTRAL,20.0,...,268.0,129.0,2.0,5.19,-18.54,1.58,1320.157435,1313.468738,1439.709035,1446.397733


In [16]:
# Write the frame, including the ELO columns, to disk. The relative path uses
# a forward slash so it resolves on every OS; a backslash is only a path
# separator on Windows.
# NOTE(review): the output name has no .csv extension; it is kept unchanged so
# anything that already reads this file keeps working.
data_df.to_csv('data/elo_data_set')