In [23]:
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss

In [24]:
# Scaling factor for rating changes: elo_update multiplies by K when it
# computes how many points a game moves between the two teams.
K = 20.

In [25]:
# Rating points added to a team's Elo for a game it plays at home; the game
# loop applies it to the winner when WLoc is "H" and to the loser when "A".
home_adv = 100.

In [26]:
# NOTE(review): no visible cell references data_dir -- the results CSV is read
# from './DataFiles' instead. Confirm whether this setting is still needed.
data_dir = '../input'

In [27]:
# Load the regular-season results table (the preview shows one row per game
# with winner/loser team IDs, scores, location and overtime count), then
# display the first rows as a sanity check.
reg_season = pd.read_csv('./DataFiles/RegularSeasonCompactResults.csv')
reg_season.head()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT
0,1985,20,1228,81,1328,64,N,0
1,1985,25,1106,77,1354,70,H,0
2,1985,25,1112,63,1223,56,H,0
3,1985,25,1165,70,1432,54,H,0
4,1985,25,1192,86,1447,74,H,0


In [28]:
# Every team that appears as either a winner or a loser, de-duplicated.
team_ids = set(reg_season['WTeamID']) | set(reg_season['LTeamID'])
len(team_ids)

364

In [29]:
# This dictionary is used as a lookup for each team's current Elo rating
# while the algorithm iterates through each game

In [30]:
# Every team starts from the same rating of 1500.
elo_dict = dict.fromkeys(team_ids, 1500)


In [31]:
# New columns to help us iterate through the Elo ratings:
#   Margin      - winner's score minus loser's score
#   WELO / LELO - placeholders for the winner's / loser's rating, filled in
#                 by the game loop

reg_season['Margin'] = reg_season['WScore'].sub(reg_season['LScore'])
reg_season['WELO'] = None
reg_season['LELO'] = None

In [32]:
#ELO Calculation 

def elo_pred(elo1, elo2):
    """Return the win probability of the team rated ``elo1`` over ``elo2``.

    Uses the logistic Elo curve with base 10 and a 400-point scale, so equal
    ratings give 0.5 and the result grows toward 1 as ``elo1 - elo2`` grows.
    """
    rating_gap = elo1 - elo2
    return 1. / (1. + 10. ** (-rating_gap / 400.))

def expected_margin(elo_diff):
    """Return the linear term ``7.5 + 0.006 * elo_diff``.

    elo_update divides its margin-of-victory factor by this value, so a larger
    rating gap in the winner's favour shrinks the resulting multiplier.
    """
    intercept, slope = 7.5, 0.006
    return intercept + slope * elo_diff

def elo_update(WELO, LELO, Margin):
    """Compute the winner's pre-game win probability and the points exchanged.

    Parameters
    ----------
    WELO : float
        Winner's rating going into the game, including any home-court bonus
        the caller has already added.
    LELO : float
        Loser's rating going into the game, on the same basis as ``WELO``.
    Margin : number
        Winner's score minus loser's score.

    Returns
    -------
    tuple
        ``(pred, update)`` where ``pred`` is ``elo_pred(WELO, LELO)`` and
        ``update`` is the number of rating points the caller should add to the
        winner and subtract from the loser.
    """
    elo_diff = WELO - LELO
    pred = elo_pred(WELO, LELO)
    # Margin-of-victory multiplier. It is computed from the Margin argument so
    # the result depends only on the values passed in, never on whatever a
    # module-level variable happens to hold when the function is called.
    mult = ((Margin + 3.) ** 0.8) / expected_margin(elo_diff)
    # The less expected the win (small pred), the larger the update.
    update = K * mult * (1. - pred)
    return (pred, update)

In [33]:
#Now on to iterating over the games in the DF 

In [34]:
# The game loop relies on rows being labelled 0..n-1 in order; stop early if
# the frame has been filtered or re-sorted without resetting its index.
assert np.all(np.arange(reg_season.shape[0]) == reg_season.index.values), 'Index is out of order'

In [35]:
preds = []        # winner's pre-game win probability, one entry per game
welo_after = []   # winner's rating after each game
lelo_after = []   # loser's rating after each game

# Home-court bonus as (winner bonus, loser bonus), keyed by WLoc. Any other
# WLoc value gives neither team a bonus.
loc_bonus = {"H": (home_adv, 0.0), "A": (0.0, home_adv)}

# Loop over all games in row order. Ratings carry over from one game to the
# next through elo_dict, which is mutated in place -- re-running this cell
# without rebuilding elo_dict continues from the already-updated ratings.
for win, loss, margin, wloc in zip(reg_season['WTeamID'], reg_season['LTeamID'],
                                   reg_season['Margin'], reg_season['WLoc']):

    # Does either team get a home-court advantage?
    win_ad, loss_ad = loc_bonus.get(wloc, (0.0, 0.0))

    # Get elo updates as a result of the game. The bonus only affects the
    # prediction; the stored ratings themselves never include it.
    pred, update = elo_update(elo_dict[win] + win_ad, elo_dict[loss] + loss_ad, margin)
    elo_dict[win] += update
    elo_dict[loss] -= update
    preds.append(pred)

    # Remember the post-game ratings for this row
    welo_after.append(elo_dict[win])
    lelo_after.append(elo_dict[loss])

# Store the new elos in the games dataframe with one column assignment each,
# instead of two scalar DataFrame writes per game.
reg_season['WELO'] = welo_after
reg_season['LELO'] = lelo_after

In [36]:
# Inspect the most recent games to see the ratings at the end of the data.
reg_season.tail()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,Margin,WELO,LELO
150679,2017,132,1276,71,1458,56,N,0,15,1904.71,1936.38
150680,2017,132,1343,71,1463,59,N,0,12,1710.38,1584.06
150681,2017,132,1348,70,1433,63,N,0,7,1790.62,1841.22
150682,2017,132,1374,71,1153,56,N,0,15,1965.93,1906.94
150683,2017,132,1407,59,1402,53,N,0,6,1447.51,1449.7


In [37]:
# Inspect the earliest games: ratings here are only one update away from 1500.
reg_season.head()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,Margin,WELO,LELO
0,1985,20,1228,81,1328,64,N,0,17,1514.65,1485.35
1,1985,25,1106,77,1354,70,H,0,7,1505.61,1494.39
2,1985,25,1112,63,1223,56,H,0,7,1505.61,1494.39
3,1985,25,1165,70,1432,54,H,0,16,1509.37,1490.63
4,1985,25,1192,86,1447,74,H,0,12,1507.76,1492.24


In [38]:
# Average negative log of the probability given to each game's actual winner
# (lower is better).
-np.mean(np.log(preds))

0.5322015765612655

In [39]:
def final_elo_per_season(df, team_id):
    """Return one team's end-of-season Elo rating for every season it played.

    For each season the team's last game (highest ``DayNum``) is located and
    the rating stored on that row for the team is taken: ``WELO`` if the team
    won that game, ``LELO`` if it lost.

    Parameters
    ----------
    df : pandas.DataFrame
        Games table with columns ``Season``, ``DayNum``, ``WTeamID``,
        ``LTeamID``, ``WELO`` and ``LELO``. It is not modified.
    team_id
        Identifier matched against ``WTeamID`` and ``LTeamID``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Team_ID``, ``Season`` and ``Season_ELO`` with one row per
        season, indexed by the label of the game row the rating came from.
    """
    # Boolean indexing already produces a new frame, so only this team's games
    # (and only the columns needed below) are copied -- not the whole table.
    played = (df.WTeamID == team_id) | (df.LTeamID == team_id)
    team_games = df.loc[played, ['Season', 'DayNum', 'LTeamID', 'WELO', 'LELO']]

    # Keep the chronologically last game of each season.
    last_games = (
        team_games
        .sort_values(['Season', 'DayNum'])
        .drop_duplicates(['Season'], keep='last')
    )

    # Every remaining row has the team as winner or loser: take LELO where it
    # lost and WELO otherwise.
    lost_last = last_games.LTeamID == team_id
    season_elo = last_games.LELO.where(lost_last, last_games.WELO)

    return pd.DataFrame({
        'Team_ID': team_id,
        'Season': last_games.Season,
        'Season_ELO': season_elo,
    })

In [40]:
# One small frame of end-of-season ratings per team, stacked into one table.
new_list = [
    final_elo_per_season(reg_season, team_id)
    for team_id in team_ids
]
season_elos = pd.concat(new_list)

In [41]:
# Spot-check a random handful of rows; the fixed seed makes the displayed
# sample the same on every run of the notebook.
season_elos.sample(10, random_state=0)

Unnamed: 0,Season,Season_ELO,Team_ID
118442,2011,1439.96,1110
123740,2012,1498.65,1150
107798,2009,1417.42,1259
108099,2009,1548.89,1138
118630,2011,1806.06,1350
51857,1997,1574.25,1233
128698,2013,1249.66,1357
51926,1997,1397.4,1140
64992,2000,1704.97,1160
47862,1996,1281.06,1200


In [43]:
# Write the ratings without the index: it only holds the label of the game row
# each rating was taken from. to_csv documents `index` as a bool, so pass
# False explicitly rather than relying on None being falsy.
season_elos.to_csv('season_elos.csv', index=False)