In [1]:
import numpy as np
import pandas as pd

In [99]:
class EloCalculator:
    def __init__(self, events_path, elo_path):
        self.elo_path = elo_path
        self.events = pd.read_csv('data/events_elo.csv')
        self.elo_table = pd.read_csv(self.elo_path)
        self.elo_dict = dict(zip(self.elo_table['player_id'], self.elo_table['elo_rating']))

    def calc_season_elo(self, k=2):
        self.k = k
        for _, row in self.events.iterrows():
            self.update_elo(row['bat_id'], row['pit_id'], row['batter_score'], row['pitcher_score'])

    def update_elo(self, bat_id, pit_id, bat_score, pit_score):
            bat_exp, pit_exp = self.get_expected(bat_id, pit_id)
            bat_curr = self.elo_dict[bat_id]
            pit_curr = self.elo_dict[pit_id]
            
            bat_new = self.calc_new(bat_curr, bat_score, bat_exp)
            pit_new = self.calc_new(pit_curr, pit_score, pit_exp)

            self.elo_dict[bat_id] = bat_new
            self.elo_dict[pit_id] = pit_new

    def get_expected(self, bat_id, pit_id):
        bat_diff = self.elo_dict[bat_id] - elo_dict[pit_id]
        pit_diff = -bat_diff
        return self.calc_expected(bat_diff), self.calc_expected(pit_diff)

    def calc_expected(self, diff):
        return 1/(1+(np.power(10, (diff/400))))

    def calc_new(self, curr, score, exp):
        return curr+(self.k*(score-exp))
    
    def get_dict(self):
        return self.elo_dict
    
    def get_df(self):
        list_dict = {}
        for k, v in self.elo_dict.items():
            list_dict[k] = [v]
        dfElo = pd.DataFrame.from_dict(list_dict, orient="index").reset_index()
        dfElo.columns = ["player_id", "elo"]
        return dfElo
        
    def reset_df(self):
        self.elo_table = pd.read_csv(self.elo_path)        

In [102]:
elo = EloCalculator('data/events_elo.csv', 'data/elo.csv')
elo.calc_season_elo()

In [103]:
dfElo = elo.get_df()

In [104]:
dfElo.head()

Unnamed: 0,player_id,elo
0,zimmr001,978.987428
1,zimmj003,2231.946184
2,wertj001,1241.275871
3,waltz001,1498.856074
4,tracc001,1415.709909


In [117]:
dfBat = pd.read_csv('data/batters.csv')
dfPit = pd.read_csv('data/pitchers.csv')

dfBat = pd.merge(dfBat, dfElo, left_on='player_id', right_on='player_id')
dfPit = pd.merge(dfPit, dfElo, left_on='player_id', right_on='player_id')

dfBat.to_csv('data/bat_elo.csv')
dfPit.to_csv('data/pit_elo.csv')

In [115]:
dfBat.head()

Unnamed: 0,player_id,nameFirst,nameLast,score,elo
0,cabrm001,Miguel,Cabrera,377.68,1181.091551
1,goldp001,Paul,Goldschmidt,349.07,1045.904104
2,troum001,Mike,Trout,347.8,1151.714699
3,davic003,Chris,Davis,341.03,1026.470536
4,canor001,Robinson,Cano,323.92,1014.934505


In [116]:
dfPit.head()

Unnamed: 0,player_id,nameFirst,nameLast,score,elo
0,kersc001,Clayton,Kershaw,463.95,2279.934282
1,schem001,Max,Scherzer,446.2,2247.087803
2,darvy001,Yu,Darvish,427.6,2309.29852
3,waina001,Adam,Wainwright,411.75,2424.482538
4,lee-c003,Cliff,Lee,388.2,2178.640151
