In [1]:
import numpy as np
import pandas as pd
import json
from collections import defaultdict

In [10]:
np.seterr('print')
class EloCalculator:
    def __init__(self, events_path, batter_path, pitcher_path, elo_path, starting_elo=1500):
        self.dfEvents = pd.read_csv(events_path)
        self.dfBatter = pd.read_csv(batter_path)
        self.dfPitcher = pd.read_csv(pitcher_path)
        
        self.elo_table = pd.read_csv(elo_path)
        self.elo_dict = dict(zip(self.elo_table['player_id'], self.elo_table['elo_rating']))
        
        self.starting_elo = starting_elo
        
        self.batters = []
        
        self.pitchers = []
        
        self.current_index = 0

        # Pandas .apply() is the worst
        for _, row in self.dfBatter.iterrows():
            self.add_player(row, self.batters)
            
        for _, row in self.dfPitcher.iterrows():
            self.add_player(row, self.batters)
        
    def add_player(self, row, players_dict):
        players_dict.append(self.get_player_object(
            row['player_id'], row['nameFirst'], row['nameLast'], row['teamID']))
        
    def get_player_object(self, player_id, first_name, last_name, team):
        player = {
            'player_id': player_id,
            'name': first_name + ' ' + last_name,
            'team': team,
            'elo': [self.starting_elo],
            'norm_elo': [0]
        }
        return player
        
    def calc_season_elo(self, k=2):
        self.k = k
        num_rows = self.dfEvents.shape[0]
        prev_prop = 0
        i = 0
        
        for _, row in self.dfEvents.iterrows():
            curr_prop = i/num_rows
            if (curr_prop - prev_prop) > 1/10:
                print(curr_prop)
                self.save_snapshot()
                prev_prop = curr_prop

            self.update_elo(row['bat_id'], row['pit_id'], row['batter_score'], row['pitcher_score'])
            i += 1
            
    def save_snapshot(self):
        bat_elos = []
        pit_elos = []
        bat_max = 0
        pit_max = 0
        # Welcome to hell
        for player_object in self.batters:
            current_elo = self.elo_dict[player_object['player_id']]
            if current_elo > bat_max:
                bat_max = current_elo
            bat_elos.append(current_elo)

        for player_object in self.pitchers:
            current_elo = self.elo_dict[player_object['player_id']]
            if current_elo > pit_max:
                pit_max = current_elo
            pit_elos.append(current_elo)
        
        bat_mean = np.mean(bat_elos)
        pit_mean = np.mean(pit_elos)

        for player_object in self.batters:
            current_elo = self.elo_dict[player_object['player_id']]
            player_object['elo'].append(current_elo)
            player_object['norm_elo'].append((current_elo-bat_mean)/bat_max)
            
        for player_object in self.pitchers:
            current_elo = self.elo_dict[player_object['player_id']]
            player_object['elo'][self.current_index] = current_elo
            player_object['norm_elo'].append((current_elo-pit_mean)/pit_max)

    def update_elo(self, bat_id, pit_id, bat_score, pit_score):
            bat_exp, pit_exp = self.get_expected(bat_id, pit_id)
            bat_curr = self.elo_dict[bat_id]
            pit_curr = self.elo_dict[pit_id]
            
            bat_new = self.calc_new(bat_curr, bat_score, bat_exp)
            pit_new = self.calc_new(pit_curr, pit_score, pit_exp)

            self.elo_dict[bat_id] = bat_new
            self.elo_dict[pit_id] = pit_new

    def get_expected(self, bat_id, pit_id):
        bat_diff = self.elo_dict[bat_id] - self.elo_dict[pit_id]
        pit_diff = -bat_diff
        return self.calc_expected(bat_diff), self.calc_expected(pit_diff)

    def calc_expected(self, diff):
        return 1/(1+(np.power(10, (diff/400))))

    def calc_new(self, curr, score, exp):
        return curr+(self.k*(score-exp))    

In [11]:
elo = EloCalculator('data/events_elo.csv', 'data/batters.csv', 'data/pitchers.csv', 'data/elo.csv')
elo.calc_season_elo()

0.10000051613169617


  out=out, **kwargs)


0.20000103226339233
0.30000154839508847
0.40000206452678466
0.5000025806584808
0.6000030967901769
0.7000036129218732
0.8000041290535693
0.9000046451852655


In [15]:
batter_json = {'players': elo.batters}
pitcher_json = {'players': elo.pitchers}


with open('output/batters.json', 'w') as outfile:
    json.dump(batter_json, outfile)
    
with open('output/pitchers.json', 'w') as outfile:
    json.dump(pitcher_json, outfile)

In [None]:
dfElo = elo.get_df()

In [None]:
dfElo.head()

In [None]:
elo.snapshots[1]

In [None]:
dfBat = pd.read_csv('data/batters.csv')
dfPit = pd.read_csv('data/pitchers.csv')

dfBat = pd.merge(dfElo, dfBat, left_on='player_id', right_on='player_id')
dfBat = pd.merge(elo.elo_table, dfBat, left_on=['player_id'],
                 right_on=['player_id'])

dfPit = pd.merge(dfElo, dfPit, left_on='player_id', right_on='player_id')
dfPit = pd.merge(elo.elo_table, dfPit, left_on=['player_id'],
                 right_on=['player_id'])

dfBat = dfBat.drop_duplicates(subset=['player_id', 'C'])
dfPit = dfPit.drop_duplicates(subset=['player_id', 'C'])

dfBat.to_csv('output/bat_elo.csv')
dfPit.to_csv('output/pit_elo.csv')

In [None]:
dfBat

In [None]:
events = pd.read_csv('data/events_elo.csv')
events.head()

In [None]:
dfBat.head()

In [None]:
elo.elo_table.head()