In [1]:
import numpy as np
import pandas as pd
import json

In [13]:
class EloCalculator:    
    def __init__(self, events_path, batter_path, pitcher_path, elo_path, starting_elo=1500):
        self.dfEvents = pd.read_csv(events_path)
        self.dfBatter = pd.read_csv(batter_path)
        self.dfPitcher = pd.read_csv(pitcher_path)
        
        self.elo_table = pd.read_csv(elo_path)
        self.elo_dict = dict(zip(self.elo_table['player_id'], self.elo_table['elo_rating']))
        
        self.starting_elo = starting_elo
        
        self.batters = {'players': []}
        self.pitchers = {'players': []}

        # Pandas .apply() is the worst
        for _, row in self.dfBatter.iterrows():
            self.batters['players'].append(self.create_players_dict(row, self.batters))
            
        for _, row in self.dfPitcher.iterrows():
            self.pitchers['players'].append(self.create_players_dict(row, self.pitchers))
        
    def create_players_dict(self, row, players_dict):
        players_dict['players'].append(self.get_player_object(
            row['player_id'], row['nameFirst'], row['nameLast'], row['teamID']))
        
    def get_player_object(self, player_id, first_name, last_name, team):
        player = {
            'player_id': player_id,
            'name': first_name + ' ' + last_name,
            'team': team,
            'elo': [self.starting_elo],
            'norm_elo': [0]
        }
        return player
        
    def calc_season_elo(self, k=2):
        self.k = k
        num_rows = self.dfEvents.shape[0]
        prev_prop = -1
        i = 0
        
        for _, row in self.dfEvents.iterrows():
            curr_prop = i/num_rows
            if (curr_prop - prev_prop) > 1/16:
                self.save_snapshot()
                prev_prop = curr_prop

            self.update_elo(row['bat_id'], row['pit_id'], row['batter_score'], row['pitcher_score'])
            i += 1
            
    def save_snapshot(self):
        bat_elos = []
        pit_elos = []
        
        # Welcome to hell
        print(self.batters['players'])
        for player_object in self.batters['players']:
            print(player_object)
            bat_elos.append(self.elo_dict[player_object['player_id']])

        for player_object in self.pitchers['players']:
            pit_elos.append(self.elo_dict[player_object['player_id']])
        
        bat_mean = np.mean(bat_elos)
        bat_std = np.std(bat_elos)
        
        pit_mean = np.mean(pit_elos)
        pit_std = np.std(pit_elos)
        
        for player_object in self.batters['players']:
            current_elo = self.elo_dict[player_object['player_id']]
            player_object['elo'].append(current_elo)
            player_object['norm_elo'].append((current_elo-bat_mean)/bat_std)
            
        for player_object in self.pitchers['players']:
            current_elo = self.elo_dict[player_object['player_id']]
            player_object['elo'].append(current_elo)
            player_object['norm_elo'].append((current_elo-pit_mean)/pit_std)

    def update_elo(self, bat_id, pit_id, bat_score, pit_score):
            bat_exp, pit_exp = self.get_expected(bat_id, pit_id)
            bat_curr = self.elo_dict[bat_id]
            pit_curr = self.elo_dict[pit_id]
            
            bat_new = self.calc_new(bat_curr, bat_score, bat_exp)
            pit_new = self.calc_new(pit_curr, pit_score, pit_exp)

            self.elo_dict[bat_id] = bat_new
            self.elo_dict[pit_id] = pit_new

    def get_expected(self, bat_id, pit_id):
        bat_diff = self.elo_dict[bat_id] - self.elo_dict[pit_id]
        pit_diff = -bat_diff
        return self.calc_expected(bat_diff), self.calc_expected(pit_diff)

    def calc_expected(self, diff):
        return 1/(1+(np.power(10, (diff/400))))

    def calc_new(self, curr, score, exp):
        return curr+(self.k*(score-exp))    

In [14]:
elo = EloCalculator('data/events_elo.csv', 'data/batters.csv', 'data/pitchers.csv', 'data/elo.csv')
elo.calc_season_elo()

[{'player_id': 'cabrm001', 'name': 'Miguel Cabrera', 'team': 'DET', 'elo': [1500], 'norm_elo': [0]}, None, {'player_id': 'goldp001', 'name': 'Paul Goldschmidt', 'team': 'ARI', 'elo': [1500], 'norm_elo': [0]}, None, {'player_id': 'troum001', 'name': 'Mike Trout', 'team': 'LAA', 'elo': [1500], 'norm_elo': [0]}, None, {'player_id': 'canor001', 'name': 'Robinson Cano', 'team': 'NYA', 'elo': [1500], 'norm_elo': [0]}, None, {'player_id': 'vottj001', 'name': 'Joey Votto', 'team': 'CIN', 'elo': [1500], 'norm_elo': [0]}, None, {'player_id': 'belta001', 'name': 'Adrian Beltre', 'team': 'TEX', 'elo': [1500], 'norm_elo': [0]}, None, {'player_id': 'fielp001', 'name': 'Prince Fielder', 'team': 'DET', 'elo': [1500], 'norm_elo': [0]}, None, {'player_id': 'mccua001', 'name': 'Andrew McCutchen', 'team': 'PIT', 'elo': [1500], 'norm_elo': [0]}, None, {'player_id': 'carpm002', 'name': 'Matt Carpenter', 'team': 'SLN', 'elo': [1500], 'norm_elo': [0]}, None, {'player_id': 'encae001', 'name': 'Edwin Encarnacio

TypeError: 'NoneType' object is not subscriptable

In [None]:
elo.batters

In [None]:
dfElo = elo.get_df()

In [None]:
dfElo.head()

In [None]:
elo.snapshots[1]

In [None]:
dfBat = pd.read_csv('data/batters.csv')
dfPit = pd.read_csv('data/pitchers.csv')

dfBat = pd.merge(dfElo, dfBat, left_on='player_id', right_on='player_id')
dfBat = pd.merge(elo.elo_table, dfBat, left_on=['player_id'],
                 right_on=['player_id'])

dfPit = pd.merge(dfElo, dfPit, left_on='player_id', right_on='player_id')
dfPit = pd.merge(elo.elo_table, dfPit, left_on=['player_id'],
                 right_on=['player_id'])

dfBat = dfBat.drop_duplicates(subset=['player_id', 'C'])
dfPit = dfPit.drop_duplicates(subset=['player_id', 'C'])

dfBat.to_csv('output/bat_elo.csv')
dfPit.to_csv('output/pit_elo.csv')

In [None]:
dfBat

In [None]:
events = pd.read_csv('data/events_elo.csv')
events.head()

In [None]:
dfBat.head()

In [None]:
elo.elo_table.head()