# I. Comparing Event Run Values

In [1]:
#Load packages
import pandas as pd
import numpy as np

In [2]:
def Run_Expectancy(path):
    """Compute a run value for every play in a play-by-play CSV.

    Each play's base/out situation is encoded as a label such as ``"100 1"``
    (first/second/third base occupied flags, a space, then the number of
    outs). The run expectancy of a state is the mean of ``runsFuture`` over
    all retained plays that start in that state; every three-out state is
    assigned an expectancy of 0. A play's run value is::

        Run_Value = runsOnPlay + End_RE - Start_RE

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file. It must contain the columns listed in
        ``keep_cols`` below; any other columns (for example a saved index
        column such as ``Unnamed: 0``) are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per retained play with the input columns followed by
        ``Start1..3``, ``Start_State``, ``End1..3``, ``End_State``,
        ``Start_RE``, ``End_RE`` and ``Run_Value``.

    Raises
    ------
    KeyError
        If any required column is missing from the file.
    """
    keep_cols = ['home_team', 'away_team', 'half', 'gameId', 'batterName', 'batterId', 'event',
                 'start1B', 'start2B', 'start3B', 'end1B', 'end2B', 'end3B',
                 'startOuts', 'endOuts', 'runsFuture', 'runsOnPlay', 'outsInInning',
                 'venueId', 'batterPos']

    # Selecting the needed columns already discards any saved index column,
    # so no separate drop is required. The explicit copy lets us add columns
    # without pandas warning about writing to a slice.
    RE = pd.read_csv(path)[keep_cols].copy()

    # Build the occupancy flags and the "<1B><2B><3B> <outs>" label for both
    # the state before the play and the state after it.
    for src_prefix, flag_prefix, outs_col, state_col in (
            ('start', 'Start', 'startOuts', 'Start_State'),
            ('end', 'End', 'endOuts', 'End_State')):
        for base in ('1', '2', '3'):
            # A base is treated as occupied whenever its column is non-null.
            RE[flag_prefix + base] = np.where(RE[src_prefix + base + 'B'].isnull(), 0, 1)
        RE[state_col] = (RE[flag_prefix + '1'].astype(str)
                         + RE[flag_prefix + '2'].astype(str)
                         + RE[flag_prefix + '3'].astype(str)
                         + " " + RE[outs_col].astype(str))

    # Keep plays that changed the state or scored, and only rows whose
    # outsInInning equals 3 (presumably innings played to completion --
    # TODO confirm against the data dictionary).
    state_changed = RE['Start_State'] != RE['End_State']
    scored = RE['runsOnPlay'] > 0
    RE = RE[(state_changed | scored) & (RE['outsInInning'] == 3)]

    # Run expectancy of each starting state = mean runsFuture from that state.
    Start_RunExp = (RE.groupby(['Start_State'])['runsFuture']
                    .mean()
                    .reset_index()
                    .rename(columns={'runsFuture': 'Start_RE'}))
    RE = pd.merge(RE, Start_RunExp, on=['Start_State'], how='left')

    # Three-out states are added to the lookup with an expectancy of 0 so
    # that inning-ending plays get a defined End_RE.
    # DataFrame.append was removed in pandas 2.0, hence pd.concat.
    terminal_states = pd.DataFrame({
        'Start_State': [b1 + b2 + b3 + " 3" for b1 in '01' for b2 in '01' for b3 in '01'],
        'Start_RE': 0.0,
    })
    Start_RunExp = pd.concat([Start_RunExp, terminal_states], ignore_index=True)

    # The same table, keyed by end state, supplies End_RE.
    End_RunExp = Start_RunExp.rename(columns={'Start_State': 'End_State', 'Start_RE': 'End_RE'})
    RE = pd.merge(RE, End_RunExp, on=['End_State'], how='left')
    RE['Run_Value'] = RE['runsOnPlay'] + RE['End_RE'] - RE['Start_RE']

    return RE

In [3]:
# Per-play run values for each season, 2014 through 2017 (one CSV per season)
season_csv_paths = ("../MLBAM14.csv", "../MLBAM15.csv", "../MLBAM16.csv", "../MLBAM17.csv")
RE_14, RE_15, RE_16, RE_17 = [Run_Expectancy(csv_path) for csv_path in season_csv_paths]

In [4]:
# Mean run value per event type, one frame per season; the value column is
# named after the season (RV14 ... RV17) so the frames can be merged later.
Event_Value14 = RE_14.groupby('event')['Run_Value'].mean().rename('RV14').reset_index()
Event_Value15 = RE_15.groupby('event')['Run_Value'].mean().rename('RV15').reset_index()
Event_Value16 = RE_16.groupby('event')['Run_Value'].mean().rename('RV16').reset_index()
Event_Value17 = RE_17.groupby('event')['Run_Value'].mean().rename('RV17').reset_index()

In [5]:
# Combine the four seasons into one table keyed by event.
# Left joins anchored on 2014: only events present in the 2014 frame are kept,
# and an event missing from a later season gets NaN in that season's column.
Event_Value = (
    Event_Value14
    .merge(Event_Value15, on='event', how='left')
    .merge(Event_Value16, on='event', how='left')
    .merge(Event_Value17, on='event', how='left')
)
display(Event_Value)

Unnamed: 0,event,RV14,RV15,RV16,RV17
0,Batter Interference,-0.319625,-0.363838,-0.284649,-0.430019
1,Bunt Groundout,-0.194784,-0.200346,-0.218826,-0.209411
2,Bunt Lineout,-0.30381,-0.421575,-0.352295,-0.328292
3,Bunt Pop Out,-0.31644,-0.354384,-0.342802,-0.373225
4,Catcher Interference,0.380337,0.318276,0.301623,0.39907
5,Double,0.737518,0.752039,0.743467,0.779338
6,Double Play,-0.828774,-0.854665,-0.864981,-0.897164
7,Fan interference,0.63356,0.577453,0.533316,0.590743
8,Field Error,0.462976,0.48546,0.469989,0.493206
9,Fielders Choice,0.698076,0.719351,0.701447,0.764112


In [6]:
# Exclude the "Sacrifice Bunt DP" event from the comparison table
is_kept_event = Event_Value['event'].ne("Sacrifice Bunt DP")
Event_Value = Event_Value.loc[is_kept_event].copy()
display(Event_Value)

Unnamed: 0,event,RV14,RV15,RV16,RV17
0,Batter Interference,-0.319625,-0.363838,-0.284649,-0.430019
1,Bunt Groundout,-0.194784,-0.200346,-0.218826,-0.209411
2,Bunt Lineout,-0.30381,-0.421575,-0.352295,-0.328292
3,Bunt Pop Out,-0.31644,-0.354384,-0.342802,-0.373225
4,Catcher Interference,0.380337,0.318276,0.301623,0.39907
5,Double,0.737518,0.752039,0.743467,0.779338
6,Double Play,-0.828774,-0.854665,-0.864981,-0.897164
7,Fan interference,0.63356,0.577453,0.533316,0.590743
8,Field Error,0.462976,0.48546,0.469989,0.493206
9,Fielders Choice,0.698076,0.719351,0.701447,0.764112


In [7]:
# Compute the season-to-season correlation matrix of event run values.
# The season columns are selected explicitly because the frame also holds the
# string column 'event', which DataFrame.corr() rejects on pandas >= 2.0
# (numeric_only now defaults to False). The explicit list also keeps the
# matrix limited to RV14-RV17 if this cell is re-run after more columns are added.
Event_Value[['RV14', 'RV15', 'RV16', 'RV17']].corr()

Unnamed: 0,RV14,RV15,RV16,RV17
RV14,1.0,0.998819,0.997951,0.997153
RV15,0.998819,1.0,0.99757,0.996412
RV16,0.997951,0.99757,1.0,0.994964
RV17,0.997153,0.996412,0.994964,1.0


In [8]:
# Across-season mean run value per event, and the sum of squared deviations
# of the four season values from that mean.
rv14, rv15, rv16, rv17 = (Event_Value[col] for col in ('RV14', 'RV15', 'RV16', 'RV17'))
season_mean = (rv14 + rv15 + rv16 + rv17) / 4
Event_Value['Avg_RV'] = season_mean
Event_Value['SumSq'] = (
    (rv14 - season_mean) ** 2
    + (rv15 - season_mean) ** 2
    + (rv16 - season_mean) ** 2
    + (rv17 - season_mean) ** 2
)
display(Event_Value)

Unnamed: 0,event,RV14,RV15,RV16,RV17,Avg_RV,SumSq
0,Batter Interference,-0.319625,-0.363838,-0.284649,-0.430019,-0.349533,0.011787
1,Bunt Groundout,-0.194784,-0.200346,-0.218826,-0.209411,-0.205842,0.000334
2,Bunt Lineout,-0.30381,-0.421575,-0.352295,-0.328292,-0.351493,0.007724
3,Bunt Pop Out,-0.31644,-0.354384,-0.342802,-0.373225,-0.346713,0.001694
4,Catcher Interference,0.380337,0.318276,0.301623,0.39907,0.349826,0.006675
5,Double,0.737518,0.752039,0.743467,0.779338,0.753091,0.001025
6,Double Play,-0.828774,-0.854665,-0.864981,-0.897164,-0.861396,0.002402
7,Fan interference,0.63356,0.577453,0.533316,0.590743,0.583768,0.005113
8,Field Error,0.462976,0.48546,0.469989,0.493206,0.477908,0.000577
9,Fielders Choice,0.698076,0.719351,0.701447,0.764112,0.720747,0.002769


In [9]:
# Largest across-season sum of squared deviations among all events.
# Series.max() skips NaN, whereas the built-in max() gives an order-dependent
# answer when NaN is present; float() keeps the plain Python float display.
float(Event_Value['SumSq'].max())

0.025962645475026698