# Clean Dataset for Model

In [1]:
import warnings

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')

In [2]:
# Clean data for modeling
#
# Read the raw pitch file, keep only the columns listed below, drop rows that are
# missing any of the required tracking fields, shorten three column names, and
# write the result to ../data/model-data.csv.

# Columns carried forward into the modeling dataset (order is preserved).
model_columns = [
    'player_name', 'p_throws', 'pitch_type', 'release_speed', 'release_spin_rate', 'spin_axis',
    'pfx_-x', 'pfx_z', 'bauer_units', 'effective_speed', 'release_pos_-x', 'release_pos_x',
    'release_pos_z', 'release_extension', 'release_pos_y', 'plate_-x', 'plate_x', 'plate_z',
    'type', 'balls', 'strikes', 'pitch_count', 'delta_run_exp', 'stand', 'bb_type', 'description',
    'events', 'hit_distance_sc', 'launch_speed', 'launch_angle', 'launch_speed_angle', 'woba_value',
    'woba_denom', 'xba', 'xwoba', 'babip_value', 'iso_value', 'at_bat_number', 'pitch_number',
    'inning', 'inning_topbot', 'home_score', 'away_score', 'post_home_score', 'post_away_score',
    'on_1b', 'on_2b', 'on_3b', 'outs_when_up',
]

# A row is dropped when any one of these fields is missing.
required_columns = [
    'pitch_type', 'release_speed', 'release_spin_rate', 'pfx_-x', 'pfx_z',
    'release_extension', 'release_pos_-x', 'release_pos_z', 'delta_run_exp',
]

# Rename some columns
col_dict = {
    'release_speed': 'velo',
    'release_spin_rate': 'spin_rate',
    'launch_speed': 'exit_velo',
}

pitch = (
    pd.read_csv('../data/mlb-pitches.csv', index_col=[0])
    .loc[:, model_columns]
    .dropna(subset=required_columns)
    .rename(columns=col_dict)
)
pitch.to_csv('../data/model-data.csv')

In [3]:
# Run Expectancy Table

# 2010-2015 Run Expectancy, one value per base/out state.
# Values are listed by outs (0, 1, 2); inside each outs group the base states
# cycle with on_1b changing fastest, then on_2b, then on_3b.
re_values = [
    0.481, 0.859, 1.100, 1.437, 1.350, 1.784, 1.964, 2.292,  # 0 outs
    0.254, 0.509, 0.664, 0.884, 0.950, 1.130, 1.376, 1.541,  # 1 out
    0.098, 0.224, 0.319, 0.429, 0.353, 0.478, 0.580, 0.752,  # 2 outs
]

# Enumerate the 24 (on_1b, on_2b, on_3b, outs_when_up) states in the same order
# as the values above.
base_out_states = [
    (first, second, third, outs)
    for outs in (0, 1, 2)
    for third in (0, 1)
    for second in (0, 1)
    for first in (0, 1)
]
matrix = [[*state, value] for state, value in zip(base_out_states, re_values)]

# NOTE: the name `re` is kept because the RV cell merges on it; be aware that it
# would shadow the standard-library `re` module if that were imported here.
re = pd.DataFrame(matrix, columns=['on_1b', 'on_2b', 'on_3b', 'outs_when_up', 're'])
re.to_csv('../data/run_expectancy_table.csv')

# Ball-strike count table, read from disk and previewed.
count_re = pd.read_csv('../data/count-re.csv')
print(count_re.shape)
count_re.head()

(24, 5)


In [4]:
# Add RV to model-data.csv
#
# Reads the cleaned pitch data, attaches an arm-angle/slot per pitcher and pitch
# type, normalises event/description labels, computes the change in run
# expectancy (RE24) per row, derives event and count linear weights from it, and
# writes the combined run-value columns to ../data/model-pitches-rv.csv.
# Relies on `re` and `count_re` created by the run-expectancy cell.

data = pd.read_csv('../data/model-data.csv', index_col=[0])

# --- Arm angle ---------------------------------------------------------------
# Mean release point per pitcher / pitch type. Columns are selected with a list:
# selecting several columns with a bare tuple is rejected by pandas >= 2.0.
arm_angle = data.groupby(['player_name', 'pitch_type'], as_index=False)[
    ['release_pos_x', 'release_pos_z']
].mean()
# NOTE(review): the 0.7 scaling factor is undocumented — confirm its origin.
adj = (arm_angle['release_pos_z'] - arm_angle['release_pos_x']) * 0.7
opp = arm_angle['release_pos_x'].abs()
hyp = np.sqrt((opp ** 2) + (adj ** 2))
# Law-of-cosines angle between `adj` and `hyp`. np.arccos returns radians, so the
# stored number is radians (rounded to 3 decimals) times 100.
arm_angle['arm_angle'] = np.arccos(((adj ** 2 + hyp ** 2) - opp ** 2) / (2 * (adj * hyp))).round(3) * 100
# Slot buckets. The upper bucket starts right after 40 so that values between 40
# and 41 are no longer misfiled as 'sidearm'. NaN angles fall to the default.
angle = arm_angle['arm_angle']
arm_angle['slot'] = np.select(
    [(angle >= 0) & (angle <= 40), (angle > 40) & (angle <= 60)],
    ['overhead', '3/4'],
    default='sidearm',
)
arm_angle = arm_angle.drop(columns=['release_pos_x', 'release_pos_z'])
data = pd.merge(data, arm_angle, how='left', on=['player_name', 'pitch_type'])

# --- Event / description clean-up --------------------------------------------
# Drop rows for these events; .copy() gives an independent frame so the column
# assignments below never write into a slice of the merged frame.
data = data[~data['events'].isin(['wild_pitch', 'passed_ball', 'stolen_base_2b'])].copy()

# Events collapsed into a generic 'field_out'. Each name is listed once and
# comma-separated (adjacent string literals without a comma are concatenated).
out_events = [
    'sac_bunt', 'double_play', 'triple_play', 'strikeout', 'strikeout_double_play',
    'other_out', 'sac_fly_double_play', 'sac_bunt_double_play',
    'caught_stealing_2b', 'caught_stealing_3b', 'caught_stealing_home',
    'pickoff_1b', 'pickoff_2b', 'pickoff_3b',
    'pickoff_caught_stealing_2b', 'pickoff_caught_stealing_3b',
]
# Results are assigned back instead of using inplace=True on a selected column,
# which does not modify the parent frame under pandas Copy-on-Write.
data['events'] = data['events'].replace(out_events, 'field_out')
data['events'] = data['events'].replace(['catcher_interf'], 'field_error')

data['description'] = data['description'].replace(
    ['called_strike', 'swinging_strike', 'swinging_strike_blocked', 'missed_bunt', 'foul'], 'strike')
data['description'] = data['description'].replace(['passed_ball', 'wild_pitch'], 'ball')

# --- Indicator columns -------------------------------------------------------
# Anything that is not type 'B' counts as a strike here (including missing type).
data['is_strike'] = data['type'].ne('B').astype('int64')
data['is_ball'] = data['type'].eq('B').astype('int64')

data['inning_topbot'] = data['inning_topbot'].map({'Top': 0, 'Bot': 1})
# Base columns become 1 when the stored value is greater than 1, else 0
# (missing values compare False and become 0).
for base in ['on_1b', 'on_2b', 'on_3b']:
    data[base] = data[base].gt(1).astype('int64')

# 1 when the row has an event recorded, 0 when `events` is missing.
data['final_pitch_ab'] = data['events'].notna().astype('int64')
data['out_to_end_inning'] = (
    data['events'].eq('field_out') & data['outs_when_up'].eq(2)
).astype('int64')
data['home_runs'] = data['post_home_score'] - data['home_score']
data['away_runs'] = data['post_away_score'] - data['away_score']
data['runs'] = data['home_runs'] + data['away_runs']

# --- Merge RE Table with data ------------------------------------------------
data = pd.merge(data, re, how='left', on=['on_1b', 'on_2b', 'on_3b', 'outs_when_up'])
# Mask is built after the merge because the merge resets the row index.
inning_over = data['out_to_end_inning'].eq(1)
# Negated row-to-row difference of `re`: the previous row's value minus this row's.
# NOTE(review): this presumes the previous row in file order is the following
# pitch of the same half-inning — confirm the row ordering of the input.
# Rows with no difference available (e.g. the first row) are filled with -0.098.
data['re_change'] = (-data['re'].diff(1)).round(3).fillna(-0.098)
# End state is forced to 0 on inning-ending outs, and the change on those rows is
# the full loss of the current run expectancy.
data['re_end_state'] = (data['re'] + data['re_change']).mask(inning_over, 0)
data['re_change'] = data['re_change'].mask(inning_over, -data['re'])
# Normalise negative zero produced by negating a zero difference.
data['re_change'] = data['re_change'].replace(-0.0, 0.0)
data['re24'] = data['re_change'] + data['runs']

# --- Create Linear Weights with average RE by events -------------------------
mlb_lw = data.groupby(['events'], as_index=False)['re24'].mean()
lw_ball_in_play = (
    mlb_lw
    .assign(re24=mlb_lw['re24'].round(3))
    .rename(columns={'re24': 'lin_weight_above_avg'})
)

# For lin weights based on base out state
data = pd.merge(data, lw_ball_in_play, how='left', on=['events'])
# NOTE(review): 0.25 and 1.209 are hard-coded constants — confirm their source
# before reusing this cell on a different dataset.
data['lin_weight_above_outs'] = data['lin_weight_above_avg'] + 0.25
data['woba_scale'] = 1.209
# Rows without an event have no event weight; they contribute 0.
data['lin_weights_above_avg_scale'] = (
    data['lin_weight_above_avg'] * data['woba_scale']).round(3).fillna(0)
data['lin_weights_above_outs_scale'] = (
    data['lin_weight_above_outs'] * data['woba_scale']).round(3).fillna(0)

# --- Merge ball strike count RE with data ------------------------------------
data = pd.merge(data, count_re, how='left', on=['pitch_count', 'is_strike', 'is_ball'])
data['count_re'] = data['wraa_change'] + data['re24']

# Create Linear Weights with average RE by ball strike count
mlb_lw_bs = data.groupby(['pitch_count'], as_index=False)['count_re'].mean()
lw_bs = (
    mlb_lw_bs
    .assign(count_re=mlb_lw_bs['count_re'].round(3))
    .rename(columns={'count_re': 'bs_lin_weight'})
)
data = pd.merge(data, lw_bs, how='left', on=['pitch_count'])
data['bs_lin_weight_scale'] = (data['bs_lin_weight'] * data['woba_scale']).round(3)

# --- Run value = event weight + count weight ---------------------------------
data['rv_above_avg'] = data['lin_weights_above_avg_scale'] + data['bs_lin_weight_scale']
data['rv'] = data['lin_weights_above_outs_scale'] + data['bs_lin_weight_scale']

data.to_csv('../data/model-pitches-rv.csv')

# Fully-qualified option name: the short 'max_columns' pattern matches more than
# one option in newer pandas and raises OptionError.
pd.set_option('display.max_columns', None)
print(data.shape)
data.head()

(705396, 74)


Unnamed: 0,player_name,p_throws,pitch_type,velo,spin_rate,spin_axis,pfx_-x,pfx_z,bauer_units,effective_speed,release_pos_-x,release_pos_x,release_pos_z,release_extension,release_pos_y,plate_-x,plate_x,plate_z,type,balls,strikes,pitch_count,delta_run_exp,stand,bb_type,description,events,hit_distance_sc,exit_velo,launch_angle,launch_speed_angle,woba_value,woba_denom,xba,xwoba,babip_value,iso_value,at_bat_number,pitch_number,inning,inning_topbot,home_score,away_score,post_home_score,post_away_score,on_1b,on_2b,on_3b,outs_when_up,arm_angle,slot,is_strike,is_ball,final_pitch_ab,out_to_end_inning,home_runs,away_runs,runs,re,re_change,re_end_state,re24,lin_weight_above_avg,lin_weight_above_outs,woba_scale,lin_weights_above_avg_scale,lin_weights_above_outs_scale,woba,wraa_change,count_re,bs_lin_weight,bs_lin_weight_scale,rv_above_avg,rv
0,"Smith, Will",L,FF,92.3,2330.0,148.0,-8.28,16.56,25.24377,92.8,-1.4,1.4,6.8,6.5,54.03,0.69,-0.69,2.83,X,1,2,1-2,-0.073,R,ground_ball,hit_into_play,field_out,13.0,95.2,-13.0,2.0,0.0,1.0,0.174,0.158,0.0,0.0,61,4,9,0,5,0,5,0,0,0,0,2,36.0,overhead,1,0,1,1,0,0,0,0.098,-0.098,0.0,-0.098,-0.25,0.0,1.209,-0.302,0.0,0.223,-0.184,-0.282,-0.141,-0.17,-0.472,-0.17
1,"Smith, Will",L,SL,80.6,2254.0,315.0,9.24,5.76,27.965261,81.2,-1.6,1.6,6.64,6.4,54.15,0.71,-0.71,2.62,S,1,1,1-1,-0.027,R,,strike,,108.0,75.3,75.0,,,,,,,,61,3,9,0,5,0,5,0,0,0,0,2,45.8,3/4,1,0,0,0,0,0,0,0.098,0.0,0.098,0.0,,,1.209,0.0,0.0,0.293,-0.058,-0.058,-0.006,-0.007,-0.007,-0.007
2,"Smith, Will",L,CU,75.5,1940.0,328.0,7.8,-6.12,25.695364,75.2,-1.46,1.46,6.88,6.2,54.34,0.04,-0.04,2.46,S,1,0,1-0,-0.02,R,,strike,,157.0,83.5,65.0,,,,,,,,61,2,9,0,5,0,5,0,0,0,0,2,38.4,overhead,1,0,0,0,0,0,0,0.098,0.0,0.098,0.0,,,1.209,0.0,0.0,0.355,-0.051,-0.051,0.001,0.001,0.001,0.001
3,"Smith, Will",L,CU,75.0,2017.0,330.0,8.28,-8.28,26.893333,74.5,-1.53,1.53,6.83,5.9,54.61,-2.1,2.1,3.89,B,0,0,0-0,0.016,R,,ball,,,,,,,,,,,,61,1,9,0,5,0,5,0,0,0,0,2,38.4,overhead,0,1,0,0,0,0,0,0.098,0.0,0.098,0.0,,,1.209,0.0,0.0,0.314,0.034,0.034,-0.003,-0.004,-0.004,-0.004
4,"Smith, Will",L,FF,91.2,2281.0,143.0,-7.56,15.36,25.010965,90.9,-1.49,1.49,6.66,6.3,54.15,0.31,-0.31,2.8,X,1,0,1-0,-0.189,L,ground_ball,hit_into_play,field_out,9.0,93.3,-18.0,2.0,0.0,1.0,0.1,0.09,0.0,0.0,60,2,9,0,5,0,5,0,0,0,0,1,36.0,overhead,1,0,1,0,0,0,0,0.254,-0.156,0.098,-0.156,-0.25,0.0,1.209,-0.302,0.0,0.355,-0.051,-0.207,0.001,0.001,-0.301,0.001


In [5]:
# Work on a copy so the run-value frame `data` stays untouched.
data2 = data.copy()

# Per-row pitch counter (1 when pitch_type is present, else 0); it is summed per
# pitcher / pitch type in the aggregation cells. notna() is used rather than an
# identity test against np.nan, which only matches that exact float object.
data2['n_pitches'] = data2['pitch_type'].notna().astype('int64')

print(data2.shape)
data2.head()

(705396, 75)


Unnamed: 0,player_name,p_throws,pitch_type,velo,spin_rate,spin_axis,pfx_-x,pfx_z,bauer_units,effective_speed,release_pos_-x,release_pos_x,release_pos_z,release_extension,release_pos_y,plate_-x,plate_x,plate_z,type,balls,strikes,pitch_count,delta_run_exp,stand,bb_type,description,events,hit_distance_sc,exit_velo,launch_angle,launch_speed_angle,woba_value,woba_denom,xba,xwoba,babip_value,iso_value,at_bat_number,pitch_number,inning,inning_topbot,home_score,away_score,post_home_score,post_away_score,on_1b,on_2b,on_3b,outs_when_up,arm_angle,slot,is_strike,is_ball,final_pitch_ab,out_to_end_inning,home_runs,away_runs,runs,re,re_change,re_end_state,re24,lin_weight_above_avg,lin_weight_above_outs,woba_scale,lin_weights_above_avg_scale,lin_weights_above_outs_scale,woba,wraa_change,count_re,bs_lin_weight,bs_lin_weight_scale,rv_above_avg,rv,n_pitches
0,"Smith, Will",L,FF,92.3,2330.0,148.0,-8.28,16.56,25.24377,92.8,-1.4,1.4,6.8,6.5,54.03,0.69,-0.69,2.83,X,1,2,1-2,-0.073,R,ground_ball,hit_into_play,field_out,13.0,95.2,-13.0,2.0,0.0,1.0,0.174,0.158,0.0,0.0,61,4,9,0,5,0,5,0,0,0,0,2,36.0,overhead,1,0,1,1,0,0,0,0.098,-0.098,0.0,-0.098,-0.25,0.0,1.209,-0.302,0.0,0.223,-0.184,-0.282,-0.141,-0.17,-0.472,-0.17,1
1,"Smith, Will",L,SL,80.6,2254.0,315.0,9.24,5.76,27.965261,81.2,-1.6,1.6,6.64,6.4,54.15,0.71,-0.71,2.62,S,1,1,1-1,-0.027,R,,strike,,108.0,75.3,75.0,,,,,,,,61,3,9,0,5,0,5,0,0,0,0,2,45.8,3/4,1,0,0,0,0,0,0,0.098,0.0,0.098,0.0,,,1.209,0.0,0.0,0.293,-0.058,-0.058,-0.006,-0.007,-0.007,-0.007,1
2,"Smith, Will",L,CU,75.5,1940.0,328.0,7.8,-6.12,25.695364,75.2,-1.46,1.46,6.88,6.2,54.34,0.04,-0.04,2.46,S,1,0,1-0,-0.02,R,,strike,,157.0,83.5,65.0,,,,,,,,61,2,9,0,5,0,5,0,0,0,0,2,38.4,overhead,1,0,0,0,0,0,0,0.098,0.0,0.098,0.0,,,1.209,0.0,0.0,0.355,-0.051,-0.051,0.001,0.001,0.001,0.001,1
3,"Smith, Will",L,CU,75.0,2017.0,330.0,8.28,-8.28,26.893333,74.5,-1.53,1.53,6.83,5.9,54.61,-2.1,2.1,3.89,B,0,0,0-0,0.016,R,,ball,,,,,,,,,,,,61,1,9,0,5,0,5,0,0,0,0,2,38.4,overhead,0,1,0,0,0,0,0,0.098,0.0,0.098,0.0,,,1.209,0.0,0.0,0.314,0.034,0.034,-0.003,-0.004,-0.004,-0.004,1
4,"Smith, Will",L,FF,91.2,2281.0,143.0,-7.56,15.36,25.010965,90.9,-1.49,1.49,6.66,6.3,54.15,0.31,-0.31,2.8,X,1,0,1-0,-0.189,L,ground_ball,hit_into_play,field_out,9.0,93.3,-18.0,2.0,0.0,1.0,0.1,0.09,0.0,0.0,60,2,9,0,5,0,5,0,0,0,0,1,36.0,overhead,1,0,1,0,0,0,0,0.254,-0.156,0.098,-0.156,-0.25,0.0,1.209,-0.302,0.0,0.355,-0.051,-0.207,0.001,0.001,-0.301,0.001,1


In [6]:
# Per pitcher / handedness / pitch type: mean pitch metrics, total pitches, total
# run value, and run value per 100 pitches. Saved to ../data/test-rv.csv.
pitch_keys = ['player_name', 'p_throws', 'pitch_type']

# Columns are selected with a list; selecting several columns from a groupby with
# a bare tuple is rejected by pandas >= 2.0.
avg_metrics = data2.groupby(pitch_keys, as_index=False)[
    ['velo', 'spin_rate', 'pfx_-x', 'pfx_z',
     'release_pos_-x', 'release_pos_z',
     'release_extension',
     'arm_angle']
].mean()

sum_rv = data2.groupby(pitch_keys, as_index=False)[['n_pitches', 'rv']].sum()

rv = pd.merge(avg_metrics, sum_rv, how='left', on=pitch_keys)
rv['RV/100'] = ((rv['rv'] / rv['n_pitches']) * 100).round(2)
# Display precision per column; columns not listed are left as computed.
rv = rv.round({
    'velo': 1,
    'spin_rate': 0,
    'pfx_-x': 2,
    'pfx_z': 2,
    'release_extension': 2,
    'release_pos_-x': 2,
    'release_pos_z': 2,
})

rv.to_csv('../data/test-rv.csv')

# Preview last so the frame is actually rendered as the cell output.
print(rv.shape)
rv.head()

(3469, 14)


In [22]:
# Same aggregation as the RV/100 table, but `rv` is both averaged and summed so
# the two can be compared. Saved to ../data/test-rv2.csv.
pitch_keys = ['player_name', 'p_throws', 'pitch_type']

# Columns are selected with a list; selecting several columns from a groupby with
# a bare tuple is rejected by pandas >= 2.0.
avg_metrics2 = data2.groupby(pitch_keys, as_index=False)[
    ['velo', 'spin_rate', 'pfx_-x', 'pfx_z',
     'release_pos_-x', 'release_pos_z',
     'release_extension',
     'arm_angle', 'rv']
].mean()

sum_pitches2 = data2.groupby(pitch_keys, as_index=False)[['rv', 'n_pitches']].sum()

# `rv` exists on both sides, so the merge suffixes it: rv_x is the mean (left),
# rv_y is the sum (right). They are renamed to avg_rv / rv below.
rv2 = pd.merge(avg_metrics2, sum_pitches2, how='left', on=pitch_keys)
rv2 = rv2.round({
    'velo': 1,
    'spin_rate': 0,
    'pfx_-x': 2,
    'pfx_z': 2,
    'release_extension': 2,
    'release_pos_-x': 2,
    'release_pos_z': 2,
})
rv2 = rv2.rename(columns={'rv_x': 'avg_rv', 'rv_y': 'rv'})
rv2.to_csv('../data/test-rv2.csv')

In [29]:
# Cross-check column: mean run value times pitch count, to compare against the
# summed `rv` column. (A preceding rv / n_pitches assignment to the same column
# was removed because it was overwritten before ever being read.)
rv2['pp'] = rv2['avg_rv'] * rv2['n_pitches']
print(rv2.shape)
rv2.head()

(3469, 15)


Unnamed: 0,player_name,p_throws,pitch_type,velo,spin_rate,pfx_-x,pfx_z,release_pos_-x,release_pos_z,release_extension,arm_angle,avg_rv,rv,n_pitches,pp
0,"Abad, Fernando",L,CH,75.1,1638.0,-10.65,14.23,-0.71,6.42,5.88,17.7,-0.02287,-1.052,46,-1.052
1,"Abad, Fernando",L,FC,82.6,2310.0,3.38,2.74,-1.14,6.02,5.86,32.2,0.04875,0.78,16,0.78
2,"Abad, Fernando",L,FF,91.7,2112.0,-5.34,16.2,-0.45,6.18,5.86,11.3,0.157611,2.837,18,2.837
3,"Abad, Fernando",L,KC,76.4,2502.0,7.23,-9.18,-0.73,6.26,5.63,18.8,0.043,3.354,78,3.354
4,"Abad, Fernando",L,SI,92.4,2145.0,-13.19,12.92,-0.56,6.29,5.83,13.9,0.05473,7.717,141,7.717


In [14]:
# Total run value per pitcher / handedness / pitch type.
rv_grouping = ['player_name', 'p_throws', 'pitch_type']
rv_by_pitch = data2.groupby(rv_grouping, as_index=False)['rv']
sum_pitches3 = rv_by_pitch.sum()
sum_pitches3.head()

Unnamed: 0,player_name,p_throws,pitch_type,rv
0,"Abad, Fernando",L,CH,-1.052
1,"Abad, Fernando",L,FC,0.78
2,"Abad, Fernando",L,FF,2.837
3,"Abad, Fernando",L,KC,3.354
4,"Abad, Fernando",L,SI,7.717


In [24]:
# Preview the first five rows of the unrounded per-pitch-type averages.
avg_metrics2.head(n=5)

Unnamed: 0,player_name,p_throws,pitch_type,velo,spin_rate,pfx_-x,pfx_z,release_pos_-x,release_pos_z,release_extension,arm_angle,rv
0,"Abad, Fernando",L,CH,75.134783,1637.73913,-10.651304,14.225217,-0.713696,6.423696,5.880435,17.7,-0.02287
1,"Abad, Fernando",L,FC,82.59375,2310.375,3.375,2.745,-1.14,6.0225,5.85625,32.2,0.04875
2,"Abad, Fernando",L,FF,91.688889,2112.111111,-5.34,16.2,-0.453333,6.175556,5.855556,11.3,0.157611
3,"Abad, Fernando",L,KC,76.408974,2501.858974,7.230769,-9.18,-0.734872,6.259744,5.625641,18.8,0.043
4,"Abad, Fernando",L,SI,92.356028,2144.503546,-13.185532,12.923404,-0.561064,6.294468,5.832624,13.9,0.05473


In [27]:
# Spot check: summary statistics of the row-level data for one pitcher / pitch type.
is_abad_ff = data['player_name'].eq('Abad, Fernando') & data['pitch_type'].eq('FF')
data.loc[is_abad_ff].describe()

Unnamed: 0,velo,spin_rate,spin_axis,pfx_-x,pfx_z,bauer_units,effective_speed,release_pos_-x,release_pos_x,release_pos_z,release_extension,release_pos_y,plate_-x,plate_x,plate_z,balls,strikes,delta_run_exp,hit_distance_sc,exit_velo,launch_angle,launch_speed_angle,woba_value,woba_denom,xba,xwoba,babip_value,iso_value,at_bat_number,pitch_number,inning,inning_topbot,home_score,away_score,post_home_score,post_away_score,on_1b,on_2b,on_3b,outs_when_up,arm_angle,is_strike,is_ball,final_pitch_ab,out_to_end_inning,home_runs,away_runs,runs,re,re_change,re_end_state,re24,lin_weight_above_avg,lin_weight_above_outs,woba_scale,lin_weights_above_avg_scale,lin_weights_above_outs_scale,woba,wraa_change,count_re,bs_lin_weight,bs_lin_weight_scale,rv_above_avg,rv
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,8.0,8.0,8.0,4.0,6.0,6.0,4.0,4.0,6.0,6.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,6.0,6.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
mean,91.688889,2112.111111,154.0,-5.34,16.2,22.985762,91.127778,-0.453333,0.453333,6.175556,5.855556,54.64,-0.327778,0.327778,2.856667,1.666667,1.777778,0.175778,187.75,85.7125,16.0,4.5,0.958333,1.0,0.6075,0.84325,0.5,0.666667,58.722222,4.666667,6.611111,0.666667,5.888889,3.611111,5.944444,3.611111,0.444444,0.0,0.055556,1.166667,11.3,0.5,0.5,0.333333,0.0,0.055556,0.0,0.055556,0.363389,0.112278,0.475667,0.167833,0.5235,0.7735,1.209,0.211056,0.311722,0.282611,-0.076667,0.091167,-0.127667,-0.154111,0.056944,0.157611
std,8.190015,248.94835,22.210226,0.999129,1.50275,1.27165,8.120548,0.218982,0.218982,0.153899,0.182216,0.1757,1.023314,1.023314,0.83918,0.840168,0.548319,0.31482,151.667635,17.49926,29.061511,1.914854,0.657584,0.0,0.349069,0.573727,0.547723,1.21106,11.166155,1.455214,1.144752,0.485071,2.987993,2.913233,3.038425,2.913233,0.51131,0.0,0.235702,0.785905,3.655712e-15,0.514496,0.514496,0.485071,0.0,0.235702,0.0,0.235702,0.238144,0.270524,0.443115,0.339897,0.54205,0.54205,2.28482e-16,0.469736,0.576277,0.058723,0.167219,0.309664,0.058597,0.070748,0.461266,0.562984
min,59.2,1226.0,139.0,-7.08,14.16,20.709459,58.9,-0.88,0.03,5.91,5.6,54.14,-1.83,-1.41,0.81,0.0,0.0,-0.142,3.0,67.6,-31.0,2.0,0.0,1.0,0.089,0.089,0.0,0.0,26.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.098,-0.156,0.098,-0.156,-0.25,0.0,1.209,-0.302,0.0,0.223,-0.318,-0.382,-0.168,-0.203,-0.505,-0.203
25%,93.025,2059.75,144.75,-6.09,14.97,22.173908,92.425,-0.5375,0.295,6.055,5.8,54.5825,-1.2575,-0.35,2.4125,1.0,2.0,0.0045,83.25,71.325,4.0,3.5,0.75,1.0,0.5795,0.55025,0.0,0.0,52.0,4.0,6.25,0.0,4.0,3.0,4.0,3.0,0.0,0.0,0.0,1.0,11.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.224,0.0,0.1295,0.0,0.35,0.6,1.209,0.0,0.0,0.223,-0.226,-0.04025,-0.168,-0.203,-0.17375,-0.17375
50%,93.8,2112.0,150.0,-5.64,16.08,22.654584,93.15,-0.49,0.49,6.18,5.8,54.68,-0.445,0.445,2.865,2.0,2.0,0.043,181.5,79.25,15.0,5.0,0.9,1.0,0.746,0.995,0.5,0.0,64.5,4.5,7.0,1.0,6.5,3.0,6.5,3.0,0.0,0.0,0.0,1.0,11.3,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.254,0.0,0.366,0.0,0.458,0.708,1.209,0.0,0.0,0.273,-0.0155,0.041,-0.143,-0.1725,-0.17,-0.17
75%,94.35,2258.5,153.5,-4.38,17.13,23.899903,93.85,-0.295,0.5375,6.2775,5.9,54.7375,0.35,1.2575,3.3275,2.0,2.0,0.1905,258.0,103.275,28.5,6.0,1.1625,1.0,0.774,1.288,1.0,0.75,66.5,5.75,7.0,1.0,8.75,3.0,9.0,3.0,1.0,0.0,0.0,2.0,11.3,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.502,0.0,0.509,0.19125,0.689,0.939,1.209,0.285,0.5115,0.30875,0.041,0.1685,-0.141,-0.17,0.15375,0.38025
max,95.4,2380.0,240.0,-3.72,20.04,25.481799,94.7,-0.03,0.88,6.41,6.4,54.91,1.41,1.83,4.26,3.0,2.0,0.951,425.0,108.2,65.0,6.0,2.0,1.0,0.849,1.294,1.0,3.0,68.0,7.0,8.0,1.0,9.0,13.0,9.0,13.0,1.0,0.0,1.0,2.0,11.3,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.859,0.925,1.784,1.0,1.395,1.645,1.209,1.687,1.989,0.384,0.255,0.699,0.0,0.0,1.512,1.814


In [26]:
# Spot check: the aggregated row for the same pitcher / pitch type, summarised the
# same way so it can be compared with the row-level means.
is_abad_ff_avg = avg_metrics2['player_name'].eq('Abad, Fernando') & avg_metrics2['pitch_type'].eq('FF')
avg_metrics2.loc[is_abad_ff_avg].describe()

Unnamed: 0,velo,spin_rate,pfx_-x,pfx_z,release_pos_-x,release_pos_z,release_extension,arm_angle,rv
count,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
mean,91.688889,2112.111111,-5.34,16.2,-0.453333,6.175556,5.855556,11.3,0.157611
std,,,,,,,,,
min,91.688889,2112.111111,-5.34,16.2,-0.453333,6.175556,5.855556,11.3,0.157611
25%,91.688889,2112.111111,-5.34,16.2,-0.453333,6.175556,5.855556,11.3,0.157611
50%,91.688889,2112.111111,-5.34,16.2,-0.453333,6.175556,5.855556,11.3,0.157611
75%,91.688889,2112.111111,-5.34,16.2,-0.453333,6.175556,5.855556,11.3,0.157611
max,91.688889,2112.111111,-5.34,16.2,-0.453333,6.175556,5.855556,11.3,0.157611
