In [1]:
import numpy as np
import pandas as pd

import os
import glob

import matplotlib.pyplot as plt

from statcast_batter_analysis import Batter,Game,AtBat

In [2]:
# Read the working table from Data/2021/Cardinals.csv.
# (Alternative input used previously: 'Data/aggregated-data.csv'.)
df = pd.read_csv(filepath_or_buffer='Data/2021/Cardinals.csv')

In [3]:
# Dimensions of the frame as (rows, columns), right after loading.
df.shape

(18293, 92)

In [4]:
# Original column label -> label used by the rest of this notebook.
column_renames = {
    'events': 'event_result',
    'description': 'event',
}
df = df.rename(columns=column_renames)

In [5]:
# Values of the 'event' column whose rows are removed from df further down.
excluded_events = [
    'foul_bunt',
    'missed_bunt',
    'hit_by_pitch',
    'bunt_foul_tip',
]

In [6]:
# Values of the 'event_result' column whose rows are removed from df further down.
excluded_results = [
    'hit_by_pitch',
    'sac_bunt',
    'catcher_interf',
]

In [7]:
# (rows, columns) before the exclusion filters below are applied.
df.shape

(18293, 92)

In [8]:
# Keep only the rows whose 'event' is not listed in excluded_events.
# Rows with a missing 'event' are kept, exactly as with the chained != filters.
is_excluded_event = df['event'].isin(excluded_events)
df = df[~is_excluded_event]

In [9]:
# Keep only the rows whose 'event_result' is not listed in excluded_results.
# Rows with a missing 'event_result' are kept, exactly as with the chained != filters.
is_excluded_result = df['event_result'].isin(excluded_results)
df = df[~is_excluded_result]

In [10]:
# (rows, columns) after the excluded events and results have been filtered out.
df.shape

(18130, 92)

In [11]:
# Preview the first rows of the filtered frame.
df.head()

Unnamed: 0,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,player_name,batter,pitcher,event_result,event,...,fld_score,post_away_score,post_home_score,post_bat_score,post_fld_score,if_fielding_alignment,of_fielding_alignment,spin_axis,delta_home_win_exp,delta_run_exp
0,SI,2021-08-27,90.3,1.57,5.63,"O'Neill, Tyler",641933,596071,field_out,hit_into_play,...,0,3,0,3,0,Standard,Standard,159.0,0.018,-0.294
1,CH,2021-08-27,83.1,1.54,5.65,"O'Neill, Tyler",641933,596071,,swinging_strike,...,0,3,0,3,0,Standard,Standard,136.0,0.0,-0.056
2,CH,2021-08-27,82.6,1.62,5.64,"O'Neill, Tyler",641933,596071,,ball,...,0,3,0,3,0,Standard,Standard,135.0,0.0,0.024
3,SI,2021-08-27,90.2,1.56,5.63,"Goldschmidt, Paul",502671,596071,double,hit_into_play,...,0,3,0,3,0,Standard,Standard,155.0,-0.013,0.224
4,SI,2021-08-27,90.3,1.44,5.61,"Edman, Tommy",669242,596071,home_run,hit_into_play,...,0,3,0,3,0,Standard,Standard,161.0,-0.192,1.734


In [12]:
# List the column labels (for a DataFrame, keys() returns its columns).
df.keys()

Index(['pitch_type', 'game_date', 'release_speed', 'release_pos_x',
       'release_pos_z', 'player_name', 'batter', 'pitcher', 'event_result',
       'event', 'spin_dir', 'spin_rate_deprecated', 'break_angle_deprecated',
       'break_length_deprecated', 'zone', 'des', 'game_type', 'stand',
       'p_throws', 'home_team', 'away_team', 'type', 'hit_location', 'bb_type',
       'balls', 'strikes', 'game_year', 'pfx_x', 'pfx_z', 'plate_x', 'plate_z',
       'on_3b', 'on_2b', 'on_1b', 'outs_when_up', 'inning', 'inning_topbot',
       'hc_x', 'hc_y', 'tfs_deprecated', 'tfs_zulu_deprecated', 'fielder_2',
       'umpire', 'sv_id', 'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az', 'sz_top',
       'sz_bot', 'hit_distance_sc', 'launch_speed', 'launch_angle',
       'effective_speed', 'release_spin_rate', 'release_extension', 'game_pk',
       'pitcher.1', 'fielder_2.1', 'fielder_3', 'fielder_4', 'fielder_5',
       'fielder_6', 'fielder_7', 'fielder_8', 'fielder_9', 'release_pos_y',
       'estimated_ba_

In [13]:
# Subset of df restricted to the rows where woba_denom equals 1.
counts_toward_woba = df['woba_denom'] == 1
woba_df = df.loc[counts_toward_woba]

In [16]:
# Mean of woba_value over the rows of woba_df.
woba_df['woba_value'].mean()

0.3100655021834061

In [None]:
# `players` is never assigned in the visible cells (the execution counts also
# skip In [14]-[15]), so a fresh "Restart & Run All" stopped here with a
# NameError. NOTE(review): rebuilt as every distinct non-null player_name in
# df, in order of first appearance - confirm this matches the intended roster.
players = df['player_name'].dropna().unique()

# Build one Batter per player from that player's rows of df.
batters = []
for player in players:
    print(player)
    player_df = df[df['player_name']==player]
    print(player_df.shape)
    batter = Batter(player,player_df,process_games=False)

    batters.append(batter)

In [None]:
def sortFunc(x):
    """Sort key: the first entry of ``x.data.shape`` (the row count for a DataFrame)."""
    dims = x.data.shape
    return dims[0]

# Order batters in place, largest sortFunc value (most rows in .data) first.
batters.sort(key=sortFunc, reverse=True)

In [None]:
# One chart per batter for the first 12 entries of `batters`: every pitch in
# batter.data, blue where isCorrectDecision is True and red where it is False,
# drawn over the rectangle given by the batter's sz_* attributes.
for batter in batters[:12]:

    # Corners of the sz_* rectangle: bottom-left, top-left, top-right, bottom-right.
    BL = (batter.sz_left,batter.sz_btm)
    TL = (batter.sz_left,batter.sz_top)
    TR = (batter.sz_right,batter.sz_top)
    BR = (batter.sz_right,batter.sz_btm)

    batter_correct = batter.data[batter.data['isCorrectDecision']==True]
    batter_incorrect = batter.data[batter.data['isCorrectDecision']==False]

    # The percentage in the title is computed over rows with isStrike == False
    # only, although the scatter below shows every pitch.
    not_strike_df = batter.data[batter.data['isStrike']==False]
    n_not_strike = not_strike_df.shape[0]

    # Count with a boolean sum rather than value_counts()[True]: value_counts()
    # omits labels that never occur, so indexing it raised KeyError for a batter
    # with no correct decisions (and the unused value_counts()[False] lookup did
    # the same for a batter with no incorrect ones).
    correct_count = (not_strike_df['isCorrectDecision']==True).sum()

    if n_not_strike:
        correct_percent = (correct_count / n_not_strike * 100).round(1)
    else:
        # No rows with isStrike == False: the percentage is undefined.
        correct_percent = np.nan

    fig,ax = plt.subplots(figsize=(10,10))

    ax.set_title(f'{batter} - Correct Percent: {correct_percent}')

    # Closed outline: walk the four corners and return to the first one.
    x = [it[0] for it in (BL,TL,TR,BR,BL)]
    y = [it[1] for it in (BL,TL,TR,BR,BL)]

    ax.plot(x,y,color='k')

    # Two horizontal and two vertical inner lines at the sz_mid_* positions.
    ax.plot([batter.sz_left,batter.sz_right],[batter.sz_mid_btm,batter.sz_mid_btm],color='k',alpha=.5)
    ax.plot([batter.sz_left,batter.sz_right],[batter.sz_mid_top,batter.sz_mid_top],color='k',alpha=.5)

    ax.plot([batter.sz_mid_left,batter.sz_mid_left],[batter.sz_top,batter.sz_btm],color='k',alpha=.5)
    ax.plot([batter.sz_mid_right,batter.sz_mid_right],[batter.sz_top,batter.sz_btm],color='k',alpha=.5)

    ax.scatter(batter_correct['norm_plate_x'],batter_correct['plate_z'],color='tab:blue',alpha=.3)
    ax.scatter(batter_incorrect['norm_plate_x'],batter_incorrect['plate_z'],color='tab:red',alpha=.3)

    ax.grid()

    ax.set_xlim(-3,3)
    ax.set_ylim(0,6)
    plt.show()

In [None]:
# One chart per batter for the first 12 entries of `batters`, restricted to the
# rows with isStrike == False: blue where isCorrectDecision is True, red where
# it is False, drawn over the rectangle given by the batter's sz_* attributes.
for batter in batters[:12]:

    # Corners of the sz_* rectangle: bottom-left, top-left, top-right, bottom-right.
    BL = (batter.sz_left,batter.sz_btm)
    TL = (batter.sz_left,batter.sz_top)
    TR = (batter.sz_right,batter.sz_top)
    BR = (batter.sz_right,batter.sz_btm)

    not_strike_df = batter.data[batter.data['isStrike']==False]
    n_not_strike = not_strike_df.shape[0]

    batter_correct = not_strike_df[not_strike_df['isCorrectDecision']==True]
    batter_incorrect = not_strike_df[not_strike_df['isCorrectDecision']==False]

    # Count with a boolean sum rather than value_counts()[True]: value_counts()
    # omits labels that never occur, so indexing it raised KeyError for a batter
    # with no correct decisions (and the unused value_counts()[False] lookup did
    # the same for a batter with no incorrect ones).
    correct_count = (not_strike_df['isCorrectDecision']==True).sum()

    if n_not_strike:
        correct_percent = (correct_count / n_not_strike * 100).round(1)
    else:
        # No rows with isStrike == False: the percentage is undefined.
        correct_percent = np.nan

    fig,ax = plt.subplots(figsize=(10,10))

    ax.set_title(f'{batter} - Correct Percent: {correct_percent}')

    # Closed outline: walk the four corners and return to the first one.
    x = [it[0] for it in (BL,TL,TR,BR,BL)]
    y = [it[1] for it in (BL,TL,TR,BR,BL)]

    ax.plot(x,y,color='k')

    # Two horizontal and two vertical inner lines at the sz_mid_* positions.
    ax.plot([batter.sz_left,batter.sz_right],[batter.sz_mid_btm,batter.sz_mid_btm],color='k',alpha=.5)
    ax.plot([batter.sz_left,batter.sz_right],[batter.sz_mid_top,batter.sz_mid_top],color='k',alpha=.5)

    ax.plot([batter.sz_mid_left,batter.sz_mid_left],[batter.sz_top,batter.sz_btm],color='k',alpha=.5)
    ax.plot([batter.sz_mid_right,batter.sz_mid_right],[batter.sz_top,batter.sz_btm],color='k',alpha=.5)

    ax.scatter(batter_correct['norm_plate_x'],batter_correct['plate_z'],color='tab:blue',alpha=.3)
    ax.scatter(batter_incorrect['norm_plate_x'],batter_incorrect['plate_z'],color='tab:red',alpha=.3)

    ax.grid()

    ax.set_xlim(-3,3)
    ax.set_ylim(0,6)
    plt.show()

In [None]:
# One chart per batter for the first 12 entries of `batters`, restricted to the
# rows with isStrike == False and plotted in prop_plate_x / prop_plate_z
# coordinates over a fixed square from -1 to 1 on both axes.
for batter in batters[:12]:

    # Corners of the fixed square: bottom-left, top-left, top-right, bottom-right.
    BL = (-1,-1)
    TL = (-1,1)
    TR = (1,1)
    BR = (1,-1)

    not_strike_df = batter.data[batter.data['isStrike']==False]
    n_not_strike = not_strike_df.shape[0]

    batter_correct = not_strike_df[not_strike_df['isCorrectDecision']==True]
    batter_incorrect = not_strike_df[not_strike_df['isCorrectDecision']==False]

    # Count with a boolean sum rather than value_counts()[True]: value_counts()
    # omits labels that never occur, so indexing it raised KeyError for a batter
    # with no correct decisions (and the unused value_counts()[False] lookup did
    # the same for a batter with no incorrect ones).
    correct_count = (not_strike_df['isCorrectDecision']==True).sum()

    if n_not_strike:
        correct_percent = (correct_count / n_not_strike * 100).round(1)
    else:
        # No rows with isStrike == False: the percentage is undefined.
        correct_percent = np.nan

    fig,ax = plt.subplots(figsize=(10,10))

    ax.set_title(f'{batter} - Correct Percent: {correct_percent}')

    # Closed outline: walk the four corners and return to the first one.
    x = [it[0] for it in (BL,TL,TR,BR,BL)]
    y = [it[1] for it in (BL,TL,TR,BR,BL)]

    ax.plot(x,y,color='k')

    # Horizontals (the second endpoint used to be spelled `0.-.333`, which
    # evaluates to the same -0.333)
    ax.plot([-1,1],[0.333,0.333],color='k',alpha=.5)
    ax.plot([-1,1],[-0.333,-0.333],color='k',alpha=.5)

    # Verticals
    ax.plot([-0.333,-0.333],[1,-1],color='k',alpha=.5)
    ax.plot([0.333,0.333],[1,-1],color='k',alpha=.5)

    ax.scatter(batter_correct['prop_plate_x'],batter_correct['prop_plate_z'],color='tab:blue',alpha=.3)
    ax.scatter(batter_incorrect['prop_plate_x'],batter_incorrect['prop_plate_z'],color='tab:red',alpha=.3)

    ax.grid()

    ax.set_xlim(-3,3)
    ax.set_ylim(-3,3)
    plt.show()

In [None]:
# Collect the .data frame of every batter, in the current order of `batters`.
_dfs = [each_batter.data for each_batter in batters]

In [None]:
# Stack the per-batter frames into a single frame (row-wise, original index labels kept).
merged_df = pd.concat(objs=_dfs)

In [None]:
# Rows of merged_df where isCorrectDecision is False.
is_incorrect = merged_df['isCorrectDecision'] == False
incorrect_df = merged_df.loc[is_incorrect]

In [None]:
# Rows of merged_df where isStrike is False.
is_not_strike = merged_df['isStrike'] == False
outside_df = merged_df.loc[is_not_strike]

In [None]:
# Rows of merged_df where both isStrike and isCorrectDecision are False.
incorrect_outside_df = merged_df.loc[
    (merged_df['isStrike'] == False)
    & (merged_df['isCorrectDecision'] == False)
]

In [None]:
# (rows, columns) of the incorrect-decision subset.
incorrect_df.shape

In [None]:
# (rows, columns) of the subset that is both isStrike == False and isCorrectDecision == False.
incorrect_outside_df.shape

In [None]:
# (rows, columns) of the isStrike == False subset.
outside_df.shape

In [None]:
# List the column labels available on incorrect_df.
incorrect_df.keys()

In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

# Histogram of dist_to_zone for incorrect_outside_df: 100 equal-width bins
# over [0, 3]. The per-bin counts and the bin edges are kept for later use.
incorrect_outside_counts, bins, _ = ax.hist(
    incorrect_outside_df['dist_to_zone'].values,
    range=(0, 3),
    bins=100,
)

ax.set_xlim(left=0, right=3)

plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

# Histogram of dist_to_zone for outside_df: 100 equal-width bins over [0, 3].
# The per-bin counts and the bin edges are kept for later use.
outside_counts, bins, _ = ax.hist(
    outside_df['dist_to_zone'].values,
    range=(0, 3),
    bins=100,
)

ax.set_xlim(left=0, right=3)

plt.show()

In [None]:
fig,ax = plt.subplots(figsize=(8,6))

# Per-bin ratio of the two histograms' counts. Bins where outside_counts is 0
# are left as NaN (a gap in the line) instead of evaluating 0/0, which gave
# the same NaN but also emitted a RuntimeWarning.
incorrect_ratio = np.divide(
    incorrect_outside_counts,
    outside_counts,
    out=np.full_like(outside_counts, np.nan, dtype=float),
    where=outside_counts > 0,
)

# `bins` holds the bin edges (one more than the number of bins); each ratio is
# drawn at the left edge of its bin.
ax.plot(bins[:-1],incorrect_ratio)

ax.set_xlabel('dist_to_zone')
ax.set_ylabel('incorrect_outside_counts / outside_counts')

plt.show()

In [None]:
# Ascending grid edges from -3 to 3 in steps of 0.1 (61 values). The stop of
# 3.01 keeps the endpoint 3 in the range; rounding removes float drift.
x = np.arange(-3, 3.01, 0.1)
x = x.round(2)

In [None]:
# Descending grid edges from 3 to -3 in steps of 0.1 (61 values). The stop of
# -3.01 keeps the endpoint -3 in the range; rounding removes float drift.
z = np.arange(3, -3.01, -0.1)
z = z.round(2)

In [None]:
# Fraction of correct decisions in every cell of the grid spanned by the edge
# arrays `z` (rows, top to bottom) and `x` (columns, left to right).
#
# The three columns are pulled out of merged_df once and the row mask is built
# once per row, instead of re-reading the columns and slicing a new DataFrame
# for each cell. The resulting values are unchanged.
prop_x = merged_df['prop_plate_x'].values
prop_z = merged_df['prop_plate_z'].values
decisions = merged_df['isCorrectDecision'].values

rows = []
for i in np.arange(len(z)-1):
    # Row i covers z[i+1] <= prop_plate_z < z[i].
    in_row = (prop_z < z[i]) & (prop_z >= z[i+1])

    row = []
    for j in np.arange(len(x)-1):
        # Column j covers x[j] < prop_plate_x <= x[j+1].
        in_cell = in_row & (prop_x > x[j]) & (prop_x <= x[j+1])

        # Cast after masking so only the values inside the cell are converted.
        isCorrectDecision = decisions[in_cell].astype(int)
        if len(isCorrectDecision) == 0:
            # NOTE(review): a cell with no pitches is recorded as 1, so it is
            # indistinguishable from a cell that is 100% correct - confirm
            # this is intended.
            row.append(1)
        else:
            correct_percent = isCorrectDecision.sum() / len(isCorrectDecision)
            row.append(correct_percent)
    rows.append(row)
rows=np.array(rows)

In [None]:
# Make sure `rows` is a NumPy array (a fresh copy is made if it already is one).
rows = np.array(rows, copy=True)

In [None]:
fig, ax = plt.subplots(figsize=(8, 8))

# Heat map of `rows`, scaled by 100, on a red-yellow-green colour map.
hm = ax.imshow(rows * 100, cmap='RdYlGn')

# Two rectangular outlines given in image (pixel) coordinates:
# a black one from 19.5 to 39.5 on both axes ...
ax.plot(
    [19.5, 19.5, 39.5, 39.5, 19.5],
    [19.5, 39.5, 39.5, 19.5, 19.5],
    color='k',
)
# ... and a cyan one spanning 18.5-40.5 horizontally and 14.5-24.5 vertically.
ax.plot(
    [18.5, 18.5, 40.5, 40.5, 18.5],
    [14.5, 24.5, 24.5, 14.5, 14.5],
    color='c',
)

# Outer pixel edges of the image; a tick is placed every 10 pixels between them.
axmin = -.5
axmax = 59.5
axticks = np.arange(axmin, axmax + .1, 10)

# Tick labels: -3 .. 3 left to right, 3 .. -3 top to bottom.
xlabels = np.arange(-3, 3.01, 1)
zlabels = np.arange(3, -3.01, -1)

ax.set_xticks(axticks)
ax.set_xticklabels(xlabels)

ax.set_yticks(axticks)
ax.set_yticklabels(zlabels)

fig.colorbar(hm)

plt.show()