In [12]:
import requests
import pandas as pd
import pybaseball

In [13]:
# Leaderboard exports read from the working directory. Each one is joined onto
# the per-attempt stolen-base table further down the notebook.
catcher_throwing = pd.read_csv("catcher_throwing.csv")

# Rows with a missing value in ANY column are discarded at load time.
pitch_tempo = (
    pd.read_csv("pitch_tempo.csv")
    .dropna()
)

sprint_speed = pd.read_csv("sprint_speed.csv")
pitcher_running_game = pd.read_csv("pitcher_running_game.csv")

In [14]:
# statcast_2023 = pb.statcast("2023-03-29","2023-10-02") #comment out after run once
# %store statcast_2023 #comment out after run once
%store -r statcast_2023

catcher_ids = list(set(statcast_2023.fielder_2))

In [15]:
# Columns kept from each per-catcher response of the Baseball Savant
# catcher-throwing service.
sb_columns = [
    'game_pk',
    'at_bat_number',
    'runner_id',
    'pitcher_id',
    'catcher_id',
    'is_runner_sb2'
]

# Seconds to wait on a single request before giving up, so one stalled
# connection cannot hang the whole cell.
REQUEST_TIMEOUT_S = 30

# One frame per catcher is collected here and concatenated once after the
# loop. Growing a DataFrame with pd.concat inside the loop re-copies all rows
# on every iteration and, when seeded with an empty frame, degrades dtypes.
sb_frames = []

# A Session reuses the underlying connection across the per-catcher requests.
with requests.Session() as session:
    # `catcher_id` (rather than `id`) avoids shadowing the builtin in the
    # notebook's global namespace.
    for catcher_id in catcher_ids:
        url = f'https://baseballsavant.mlb.com/leaderboard/services/catcher-throwing/{catcher_id}?game_type=Regular&n=q&season_end=2023&season_start=2023&split=no&team=&type=Cat&with_team_only=1'

        response = session.get(url, timeout=REQUEST_TIMEOUT_S)
        # Fail loudly on an HTTP error instead of trying to parse an error page.
        response.raise_for_status()

        # The rows live under the 'data' key; a missing key is treated as
        # "no rows for this catcher".
        payload = response.json().get('data', [])
        throws = pd.json_normalize(payload)

        if len(throws) > 0:
            sb_frames.append(throws[sb_columns])

# Keep the expected columns even when no catcher returned any rows.
if sb_frames:
    total_sb = pd.concat(sb_frames, ignore_index=True)
else:
    total_sb = pd.DataFrame(columns=sb_columns)

# Lookup tables, renamed so that each one joins on the matching id column of
# total_sb.
# NOTE(review): the left merges below assume one row per id in each lookup
# table; duplicate ids would multiply rows in total_sb — confirm.
runner_speed = sprint_speed[["player_id", "sprint_speed"]].rename(
    columns={"player_id": "runner_id", "sprint_speed": "runner_sprint_speed"}
)
pitcher_tempo = pitch_tempo[["entity_id", "median_seconds_empty"]].rename(
    columns={"entity_id": "pitcher_id", "median_seconds_empty": "pitch_tempo"}
)
pitcher_holds = pitcher_running_game[
    [
        "player_id",
        "rate_sb2cs2",
        "pos11_r_sec_minus_prim_lead",
        "pos11_r_sec_minus_prim_lead_sb2cs2",
    ]
].rename(
    columns={
        "player_id": "pitcher_id",
        "rate_sb2cs2": "sb_att_rate",
        "pos11_r_sec_minus_prim_lead": "lead_dist_gained_opp",
        "pos11_r_sec_minus_prim_lead_sb2cs2": "lead_dist_gained_att",
    }
)
catcher_arm = catcher_throwing[
    ["player_id", "pop_time", "exchange_time", "arm_strength", "cs_aa_per_throw"]
].rename(columns={"player_id": "catcher_id"})

# Left merges keep every scraped row; the final dropna() then removes any row
# that is missing a value in any column, and the index is renumbered from 0.
total_sb = (
    total_sb
    .rename(columns={'is_runner_sb2': 'successful_sb'})
    .merge(runner_speed, how="left", on="runner_id")
    .merge(pitcher_tempo, how="left", on="pitcher_id")
    .merge(pitcher_holds, how="left", on="pitcher_id")
    .merge(catcher_arm, how="left", on="catcher_id")
    .dropna()
    .reset_index(drop=True)
)

In [16]:
# Persist the merged feature table. With pandas' default index=True, the row
# index is written as the first (unnamed) column of the file.
total_sb.to_csv(path_or_buf="stolen_base_features.csv")