In [2]:
from pybaseball import statcast, cache
import pandas as pd

cache.enable()

In [4]:
data2025 = pd.read_csv("/Users/christopherhsu/Desktop/projects/swing analysis/data/data_2025")

In [7]:
# Create flag for whether pitch/PA had 2 strikes
data2025["two_strike_flag"] = data2025["strikes"] == 2

# Quick check for the right batter IDs
data2025[["batter", "strikes", "two_strike_flag"]].head(10)

Unnamed: 0,batter,strikes,two_strike_flag
0,663616,0,False
1,663616,0,False
2,668885,2,True
3,668885,1,False
4,668885,0,False
5,621439,2,True
6,621439,2,True
7,621439,2,True
8,621439,2,True
9,621439,1,False


In [9]:
swing_events = data2025[data2025["description"].isin([
    "swinging_strike", "foul", "hit_into_play", "hit_into_play_no_out", "hit_into_play_score"
])]

outcomes = (
    swing_events.groupby(["batter", "two_strike_flag"])
    .agg(
        swings=("description", "count"),
        contact=("description", lambda x: x.isin(["foul", "hit_into_play", "hit_into_play_no_out", "hit_into_play_score"]).sum()),
        balls_in_play=("launch_speed", lambda x: x.notna().sum()),  # has batted-ball data
        avg_ev=("launch_speed", "mean"),    # exit velocity
        avg_la=("launch_angle", "mean"),    # launch angle
        avg_xwoba=("estimated_woba_using_speedangle", "mean")  # expected wOBA on contact
    )
    .reset_index()
)

outcomes["contact_rate"] = outcomes["contact"] / outcomes["swings"]


In [10]:
outcomes_pivot = outcomes.pivot_table(
    index="batter",
    columns="two_strike_flag",
    values=[
        "swings", "contact", "balls_in_play", 
        "avg_ev", "avg_la", "avg_xwoba", "contact_rate"
    ]
)

outcomes_pivot.columns = [
    f"{metric}_{'two' if flag else 'non'}"
    for metric, flag in outcomes_pivot.columns
]

outcomes_pivot = outcomes_pivot.reset_index()

outcomes_pivot.head()


Unnamed: 0,batter,avg_ev_non,avg_ev_two,avg_la_non,avg_la_two,avg_xwoba_non,avg_xwoba_two,balls_in_play_non,balls_in_play_two,contact_non,contact_two,contact_rate_non,contact_rate_two,swings_non,swings_two
0,455117,79.093204,82.365909,16.330097,12.181818,0.395048,0.151704,103.0,44.0,115.0,52.0,0.684524,0.547368,168.0,95.0
1,456781,83.055696,83.258974,18.322785,16.803419,0.322087,0.194667,158.0,117.0,169.0,127.0,0.757848,0.803797,223.0,158.0
2,457705,83.150239,83.673551,15.788095,18.213768,0.423154,0.22962,418.0,276.0,450.0,300.0,0.737705,0.769231,610.0,390.0
3,457759,82.234884,82.757143,24.726744,23.633929,0.319904,0.248975,172.0,112.0,187.0,122.0,0.846154,0.841379,221.0,145.0
4,467793,84.236623,81.394677,17.18961,19.623574,0.30046,0.228853,385.0,263.0,414.0,283.0,0.747292,0.832353,554.0,340.0
