In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
swing_merged = pd.read_csv("/Users/christopherhsu/Desktop/projects/swing analysis/data/swing_merged.csv")
outcomes_pivot = pd.read_csv("/Users/christopherhsu/Desktop/projects/swing analysis/data/outcomes_pivot_2025.csv")
merged_full = pd.read_csv("/Users/christopherhsu/Desktop/projects/swing analysis/data/merged_full.csv")

In [3]:
print(list(merged_full.columns))

['id', 'name', 'side', 'avg_bat_speed_non', 'swing_tilt_non', 'attack_angle_non', 'attack_direction_non', 'ideal_attack_angle_rate_non', 'avg_intercept_y_vs_plate_non', 'avg_intercept_y_vs_batter_non', 'avg_batter_y_position_non', 'avg_batter_x_position_non', 'competitive_swings_non', 'avg_bat_speed_two', 'swing_tilt_two', 'attack_angle_two', 'attack_direction_two', 'ideal_attack_angle_rate_two', 'avg_intercept_y_vs_plate_two', 'avg_intercept_y_vs_batter_two', 'avg_batter_y_position_two', 'avg_batter_x_position_two', 'competitive_swings_two', 'delta_attack_angle', 'delta_bat_speed', 'delta_swing_tilt', 'delta_attack_direction', 'swings_non_x', 'swings_two_x', 'changer_flag', 'batter', 'avg_ev_non', 'avg_ev_two', 'avg_la_non', 'avg_la_two', 'avg_xwoba_non', 'avg_xwoba_two', 'balls_in_play_non', 'balls_in_play_two', 'contact_non', 'contact_two', 'contact_rate_non', 'contact_rate_two', 'swings_non_y', 'swings_two_y', 'delta_ev', 'delta_la', 'delta_xwoba', 'delta_contact_rate']


In [4]:
# Mechanics deltas already exist in merged_full
mechanics_deltas = ["delta_attack_angle", "delta_bat_speed", "delta_swing_tilt", "delta_attack_direction"]

# Create outcome deltas from paired columns
merged_full["delta_ev"] = merged_full["avg_ev_two"] - merged_full["avg_ev_non"]
merged_full["delta_la"] = merged_full["avg_la_two"] - merged_full["avg_la_non"]
merged_full["delta_xwoba"] = merged_full["avg_xwoba_two"] - merged_full["avg_xwoba_non"]
merged_full["delta_contact_rate"] = merged_full["contact_rate_two"] - merged_full["contact_rate_non"]

outcome_deltas = ["delta_ev", "delta_la", "delta_xwoba", "delta_contact_rate"]

print(merged_full[mechanics_deltas + outcome_deltas].head())


   delta_attack_angle  delta_bat_speed  delta_swing_tilt  \
0            0.265667         1.407420          0.133144   
1            1.927582         2.671433         -0.098548   
2            0.192446         0.901673          0.781471   
3            1.499869         1.142154          0.718347   
4            1.432437         1.550081          0.516675   

   delta_attack_direction  delta_ev  delta_la  delta_xwoba  delta_contact_rate  
0                0.063380  0.189719 -3.923913    -0.189471            0.022525  
1               -1.125978 -3.018942 -5.745125    -0.315077            0.078440  
2                0.355824  0.027404  0.454556    -0.307010            0.029234  
3               -1.157578 -1.102765 -3.905045    -0.267199            0.070953  
4               -0.910245 -2.030948 -2.612886    -0.264202            0.055126  


In [5]:
# Select all deltas
all_deltas = mechanics_deltas + outcome_deltas

# Correlation matrix
corr_matrix = merged_full[all_deltas].corr()

print(corr_matrix.loc[mechanics_deltas, outcome_deltas])


                        delta_ev  delta_la  delta_xwoba  delta_contact_rate
delta_attack_angle     -0.171612 -0.154244     0.018585            0.413026
delta_bat_speed        -0.269325 -0.053030     0.051264            0.359446
delta_swing_tilt        0.220847 -0.074166    -0.091127           -0.111185
delta_attack_direction  0.067102  0.071317    -0.050066           -0.260865


League-wide outcomes on two-strike and non two-strike counts

In [6]:
from scipy import stats

# Compute mean deltas
print(merged_full[outcome_deltas].mean())

# Run one-sample t-tests for each outcome delta
for col in outcome_deltas:
    tstat, pval = stats.ttest_1samp(merged_full[col].dropna(), 0)
    print(f"{col}: t={tstat:.3f}, p={pval:.3f}")


delta_ev             -1.121712
delta_la             -1.721074
delta_xwoba          -0.161614
delta_contact_rate    0.027544
dtype: float64
delta_ev: t=-9.937, p=0.000
delta_la: t=-8.646, p=0.000
delta_xwoba: t=-35.341, p=0.000
delta_contact_rate: t=10.395, p=0.000


In [7]:
import statsmodels.api as sm

# Define predictors (mechanics deltas)
X = merged_full[["delta_attack_angle", "delta_bat_speed", "delta_swing_tilt", "delta_attack_direction"]]
X = sm.add_constant(X)

# Regress each outcome delta on mechanics deltas
for outcome in ["delta_ev", "delta_la", "delta_xwoba", "delta_contact_rate"]:
    y = merged_full[outcome]
    model = sm.OLS(y, X).fit()
    print(f"\nOutcome: {outcome}")
    print(model.summary())



Outcome: delta_ev
                            OLS Regression Results                            
Dep. Variable:               delta_ev   R-squared:                       0.117
Model:                            OLS   Adj. R-squared:                  0.098
Method:                 Least Squares   F-statistic:                     6.079
Date:                Sun, 21 Sep 2025   Prob (F-statistic):           0.000129
Time:                        22:45:44   Log-Likelihood:                -339.00
No. Observations:                 189   AIC:                             688.0
Df Residuals:                     184   BIC:                             704.2
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const    

In [8]:
for outcome in ["delta_la", "delta_xwoba", "delta_contact_rate"]:
    y = merged_full[outcome]
    model = sm.OLS(y, X).fit()
    print(f"\nOutcome: {outcome}")
    print(model.summary())



Outcome: delta_la
                            OLS Regression Results                            
Dep. Variable:               delta_la   R-squared:                       0.047
Model:                            OLS   Adj. R-squared:                  0.026
Method:                 Least Squares   F-statistic:                     2.245
Date:                Sun, 21 Sep 2025   Prob (F-statistic):             0.0659
Time:                        22:45:44   Log-Likelihood:                -453.44
No. Observations:                 189   AIC:                             916.9
Df Residuals:                     184   BIC:                             933.1
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const    

In [9]:
for outcome in ["delta_xwoba", "delta_contact_rate"]:
    y = merged_full[outcome]
    model = sm.OLS(y, X).fit()
    print(f"\nOutcome: {outcome}")
    print(model.summary())


Outcome: delta_xwoba
                            OLS Regression Results                            
Dep. Variable:            delta_xwoba   R-squared:                       0.013
Model:                            OLS   Adj. R-squared:                 -0.008
Method:                 Least Squares   F-statistic:                    0.6055
Date:                Sun, 21 Sep 2025   Prob (F-statistic):              0.659
Time:                        22:45:44   Log-Likelihood:                 256.47
No. Observations:                 189   AIC:                            -502.9
Df Residuals:                     184   BIC:                            -486.7
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const 

In [10]:
y = merged_full["delta_contact_rate"]
model = sm.OLS(y, X).fit()
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:     delta_contact_rate   R-squared:                       0.210
Model:                            OLS   Adj. R-squared:                  0.193
Method:                 Least Squares   F-statistic:                     12.21
Date:                Sun, 21 Sep 2025   Prob (F-statistic):           8.00e-09
Time:                        22:45:44   Log-Likelihood:                 380.60
No. Observations:                 189   AIC:                            -751.2
Df Residuals:                     184   BIC:                            -735.0
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      0