In [None]:
#Packages
import os
import pandas as pd
import numpy as np
import xgboost
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
import matplotlib.pyplot as plt
import seaborn as sns
import shap

In [None]:
#Get Data
# Read every CSV export found in csv_dir and stack them into a single frame.
csv_dir = '../../SEC Trackman Data/'

# os.listdir() returns entries in arbitrary, filesystem-dependent order.
# Sorting pins the row order of the concatenated frame, which in turn makes
# the seeded train/test splits further down select the same rows on every run
# and on every machine.
all_files = sorted(
    os.path.join(csv_dir, f) for f in os.listdir(csv_dir) if f.endswith('.csv')
)

df_list = [pd.read_csv(file) for file in all_files]

# ignore_index=True gives the combined frame a fresh 0..n-1 index.
all_pitches = pd.concat(df_list, ignore_index=True)

print(all_pitches.shape)
print(all_pitches.columns)

In [None]:
#Get all splitters
# Keep only the rows whose TaggedPitchType is exactly 'Splitter'.
os_sp = all_pitches.loc[all_pitches['TaggedPitchType'].eq('Splitter')]
print(os_sp.shape)

In [None]:
#Whiffs
# Binary target: 1 when PitchCall is 'StrikeSwinging', otherwise 0.
# os_sp is a filtered slice of all_pitches, so writing a column into it in
# place makes pandas emit SettingWithCopyWarning. assign() returns a new frame
# with the column added, which is unambiguous and safe to re-run.
os_sp = os_sp.assign(whiff=np.where(os_sp['PitchCall'] == 'StrikeSwinging', 1, 0))

In [None]:
# Column names used as model features (35 in total). The list order is the
# column order of every feature matrix built from it.
# NOTE(review): 'PitchTrajectoryXc0' is absent while the Yc0/Zc0 terms are
# present - confirm this is intentional.
preds = [
    'RelSpeed', 'VertRelAngle', 'HorzRelAngle', 'SpinRate', 'SpinAxis',
    'RelHeight', 'RelSide', 'Extension',
    'VertBreak', 'InducedVertBreak', 'HorzBreak',
    'ZoneSpeed', 'VertApprAngle', 'HorzApprAngle', 'ZoneTime',
    'pfxx', 'pfxz',
    'x0', 'y0', 'z0',
    'vx0', 'vy0', 'vz0',
    'ax0', 'ay0', 'az0',
    'SpeedDrop',
    'PitchTrajectoryXc1', 'PitchTrajectoryXc2',
    'PitchTrajectoryYc0', 'PitchTrajectoryYc1', 'PitchTrajectoryYc2',
    'PitchTrajectoryZc0', 'PitchTrajectoryZc1', 'PitchTrajectoryZc2',
]

In [None]:
# Feature matrix (columns in `preds` order) and whiff target for all splitters.
os_sp_X = os_sp.loc[:, preds]
os_sp_y = os_sp.loc[:, 'whiff']

In [None]:
#Train Model
# Seeded 67/33 split of the splitter rows, then a default XGBClassifier fit
# on the training part.
X_train, X_test, y_train, y_test = train_test_split(
    os_sp_X,
    os_sp_y,
    test_size=.33,
    random_state=25,
)

model = XGBClassifier()
model.fit(X_train, y_train)

# Predicted labels for the held-out rows, each passed through round().
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))

accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

In [None]:
#Auburn Subset
# Splitter rows whose PitcherTeam code is 'AUB_TIG'.
aub_os_sp = os_sp.loc[os_sp['PitcherTeam'].eq('AUB_TIG')]

In [None]:
# Distinct pitcher names: everyone in os_sp, and those appearing in the Auburn rows
unique_values = os_sp['Pitcher'].unique()
aub_unique_values = aub_os_sp['Pitcher'].unique()

#Print list of Auburn pitchers for selected split
print(aub_unique_values)

# Map each pitcher name to the os_sp rows carrying that name.
all_subsets = {
    pitcher_name: os_sp.loc[os_sp['Pitcher'].eq(pitcher_name)]
    for pitcher_name in unique_values
}

# NOTE(review): this filters os_sp, not aub_os_sp, so rows with the same
# Pitcher value but a different PitcherTeam are included - confirm intended.
aub_subsets = {
    pitcher_name: os_sp.loc[os_sp['Pitcher'].eq(pitcher_name)]
    for pitcher_name in aub_unique_values
}

In [None]:
def aub_os_sp_stuff(name, clf=None, population_X=None, pitcher_subsets=None):
    """Print and return one pitcher's splitter Stuff+.

    Stuff+ is the pitcher's per-pitch predicted whiff probability divided by
    the mean predicted whiff probability over the reference population,
    averaged and multiplied by 100 (so 100 equals the population mean).

    Parameters
    ----------
    name
        Key into ``pitcher_subsets`` (a value of the ``Pitcher`` column).
    clf : optional
        Fitted classifier exposing ``predict_proba``. Defaults to the
        notebook-level ``model`` as bound at call time.
    population_X : pandas.DataFrame, optional
        Feature frame of the reference population; its columns define the
        features selected for the pitcher. Defaults to ``os_sp_X``.
    pitcher_subsets : mapping, optional
        Pitcher name -> DataFrame of that pitcher's rows. Defaults to the
        notebook-level ``aub_subsets``.

    Returns
    -------
    numpy floating scalar
        The Stuff+ value that was printed.

    Raises
    ------
    KeyError
        If ``name`` is not a key of ``pitcher_subsets``.
    """
    # `model` and `aub_subsets` are reassigned by several cells in this
    # notebook (one model/dict per platoon split). Passing them explicitly pins
    # the objects being scored; the fallbacks keep the original call working.
    if clf is None:
        clf = model
    if population_X is None:
        population_X = os_sp_X
    if pitcher_subsets is None:
        pitcher_subsets = aub_subsets

    # Reuse the population's columns instead of repeating the feature list, so
    # the pitcher frame always matches what the model is scored on.
    feature_cols = list(population_X.columns)

    # Column 1 of predict_proba is the positive (whiff) class.
    mean_pred = np.mean(clf.predict_proba(population_X)[:, 1])
    whiff_preds = clf.predict_proba(pitcher_subsets[name][feature_cols])[:, 1]

    stuff_plus = np.mean(whiff_preds / mean_pred * 100)
    print(f"{name} Splitter stuff+: {stuff_plus}")
    return stuff_plus

In [None]:
#Stuff+ for selected Auburn pitchers Splitter
# One printed line per name in aub_unique_values.
for pitcher_name in aub_unique_values:
    aub_os_sp_stuff(pitcher_name)

In [None]:
# Display the splitter rows stored for one pitcher. The key is a value of the
# 'Pitcher' column and must be one of aub_unique_values (KeyError otherwise).
aub_subsets['Allsup, Chase']

Now, let's repeat the analysis for each platoon split (pitcher handedness vs. batter side).

In [None]:
#Subset splitters into platoon splits
# Suffix convention: first letter = PitcherThrows, second letter = BatterSide
# (r = 'Right', l = 'Left'). Rows with any other value in either column fall
# into none of the four frames.
os_sp_rr = os_sp.loc[os_sp['PitcherThrows'].eq('Right') & os_sp['BatterSide'].eq('Right')]
os_sp_rl = os_sp.loc[os_sp['PitcherThrows'].eq('Right') & os_sp['BatterSide'].eq('Left')]
os_sp_lr = os_sp.loc[os_sp['PitcherThrows'].eq('Left') & os_sp['BatterSide'].eq('Right')]
os_sp_ll = os_sp.loc[os_sp['PitcherThrows'].eq('Left') & os_sp['BatterSide'].eq('Left')]

# One shape per line, in rr / rl / lr / ll order.
print(os_sp_rr.shape, os_sp_rl.shape, os_sp_lr.shape, os_sp_ll.shape, sep='\n')

In [None]:
#Whiffs
# Binary target per platoon frame: 1 when PitchCall is 'StrikeSwinging', else 0.
# Each frame is a filtered slice of os_sp, so assigning a column into it in
# place makes pandas emit SettingWithCopyWarning. assign() builds a new frame
# with the column set, which is unambiguous and safe to re-run.
os_sp_rr = os_sp_rr.assign(whiff=np.where(os_sp_rr['PitchCall'] == 'StrikeSwinging', 1, 0))
os_sp_rl = os_sp_rl.assign(whiff=np.where(os_sp_rl['PitchCall'] == 'StrikeSwinging', 1, 0))
os_sp_lr = os_sp_lr.assign(whiff=np.where(os_sp_lr['PitchCall'] == 'StrikeSwinging', 1, 0))
os_sp_ll = os_sp_ll.assign(whiff=np.where(os_sp_ll['PitchCall'] == 'StrikeSwinging', 1, 0))

In [None]:
#Predictor Variables
# Column names used as model features for the platoon-split models (35 in
# total). The list order is the column order of each feature matrix.
preds = [
    'RelSpeed', 'VertRelAngle', 'HorzRelAngle', 'SpinRate', 'SpinAxis',
    'RelHeight', 'RelSide', 'Extension',
    'VertBreak', 'InducedVertBreak', 'HorzBreak',
    'ZoneSpeed', 'VertApprAngle', 'HorzApprAngle', 'ZoneTime',
    'pfxx', 'pfxz',
    'x0', 'y0', 'z0',
    'vx0', 'vy0', 'vz0',
    'ax0', 'ay0', 'az0',
    'SpeedDrop',
    'PitchTrajectoryXc1', 'PitchTrajectoryXc2',
    'PitchTrajectoryYc0', 'PitchTrajectoryYc1', 'PitchTrajectoryYc2',
    'PitchTrajectoryZc0', 'PitchTrajectoryZc1', 'PitchTrajectoryZc2',
]

Righty vs. Righty Split

In [None]:
# Feature matrix (columns in `preds` order) and whiff target, RHP vs RHB rows.
os_sp_rr_X = os_sp_rr.loc[:, preds]
os_sp_rr_y = os_sp_rr.loc[:, 'whiff']

In [None]:
#Train Model: RHP vs RHB
# Seeded 67/33 split of the rr rows, then a default XGBClassifier fit on the
# training part. Rebinds the notebook-level `model`.
X_train, X_test, y_train, y_test = train_test_split(
    os_sp_rr_X,
    os_sp_rr_y,
    test_size=.33,
    random_state=25,
)

model = XGBClassifier()
model.fit(X_train, y_train)

# Predicted labels for the held-out rows, each passed through round().
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))

accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

In [None]:
# Predicted probability of the positive (whiff = 1) class for the held-out rows.
y_pred_prob = model.predict_proba(X_test)[:,1]

# ROC AUC is a ranking metric, so score it on the continuous probabilities.
# Scoring the thresholded labels in `predictions` reduces the curve to a single
# operating point and misstates the AUC. `multi_class` is dropped because the
# whiff target is binary.
roc_auc = roc_auc_score(y_test, y_pred_prob)

print(f'ROC AUC: {roc_auc:.4f}')

# ROC curve for the positive class (whiff = 1); the legend now names the class
# that is actually plotted.
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
plt.plot(fpr, tpr, label='Whiff (class 1) ROC curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='best')
plt.show()

In [None]:
#Auburn Subset
# rr rows whose PitcherTeam code is 'AUB_TIG'.
aub_os_sp_rr = os_sp_rr.loc[os_sp_rr['PitcherTeam'].eq('AUB_TIG')]
print(aub_os_sp_rr.shape)

In [None]:
# Distinct pitcher names in the rr split: all pitchers, and those in the Auburn rows
unique_values = os_sp_rr['Pitcher'].unique()
aub_unique_values = aub_os_sp_rr['Pitcher'].unique()

#Print list of Auburn pitchers for selected split
print(aub_unique_values)

# Map each pitcher name to the os_sp_rr rows carrying that name.
# Both dicts rebind notebook-level names used by other cells.
subsets = {
    pitcher_name: os_sp_rr.loc[os_sp_rr['Pitcher'].eq(pitcher_name)]
    for pitcher_name in unique_values
}
aub_subsets = {
    pitcher_name: os_sp_rr.loc[os_sp_rr['Pitcher'].eq(pitcher_name)]
    for pitcher_name in aub_unique_values
}

In [None]:
def aub_rr_os_sp_stuff(name, clf=None, population_X=None, pitcher_subsets=None):
    """Print and return one pitcher's splitter Stuff+ for the RHP vs RHB split.

    Stuff+ is the pitcher's per-pitch predicted whiff probability divided by
    the mean predicted whiff probability over the reference population,
    averaged and multiplied by 100 (so 100 equals the population mean).

    Parameters
    ----------
    name
        Key into ``pitcher_subsets`` (a value of the ``Pitcher`` column).
    clf : optional
        Fitted classifier exposing ``predict_proba``. Defaults to the
        notebook-level ``model`` as bound at call time.
    population_X : pandas.DataFrame, optional
        Feature frame of the reference population; its columns define the
        features selected for the pitcher. Defaults to ``os_sp_rr_X``.
    pitcher_subsets : mapping, optional
        Pitcher name -> DataFrame of that pitcher's rows. Defaults to the
        notebook-level ``aub_subsets``.

    Returns
    -------
    numpy floating scalar
        The Stuff+ value that was printed.

    Raises
    ------
    KeyError
        If ``name`` is not a key of ``pitcher_subsets``.
    """
    # `model` and `aub_subsets` are reassigned by several cells in this
    # notebook (one model/dict per platoon split). Passing them explicitly pins
    # the objects being scored; the fallbacks keep the original call working.
    if clf is None:
        clf = model
    if population_X is None:
        population_X = os_sp_rr_X
    if pitcher_subsets is None:
        pitcher_subsets = aub_subsets

    # Reuse the population's columns instead of repeating the feature list.
    feature_cols = list(population_X.columns)

    # Column 1 of predict_proba is the positive (whiff) class.
    mean_pred = np.mean(clf.predict_proba(population_X)[:, 1])
    whiff_preds = clf.predict_proba(pitcher_subsets[name][feature_cols])[:, 1]

    stuff_plus = np.mean(whiff_preds / mean_pred * 100)
    print(f"{name} splitter rr stuff+: {stuff_plus}")
    return stuff_plus

In [None]:
#Stuff+ for selected Auburn pitchers Splitter in RHP v RHB split
# One printed line per name in aub_unique_values.
for pitcher_name in aub_unique_values:
    aub_rr_os_sp_stuff(pitcher_name)

Righty vs. Lefty Split

In [None]:
# Feature matrix (columns in `preds` order) and whiff target, RHP vs LHB rows.
os_sp_rl_X = os_sp_rl.loc[:, preds]
os_sp_rl_y = os_sp_rl.loc[:, 'whiff']

In [None]:
#Train Model: RHP vs LHB
# Seeded 67/33 split of the rl rows, then a default XGBClassifier fit on the
# training part. Rebinds the notebook-level `model`.
X_train, X_test, y_train, y_test = train_test_split(
    os_sp_rl_X,
    os_sp_rl_y,
    test_size=.33,
    random_state=25,
)

model = XGBClassifier()
model.fit(X_train, y_train)

# Predicted labels for the held-out rows, each passed through round().
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))

accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

In [None]:
#Auburn Subset
# rl rows whose PitcherTeam code is 'AUB_TIG'.
aub_os_sp_rl = os_sp_rl.loc[os_sp_rl['PitcherTeam'].eq('AUB_TIG')]
print(aub_os_sp_rl.shape)

In [None]:
# Distinct pitcher names in the rl split: all pitchers, and those in the Auburn rows
unique_values = os_sp_rl['Pitcher'].unique()
aub_unique_values = aub_os_sp_rl['Pitcher'].unique()

#Print list of Auburn pitchers for selected split
print(aub_unique_values)

# Map each pitcher name to the os_sp_rl rows carrying that name.
# Both dicts rebind notebook-level names used by other cells.
subsets = {
    pitcher_name: os_sp_rl.loc[os_sp_rl['Pitcher'].eq(pitcher_name)]
    for pitcher_name in unique_values
}
aub_subsets = {
    pitcher_name: os_sp_rl.loc[os_sp_rl['Pitcher'].eq(pitcher_name)]
    for pitcher_name in aub_unique_values
}

In [None]:
def aub_rl_os_sp_stuff(name, clf=None, population_X=None, pitcher_subsets=None):
    """Print and return one pitcher's splitter Stuff+ for the RHP vs LHB split.

    Stuff+ is the pitcher's per-pitch predicted whiff probability divided by
    the mean predicted whiff probability over the reference population,
    averaged and multiplied by 100 (so 100 equals the population mean).

    Parameters
    ----------
    name
        Key into ``pitcher_subsets`` (a value of the ``Pitcher`` column).
    clf : optional
        Fitted classifier exposing ``predict_proba``. Defaults to the
        notebook-level ``model`` as bound at call time.
    population_X : pandas.DataFrame, optional
        Feature frame of the reference population; its columns define the
        features selected for the pitcher. Defaults to ``os_sp_rl_X``.
    pitcher_subsets : mapping, optional
        Pitcher name -> DataFrame of that pitcher's rows. Defaults to the
        notebook-level ``aub_subsets``.

    Returns
    -------
    numpy floating scalar
        The Stuff+ value that was printed.

    Raises
    ------
    KeyError
        If ``name`` is not a key of ``pitcher_subsets``.
    """
    # `model` and `aub_subsets` are reassigned by several cells in this
    # notebook (one model/dict per platoon split). Passing them explicitly pins
    # the objects being scored; the fallbacks keep the original call working.
    if clf is None:
        clf = model
    if population_X is None:
        population_X = os_sp_rl_X
    if pitcher_subsets is None:
        pitcher_subsets = aub_subsets

    # Reuse the population's columns instead of repeating the feature list.
    feature_cols = list(population_X.columns)

    # Column 1 of predict_proba is the positive (whiff) class.
    mean_pred = np.mean(clf.predict_proba(population_X)[:, 1])
    whiff_preds = clf.predict_proba(pitcher_subsets[name][feature_cols])[:, 1]

    stuff_plus = np.mean(whiff_preds / mean_pred * 100)
    print(f"{name} splitter rl stuff+: {stuff_plus}")
    return stuff_plus


In [None]:
#Stuff+ for selected Auburn pitchers Splitter in RHP v LHB split
# One printed line per name in aub_unique_values.
for pitcher_name in aub_unique_values:
    aub_rl_os_sp_stuff(pitcher_name)

Lefty vs. Righty Split

In [None]:
# Feature matrix (columns in `preds` order) and whiff target, LHP vs RHB rows.
os_sp_lr_X = os_sp_lr.loc[:, preds]
os_sp_lr_y = os_sp_lr.loc[:, 'whiff']

In [None]:
#Train Model: LHP vs RHB
# Seeded 67/33 split of the lr rows, then a default XGBClassifier fit on the
# training part. Rebinds the notebook-level `model`.
X_train, X_test, y_train, y_test = train_test_split(
    os_sp_lr_X,
    os_sp_lr_y,
    test_size=.33,
    random_state=25,
)

model = XGBClassifier()
model.fit(X_train, y_train)

# Predicted labels for the held-out rows, each passed through round().
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))

accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

In [None]:
#Auburn Subset
# lr rows whose PitcherTeam code is 'AUB_TIG'.
aub_os_sp_lr = os_sp_lr.loc[os_sp_lr['PitcherTeam'].eq('AUB_TIG')]
print(aub_os_sp_lr.shape)

In [None]:
# Distinct pitcher names in the lr split: all pitchers, and those in the Auburn rows
unique_values = os_sp_lr['Pitcher'].unique()
aub_unique_values = aub_os_sp_lr['Pitcher'].unique()

#Print list of Auburn pitchers for selected split
print(aub_unique_values)

# Map each pitcher name to the os_sp_lr rows carrying that name.
# Both dicts rebind notebook-level names used by other cells.
subsets = {
    pitcher_name: os_sp_lr.loc[os_sp_lr['Pitcher'].eq(pitcher_name)]
    for pitcher_name in unique_values
}
aub_subsets = {
    pitcher_name: os_sp_lr.loc[os_sp_lr['Pitcher'].eq(pitcher_name)]
    for pitcher_name in aub_unique_values
}



In [None]:
def aub_lr_os_sp_stuff(name, clf=None, population_X=None, pitcher_subsets=None):
    """Print and return one pitcher's splitter Stuff+ for the LHP vs RHB split.

    Stuff+ is the pitcher's per-pitch predicted whiff probability divided by
    the mean predicted whiff probability over the reference population,
    averaged and multiplied by 100 (so 100 equals the population mean).

    Parameters
    ----------
    name
        Key into ``pitcher_subsets`` (a value of the ``Pitcher`` column).
    clf : optional
        Fitted classifier exposing ``predict_proba``. Defaults to the
        notebook-level ``model`` as bound at call time.
    population_X : pandas.DataFrame, optional
        Feature frame of the reference population; its columns define the
        features selected for the pitcher. Defaults to ``os_sp_lr_X``.
    pitcher_subsets : mapping, optional
        Pitcher name -> DataFrame of that pitcher's rows. Defaults to the
        notebook-level ``aub_subsets``.

    Returns
    -------
    numpy floating scalar
        The Stuff+ value that was printed.

    Raises
    ------
    KeyError
        If ``name`` is not a key of ``pitcher_subsets``.
    """
    # `model` and `aub_subsets` are reassigned by several cells in this
    # notebook (one model/dict per platoon split). Passing them explicitly pins
    # the objects being scored; the fallbacks keep the original call working.
    if clf is None:
        clf = model
    if population_X is None:
        population_X = os_sp_lr_X
    if pitcher_subsets is None:
        pitcher_subsets = aub_subsets

    # Reuse the population's columns instead of repeating the feature list.
    feature_cols = list(population_X.columns)

    # Column 1 of predict_proba is the positive (whiff) class.
    mean_pred = np.mean(clf.predict_proba(population_X)[:, 1])
    whiff_preds = clf.predict_proba(pitcher_subsets[name][feature_cols])[:, 1]

    stuff_plus = np.mean(whiff_preds / mean_pred * 100)
    print(f"{name} splitter lr stuff+: {stuff_plus}")
    return stuff_plus


In [None]:
#Stuff+ for selected Auburn pitchers Splitter in LHP v RHB split
# One printed line per name in aub_unique_values.
for pitcher_name in aub_unique_values:
    aub_lr_os_sp_stuff(pitcher_name)

Lefty vs. Lefty Split

In [None]:
# Feature matrix (columns in `preds` order) and whiff target, LHP vs LHB rows.
os_sp_ll_X = os_sp_ll.loc[:, preds]
os_sp_ll_y = os_sp_ll.loc[:, 'whiff']

In [None]:
#Train Model: LHP vs LHB
# Seeded 67/33 split of the ll rows, then a default XGBClassifier fit on the
# training part. Rebinds the notebook-level `model`.
X_train, X_test, y_train, y_test = train_test_split(
    os_sp_ll_X,
    os_sp_ll_y,
    test_size=.33,
    random_state=25,
)

model = XGBClassifier()
model.fit(X_train, y_train)

# Predicted labels for the held-out rows, each passed through round().
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))

accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

In [None]:
#Auburn Subset
# ll rows whose PitcherTeam code is 'AUB_TIG'.
aub_os_sp_ll = os_sp_ll.loc[os_sp_ll['PitcherTeam'].eq('AUB_TIG')]
print(aub_os_sp_ll.shape)

In [None]:
# Distinct pitcher names in the ll split: all pitchers, and those in the Auburn rows
unique_values = os_sp_ll['Pitcher'].unique()
aub_unique_values = aub_os_sp_ll['Pitcher'].unique()

#Print list of Auburn pitchers for selected split
print(aub_unique_values)

# Map each pitcher name to the os_sp_ll rows carrying that name.
# Both dicts rebind notebook-level names used by other cells.
subsets = {
    pitcher_name: os_sp_ll.loc[os_sp_ll['Pitcher'].eq(pitcher_name)]
    for pitcher_name in unique_values
}
aub_subsets = {
    pitcher_name: os_sp_ll.loc[os_sp_ll['Pitcher'].eq(pitcher_name)]
    for pitcher_name in aub_unique_values
}

In [None]:
def aub_ll_os_sp_stuff(name, clf=None, population_X=None, pitcher_subsets=None):
    """Print and return one pitcher's splitter Stuff+ for the LHP vs LHB split.

    Stuff+ is the pitcher's per-pitch predicted whiff probability divided by
    the mean predicted whiff probability over the reference population,
    averaged and multiplied by 100 (so 100 equals the population mean).

    Parameters
    ----------
    name
        Key into ``pitcher_subsets`` (a value of the ``Pitcher`` column).
    clf : optional
        Fitted classifier exposing ``predict_proba``. Defaults to the
        notebook-level ``model`` as bound at call time.
    population_X : pandas.DataFrame, optional
        Feature frame of the reference population; its columns define the
        features selected for the pitcher. Defaults to ``os_sp_ll_X``.
    pitcher_subsets : mapping, optional
        Pitcher name -> DataFrame of that pitcher's rows. Defaults to the
        notebook-level ``aub_subsets``.

    Returns
    -------
    numpy floating scalar
        The Stuff+ value that was printed.

    Raises
    ------
    KeyError
        If ``name`` is not a key of ``pitcher_subsets``.
    """
    # `model` and `aub_subsets` are reassigned by several cells in this
    # notebook (one model/dict per platoon split). Passing them explicitly pins
    # the objects being scored; the fallbacks keep the original call working.
    if clf is None:
        clf = model
    if population_X is None:
        population_X = os_sp_ll_X
    if pitcher_subsets is None:
        pitcher_subsets = aub_subsets

    # Reuse the population's columns instead of repeating the feature list.
    feature_cols = list(population_X.columns)

    # Column 1 of predict_proba is the positive (whiff) class.
    mean_pred = np.mean(clf.predict_proba(population_X)[:, 1])
    whiff_preds = clf.predict_proba(pitcher_subsets[name][feature_cols])[:, 1]

    stuff_plus = np.mean(whiff_preds / mean_pred * 100)
    print(f"{name} splitter ll stuff+: {stuff_plus}")
    return stuff_plus


In [None]:
#Stuff+ for selected Auburn pitchers Splitter in LHP v LHB split
# One printed line per name in aub_unique_values.
for pitcher_name in aub_unique_values:
    aub_ll_os_sp_stuff(pitcher_name)