In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
import shap

# Excel workbook to load. NOTE(review): machine-specific absolute path.
file_path = r"C:\Users\brend\OneDrive - Stonehill College\Swing_Data.xlsx"

# Load the workbook into the DataFrame every later cell works on.
df = pd.read_excel(io=file_path)

In [4]:
# Shorter names for the source columns; later cells refer to the new names.
column_renames = {
    'avg_intercept_y_vs_batter': 'contact_point_rel_y',
    'avg_swing_length': 'swing_length',
    'swing_tilt': 'vertical_tilt',
    'avg_bat_speed': 'bat_speed',
}
df.rename(columns=column_renames, inplace=True)

# Numeric code for the `side` column: 'L' -> 0, 'R' -> 1.
# Any other value in `side` maps to NaN.
side_codes = {'L': 0, 'R': 1}
df['side_encoded'] = df['side'].map(side_codes)

In [5]:
# Side-normalised attack direction.
# The earlier version negated the side_encoded == 1 rows AND the side_encoded == 0
# rows, which only flipped the sign of every row and left the two sides
# un-mirrored relative to each other. Now only side_encoded == 0 ('L') is
# reflected, so side_encoded == 1 ('R') keeps its original orientation.
# NOTE(review): assumes `attack_direction` in the source data is not already
# expressed relative to the batter's side -- confirm against the data source.
is_left_side = df['side_encoded'] == 0
df['attack_direction_mirrored'] = df['attack_direction']
df.loc[is_left_side, 'attack_direction_mirrored'] *= -1

# Same quantity divided by its largest absolute value, so it lies in [-1, 1].
df['norm_attack_direction_mirrored'] = (
    df['attack_direction_mirrored'] / df['attack_direction_mirrored'].abs().max()
)

# Derived features built from pairs of the base columns.
df['tilt_angle_interaction'] = df['vertical_tilt'] * df['attack_angle']   # product
df['swing_aggressiveness'] = df['bat_speed'] * df['swing_length']         # product
df['vertical_reach'] = df['attack_angle'] - df['contact_point_rel_y']     # difference
df['tilt_length_ratio'] = df['vertical_tilt'] / df['swing_length']        # ratio

In [6]:
# Model inputs, in the column order the model is trained on.
feature_cols = [
    'bat_speed',
    'vertical_tilt',
    'attack_angle',
    'attack_direction_mirrored',
    'swing_length',
    'contact_point_rel_y',
    'tilt_angle_interaction',
    'norm_attack_direction_mirrored',
    'swing_aggressiveness',
    'vertical_reach',
    'tilt_length_ratio',
    'side_encoded',
]

# Feature matrix and regression target.
X = df.loc[:, feature_cols]
y = df.loc[:, 'xwobacon']

In [7]:
# Rescale each feature with MinMaxScaler (default output range [0, 1]).
# NOTE(review): the scaler is fitted on every row before the folds are split,
# so held-out rows contribute to the min/max -- confirm this is acceptable.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# 5-fold shuffled split with a fixed seed.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Hyper-parameters shared by the model trained in every fold.
xgb_params = dict(
    n_estimators=1000,
    max_depth=5,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
)

# Per-fold outputs collected by the loop below.
preds = np.zeros(df.shape[0])  # out-of-fold prediction for each row of df
models, feature_importances, shap_interactions_list = [], [], []

for train_index, test_index in kf.split(X_scaled):
    X_train = X_scaled[train_index]
    X_test = X_scaled[test_index]
    y_train = y.iloc[train_index]
    y_test = y.iloc[test_index]
    # Each training row is weighted by its `competitive_swings` value.
    weights_train = df['competitive_swings'].iloc[train_index]

    model = XGBRegressor(**xgb_params)
    model.fit(X_train, y_train, sample_weight=weights_train)

    # Only the held-out rows of this fold are predicted by this fold's model.
    preds[test_index] = model.predict(X_test)

    models.append(model)
    feature_importances.append(model.feature_importances_)

    # SHAP interaction values on the held-out rows, averaged over those rows.
    explainer = shap.TreeExplainer(model)
    shap_interactions = explainer.shap_interaction_values(X_test)
    shap_interactions_list.append(np.mean(shap_interactions, axis=0))

In [8]:
# Put the out-of-fold predictions on a "plus" scale, separately for each side
# code: within a side the scaled values have mean 100 and standard deviation 10.
for side in (0, 1):
    in_side = (df['side_encoded'] == side).to_numpy()
    side_preds = preds[in_side]

    centre = side_preds.mean()
    spread = side_preds.std()  # NumPy default: population std (ddof=0)

    df.loc[in_side, 'DSwing+'] = 100 + 10 * (side_preds - centre) / spread

In [13]:
# Destination workbook. NOTE(review): machine-specific absolute path.
output_file = r"C:\Users\brend\OneDrive - Stonehill College\swing_plus_results.xlsx"

# Identification columns plus the computed score and the model target.
results_columns = ['year', 'Team', 'name', 'DSwing+', 'xwobacon']
df_output = df.loc[:, results_columns]

# Write without the DataFrame index.
df_output.to_excel(output_file, index=False)
print(f"DSwing+ calculations complete! Results saved to {output_file}")

DSwing+ calculations complete! Results saved to C:\Users\brend\OneDrive - Stonehill College\swing_plus_results.xlsx


In [11]:
# Export the three highest and three lowest DSwing+ rows for each selected team
# in 2025, together with the model features.
teams = ['BOS', 'NYM', 'NYY']
df_2025 = df[(df['year'] == 2025) & (df['Team'].isin(teams))]

# Same feature list the model was trained on, repeated so this cell only
# depends on `df`.
feature_cols = [
    'bat_speed', 'vertical_tilt', 'attack_angle', 'attack_direction_mirrored',
    'swing_length', 'contact_point_rel_y', 'tilt_angle_interaction',
    'norm_attack_direction_mirrored', 'swing_aggressiveness', 'vertical_reach',
    'tilt_length_ratio', 'side_encoded'
]

export_cols = ['year', 'Team', 'name', 'DSwing+'] + feature_cols

top_bottom_list = []

for team in teams:
    team_df = df_2025[df_2025['Team'] == team]

    top3 = team_df.nlargest(3, 'DSwing+')
    bottom3 = team_df.nsmallest(3, 'DSwing+')

    # A team with fewer than six rows would otherwise list the same row in
    # both groups and export it twice; keep it in the top group only.
    # (Relies on df having unique index labels, as a freshly read frame does.)
    bottom3 = bottom3.loc[~bottom3.index.isin(top3.index)]

    top_bottom_list.append(top3)
    top_bottom_list.append(bottom3)

result_df = pd.concat(top_bottom_list)[export_cols]

# Destination workbook. NOTE(review): machine-specific absolute path.
output_file = r"C:\Users\brend\OneDrive - Stonehill College\swing_plus_top_bottom_2025_features.xlsx"
result_df.to_excel(output_file, index=False)

# Team names come from `teams`, so the message stays correct if the list changes.
print(f"Top and bottom 3 DSwing+ players for {', '.join(teams)} in 2025 exported to {output_file}")

Top and bottom 3 DSwing+ players for BOS, NYM, NYY in 2025 exported to C:\Users\brend\OneDrive - Stonehill College\swing_plus_top_bottom_2025_features.xlsx
