In [5]:
import os
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.base import clone
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from xgboost import XGBRegressor, XGBClassifier

from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score, f1_score, confusion_matrix
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

from pybaseball import statcast
df = statcast(start_dt="2023-03-28", end_dt="2025-09-28")

This is a large query, it may take a moment to complete
Skipping offseason dates
Skipping offseason dates


100%|████████████████████████████████████████████████████████████████████████████████| 677/677 [07:01<00:00,  1.61it/s]


In [57]:
TARGET_LA = "launch_angle"
TARGET_EV = "launch_speed"

df = df.dropna(subset=[TARGET_LA, TARGET_EV])

df = df[df['description'] == 'hit_into_play']

numeric_features = [
    "release_speed", "release_spin_rate", "pfx_x", "pfx_z",
    "plate_x", "plate_z", "vx0", "vy0", "vz0",
    "ax", "ay", "az"
]

batter_features = [
    "bat_speed", "swing_length", "attack_angle", "attack_direction",
    "estimated_slg_using_speedangle", "estimated_ba_using_speedangle",
    "estimated_woba_using_speedangle", "woba_value", "babip_value", "iso_value",
    "bat_score", "bat_score_diff", "bat_win_exp", "age_bat", 
    "batter_days_since_prev_game", "n_priorpa_thisgame_player_at_bat"
]

categorical_features = ["pitch_type", "stand", "p_throws", "home_team", "away_team"]

numeric_features = [f for f in numeric_features + batter_features if f in df.columns]
categorical_features = [f for f in categorical_features if f in df.columns and f not in numeric_features]

In [58]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=RANDOM_STATE)

X_train = train_df[numeric_features + categorical_features].copy()
y_train_la = train_df[TARGET_LA]
y_train_ev = train_df[TARGET_EV]

test_df_clean = test_df.dropna(subset=[TARGET_LA, TARGET_EV])
X_test = test_df_clean[numeric_features + categorical_features].copy()
y_test_la = test_df_clean[TARGET_LA]
y_test_ev = test_df_clean[TARGET_EV]

X_train = pd.get_dummies(X_train, columns=categorical_features)
X_test  = pd.get_dummies(X_test, columns=categorical_features)

X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

imputer = SimpleImputer(strategy="median")
X_train[numeric_features] = imputer.fit_transform(X_train[numeric_features])
X_test[numeric_features]  = imputer.transform(X_test[numeric_features])

scaler = StandardScaler()
X_train[numeric_features] = scaler.fit_transform(X_train[numeric_features])
X_test[numeric_features]  = scaler.transform(X_test[numeric_features])

X_train = X_train.astype(np.float32)
X_test  = X_test.astype(np.float32)

In [59]:
def to_tensor(df_features, df_target):
    X = torch.tensor(df_features.values, dtype=torch.float32)
    y = torch.tensor(df_target.values, dtype=torch.float32).unsqueeze(1)
    return X, y

X_train_tensor, y_train_la_tensor = to_tensor(X_train, y_train_la)
_, y_train_ev_tensor = to_tensor(X_train, y_train_ev)
X_test_tensor, y_test_la_tensor = to_tensor(X_test, y_test_la)
_, y_test_ev_tensor = to_tensor(X_test, y_test_ev)

In [62]:
model_la_path = "model_la.pth"
model_ev_path = "model_ev.pth"

train_models = not (os.path.exists(model_la_path) and os.path.exists(model_ev_path))
print("Train models?", train_models)

class MLPRegression(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
    
    def forward(self, x):
        return self.model(x)

def train_model(model, X, y, lr=1e-5, epochs=50, batch_size=1024):
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        prog_bar = tqdm(loader, desc=f"Epoch {epoch+1}/{epochs}", leave=False)
        for xb, yb in prog_bar:
            optimizer.zero_grad()
            preds = model(xb)
            loss = nn.MSELoss()(preds, yb)
            loss.backward()
            
            clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            running_loss += loss.item()
            prog_bar.set_postfix({"batch_loss": loss.item()})
        
        avg_loss = running_loss / len(loader)
        print(f"Epoch {epoch+1}/{epochs} - Avg Loss: {avg_loss:.4f}")
    return model

input_dim = X_train_tensor.shape[1]

train_models = True

model_la = MLPRegression(input_dim)
model_ev = MLPRegression(input_dim)

if train_models:
    print("Training models...")
    model_la = train_model(model_la, X_train_tensor, y_train_la_tensor)
    model_ev = train_model(model_ev, X_train_tensor, y_train_ev_tensor)
    
    torch.save(model_la.state_dict(), model_la_path)
    torch.save(model_ev.state_dict(), model_ev_path)
    print("Models saved.")
else:
    print("Loading existing models...")
    model_la.load_state_dict(torch.load(model_la_path))
    model_ev.load_state_dict(torch.load(model_ev_path))
    print("Models loaded.")

Train models? False
Training models...


                                                                                                                       

Epoch 1/50 - Avg Loss: 994.1741


                                                                                                                       

Epoch 2/50 - Avg Loss: 985.2806


                                                                                                                       

Epoch 3/50 - Avg Loss: 966.2230


                                                                                                                       

Epoch 4/50 - Avg Loss: 937.7468


                                                                                                                       

Epoch 5/50 - Avg Loss: 901.3175


                                                                                                                       

Epoch 6/50 - Avg Loss: 860.0585


                                                                                                                       

Epoch 7/50 - Avg Loss: 816.6580


                                                                                                                       

Epoch 8/50 - Avg Loss: 776.8477


                                                                                                                       

Epoch 9/50 - Avg Loss: 746.4113


                                                                                                                       

Epoch 10/50 - Avg Loss: 725.9577


                                                                                                                       

Epoch 11/50 - Avg Loss: 712.7499


                                                                                                                       

Epoch 12/50 - Avg Loss: 702.8115


                                                                                                                       

Epoch 13/50 - Avg Loss: 694.2669


                                                                                                                       

Epoch 14/50 - Avg Loss: 686.6316


                                                                                                                       

Epoch 15/50 - Avg Loss: 679.9184


                                                                                                                       

Epoch 16/50 - Avg Loss: 672.7975


                                                                                                                       

Epoch 17/50 - Avg Loss: 665.8388


                                                                                                                       

Epoch 18/50 - Avg Loss: 659.4869


                                                                                                                       

Epoch 19/50 - Avg Loss: 652.9753


                                                                                                                       

Epoch 20/50 - Avg Loss: 646.5184


                                                                                                                       

Epoch 21/50 - Avg Loss: 640.2232


                                                                                                                       

Epoch 22/50 - Avg Loss: 633.9765


                                                                                                                       

Epoch 23/50 - Avg Loss: 627.1550


                                                                                                                       

Epoch 24/50 - Avg Loss: 621.7776


                                                                                                                       

Epoch 25/50 - Avg Loss: 616.0006


                                                                                                                       

Epoch 26/50 - Avg Loss: 610.2588


                                                                                                                       

Epoch 27/50 - Avg Loss: 605.2977


                                                                                                                       

Epoch 28/50 - Avg Loss: 599.3138


                                                                                                                       

Epoch 29/50 - Avg Loss: 593.8311


                                                                                                                       

Epoch 30/50 - Avg Loss: 588.0852


                                                                                                                       

Epoch 31/50 - Avg Loss: 581.6283


                                                                                                                       

Epoch 32/50 - Avg Loss: 575.9052


                                                                                                                       

Epoch 33/50 - Avg Loss: 569.8018


                                                                                                                       

Epoch 34/50 - Avg Loss: 564.1099


                                                                                                                       

Epoch 35/50 - Avg Loss: 557.4189


                                                                                                                       

Epoch 36/50 - Avg Loss: 551.3528


                                                                                                                       

Epoch 37/50 - Avg Loss: 545.8622


                                                                                                                       

Epoch 38/50 - Avg Loss: 539.1037


                                                                                                                       

Epoch 39/50 - Avg Loss: 533.3918


                                                                                                                       

Epoch 40/50 - Avg Loss: 528.3299


                                                                                                                       

Epoch 41/50 - Avg Loss: 522.0010


                                                                                                                       

Epoch 42/50 - Avg Loss: 516.1089


                                                                                                                       

Epoch 43/50 - Avg Loss: 511.2294


                                                                                                                       

Epoch 44/50 - Avg Loss: 506.3673


                                                                                                                       

Epoch 45/50 - Avg Loss: 500.0160


                                                                                                                       

Epoch 46/50 - Avg Loss: 495.4252


                                                                                                                       

Epoch 47/50 - Avg Loss: 490.8486


                                                                                                                       

Epoch 48/50 - Avg Loss: 486.1919


                                                                                                                       

Epoch 49/50 - Avg Loss: 481.4104


                                                                                                                       

Epoch 50/50 - Avg Loss: 477.1797


                                                                                                                       

Epoch 1/50 - Avg Loss: 8025.8061


                                                                                                                       

Epoch 2/50 - Avg Loss: 7951.6395


                                                                                                                       

Epoch 3/50 - Avg Loss: 7800.6118


                                                                                                                       

Epoch 4/50 - Avg Loss: 7577.4409


                                                                                                                       

Epoch 5/50 - Avg Loss: 7271.2854


                                                                                                                       

Epoch 6/50 - Avg Loss: 6872.1421


                                                                                                                       

Epoch 7/50 - Avg Loss: 6377.4143


                                                                                                                       

Epoch 8/50 - Avg Loss: 5780.7270


                                                                                                                       

Epoch 9/50 - Avg Loss: 5084.2810


                                                                                                                       

Epoch 10/50 - Avg Loss: 4295.3294


                                                                                                                       

Epoch 11/50 - Avg Loss: 3439.9441


                                                                                                                       

Epoch 12/50 - Avg Loss: 2562.8442


                                                                                                                       

Epoch 13/50 - Avg Loss: 1724.6227


                                                                                                                       

Epoch 14/50 - Avg Loss: 1011.2735


                                                                                                                       

Epoch 15/50 - Avg Loss: 533.7579


                                                                                                                       

Epoch 16/50 - Avg Loss: 353.3086


                                                                                                                       

Epoch 17/50 - Avg Loss: 311.1512


                                                                                                                       

Epoch 18/50 - Avg Loss: 288.5716


                                                                                                                       

Epoch 19/50 - Avg Loss: 275.0613


                                                                                                                       

Epoch 20/50 - Avg Loss: 265.7417


                                                                                                                       

Epoch 21/50 - Avg Loss: 257.0443


                                                                                                                       

Epoch 22/50 - Avg Loss: 250.7878


                                                                                                                       

Epoch 23/50 - Avg Loss: 245.7002


                                                                                                                       

Epoch 24/50 - Avg Loss: 241.7918


                                                                                                                       

Epoch 25/50 - Avg Loss: 237.9772


                                                                                                                       

Epoch 26/50 - Avg Loss: 235.1506


                                                                                                                       

Epoch 27/50 - Avg Loss: 231.2406


                                                                                                                       

Epoch 28/50 - Avg Loss: 228.4131


                                                                                                                       

Epoch 29/50 - Avg Loss: 226.2082


                                                                                                                       

Epoch 30/50 - Avg Loss: 224.3971


                                                                                                                       

Epoch 31/50 - Avg Loss: 222.1535


                                                                                                                       

Epoch 32/50 - Avg Loss: 220.3542


                                                                                                                       

Epoch 33/50 - Avg Loss: 219.2852


                                                                                                                       

Epoch 34/50 - Avg Loss: 216.7889


                                                                                                                       

Epoch 35/50 - Avg Loss: 215.0653


                                                                                                                       

Epoch 36/50 - Avg Loss: 214.1735


                                                                                                                       

Epoch 37/50 - Avg Loss: 212.7385


                                                                                                                       

Epoch 38/50 - Avg Loss: 211.4785


                                                                                                                       

Epoch 39/50 - Avg Loss: 210.7531


                                                                                                                       

Epoch 40/50 - Avg Loss: 209.7640


                                                                                                                       

Epoch 41/50 - Avg Loss: 208.1197


                                                                                                                       

Epoch 42/50 - Avg Loss: 207.3058


                                                                                                                       

Epoch 43/50 - Avg Loss: 206.2695


                                                                                                                       

Epoch 44/50 - Avg Loss: 205.7825


                                                                                                                       

Epoch 45/50 - Avg Loss: 205.3015


                                                                                                                       

Epoch 46/50 - Avg Loss: 204.5146


                                                                                                                       

Epoch 47/50 - Avg Loss: 204.4113


                                                                                                                       

Epoch 48/50 - Avg Loss: 203.5987


                                                                                                                       

Epoch 49/50 - Avg Loss: 202.8068


                                                                                                                       

Epoch 50/50 - Avg Loss: 202.7077
Models saved.




In [63]:
def predict_with_uncertainty(model, X, n_samples=30):
    model.train()
    preds_list = []
    with torch.no_grad():
        for _ in range(n_samples):
            preds_list.append(model(X).cpu().numpy())
    preds_array = np.array(preds_list)
    mean_preds = preds_array.mean(axis=0).flatten()
    std_preds = preds_array.std(axis=0).flatten()
    return mean_preds, std_preds

pred_la_mean, pred_la_std = predict_with_uncertainty(model_la, X_test_tensor)
pred_ev_mean, pred_ev_std = predict_with_uncertainty(model_ev, X_test_tensor)

rmse_la = np.sqrt(mean_squared_error(y_test_la, pred_la_mean))
rmse_ev = np.sqrt(mean_squared_error(y_test_ev, pred_ev_mean))
print(f"\nLaunch Angle RMSE: {rmse_la:.2f}")
print(f"Exit Velocity RMSE: {rmse_ev:.2f}")


Launch Angle RMSE: 21.26
Exit Velocity RMSE: 12.43


In [64]:
predicted_df = test_df_clean.copy()
predicted_df["launch_angle_pred"] = pred_la_mean
predicted_df["launch_angle_std"] = pred_la_std
predicted_df["launch_speed_pred"] = pred_ev_mean
predicted_df["launch_speed_std"] = pred_ev_std

predicted_df.to_excel("predicted_results_pytorch.xlsx", index=False)
print("\nPredictions saved to 'predicted_results_pytorch.xlsx'")


Predictions saved to 'predicted_results_pytorch.xlsx'


In [73]:
def player_season_summary(batter_id, df, X_df, model_la, model_ev, filter_bip=True):

    player_df = df[df['batter'] == batter_id].copy()
    
    if filter_bip:
        player_df = player_df[player_df['description'] == 'hit_into_play']
    
    if player_df.empty:
        print(f"No data found for batter ID {batter_id} with filter_bip={filter_bip}")
        return None
    
    actual_la = player_df['launch_angle'].mean()
    actual_ev = player_df['launch_speed'].mean()
    
    X_player = X_df.loc[player_df.index].copy().astype(np.float32)
    X_player_tensor = torch.tensor(X_player.values, dtype=torch.float32)
    
    pred_la_mean, pred_la_std = predict_with_uncertainty(model_la, X_player_tensor)
    pred_ev_mean, pred_ev_std = predict_with_uncertainty(model_ev, X_player_tensor)
    
    predicted_la = pred_la_mean.mean()
    predicted_ev = pred_ev_mean.mean()
    
    summary = pd.DataFrame({
        "Metric": ["Launch Angle", "Exit Velocity"],
        "Actual": [actual_la, actual_ev],
        "Predicted": [predicted_la, predicted_ev],
        "Pred Std (avg)": [pred_la_std.mean(), pred_ev_std.mean()]
    })
    
    return summary

In [74]:
freeman_summary = player_season_summary(
    batter_id=518692,  # Freddie Freeman
    df=test_df_clean,
    X_df=X_test,
    model_la=model_la,
    model_ev=model_ev,
    filter_bip=True
)

print(freeman_summary)

          Metric     Actual  Predicted  Pred Std (avg)
0   Launch Angle  15.505119  12.557218        4.723663
1  Exit Velocity  89.777816  87.982063        6.898694
