In [16]:
pip install optuna


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [17]:

import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split

from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline


import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset

import optuna
from optuna.pruners import MedianPruner


# -------------------------
# Reproducibility
# -------------------------
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

In [18]:


# Optuna settings
N_TRIALS        = 30          # change as needed on HPC
N_EPOCHS_TUNE   = 12          # short runs for tuning
EPOCHS_FINAL    = 25          # longer run for final training

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

Using device: cpu


In [19]:
print("\nSECTION 1: LOADING DATA FROM CSV")
csv_path = "/Users/006490246/Desktop/statcast_4years.csv"
print("Looking for:", csv_path)
print("Exists?", os.path.exists(csv_path))

df = pd.read_csv(csv_path)


print(f"Loaded data shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")


SECTION 1: LOADING DATA FROM CSV
Looking for: /Users/006490246/Desktop/statcast_4years.csv
Exists? True
Loaded data shape: (3080411, 118)
Columns: ['pitch_type', 'game_date', 'release_speed', 'release_pos_x', 'release_pos_z', 'player_name', 'batter', 'pitcher', 'events', 'description', 'spin_dir', 'spin_rate_deprecated', 'break_angle_deprecated', 'break_length_deprecated', 'zone', 'des', 'game_type', 'stand', 'p_throws', 'home_team', 'away_team', 'type', 'hit_location', 'bb_type', 'balls', 'strikes', 'game_year', 'pfx_x', 'pfx_z', 'plate_x', 'plate_z', 'on_3b', 'on_2b', 'on_1b', 'outs_when_up', 'inning', 'inning_topbot', 'hc_x', 'hc_y', 'tfs_deprecated', 'tfs_zulu_deprecated', 'umpire', 'sv_id', 'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az', 'sz_top', 'sz_bot', 'hit_distance_sc', 'launch_speed', 'launch_angle', 'effective_speed', 'release_spin_rate', 'release_extension', 'game_pk', 'fielder_2', 'fielder_3', 'fielder_4', 'fielder_5', 'fielder_6', 'fielder_7', 'fielder_8', 'fielder_9', 'release

In [20]:
print("\nSECTION 2: FILTERING TO BALLS IN PLAY (HITS + OUTS)")

# Define hit and out events
hit_types = ["single", "double", "triple", "home_run"]

out_event_types = [
    "field_out",
    "force_out",
    "double_play",
    "triple_play",
    "grounded_into_double_play",
    "other_out",
    "sac_fly",
    "sac_fly_double_play",
    "sac_bunt",
    "sac_bunt_double_play",
    "fielders_choice_out",
]

# Balls in play: type == 'X'
bip_mask = df["type"] == "X"
event_mask = df["events"].isin(hit_types + out_event_types)

data = df[bip_mask & event_mask].copy()

print(f"\n✓ Filtered to balls in play that are hits or outs")
print(f"  Rows: {len(data):,}")
print("\nEvent distribution:")
print(data["events"].value_counts().to_string())


SECTION 2: FILTERING TO BALLS IN PLAY (HITS + OUTS)

✓ Filtered to balls in play that are hits or outs
  Rows: 537,681

Event distribution:
events
field_out                    320871
single                       113984
double                        34962
home_run                      24383
force_out                     15587
grounded_into_double_play     14496
sac_fly                        5424
triple                         2973
sac_bunt                       1933
double_play                    1673
fielders_choice_out            1300
sac_fly_double_play              82
triple_play                      12
sac_bunt_double_play              1


In [21]:
print("\nSECTION 3: BUILDING 5-OUTCOME TARGET")

# 5 outcome columns (one-hot style, mutually exclusive)
data["outs_in_play"] = data["events"].isin(out_event_types).astype(int)
data["single"]       = (data["events"] == "single").astype(int)
data["double"]       = (data["events"] == "double").astype(int)
data["triple"]       = (data["events"] == "triple").astype(int)
data["home_run"]     = (data["events"] == "home_run").astype(int)

outcome_cols = ["outs_in_play", "single", "double", "triple", "home_run"]

# Sanity check
row_sum = data[outcome_cols].sum(axis=1)
assert (row_sum == 1).all(), "Some rows do not map to exactly one of the 5 outcomes!"

print("\n✓ Created 5 outcome columns:")
print(data[outcome_cols].head())
print("\nOutcome counts:")
print(data[outcome_cols].sum())
print("\nOutcome proportions (%):")
print((data[outcome_cols].mean() * 100).round(2))

# Build a single 5-class label: 0–4
# 0 = outs_in_play, 1 = single, 2 = double, 3 = triple, 4 = home_run
conditions = [
    data["outs_in_play"] == 1,
    data["single"] == 1,
    data["double"] == 1,
    data["triple"] == 1,
    data["home_run"] == 1,
]
choices = [0, 1, 2, 3, 4]
data["outcome_class"] = np.select(conditions, choices, default=-1).astype(int)
assert (data["outcome_class"] >= 0).all(), "Found rows with invalid outcome_class!"

print("\nOutcome_class distribution (0=out,1=1B,2=2B,3=3B,4=HR):")
print(data["outcome_class"].value_counts().sort_index())



SECTION 3: BUILDING 5-OUTCOME TARGET

✓ Created 5 outcome columns:
   outs_in_play  single  double  triple  home_run
0             1       0       0       0         0
2             0       1       0       0         0
3             0       1       0       0         0
4             1       0       0       0         0
5             0       0       1       0         0

Outcome counts:
outs_in_play    361379
single          113984
double           34962
triple            2973
home_run         24383
dtype: int64

Outcome proportions (%):
outs_in_play    67.21
single          21.20
double           6.50
triple           0.55
home_run         4.53
dtype: float64

Outcome_class distribution (0=out,1=1B,2=2B,3=3B,4=HR):
outcome_class
0    361379
1    113984
2     34962
3      2973
4     24383
Name: count, dtype: int64


In [22]:
print("\nSECTION 4: CREATING NEW FEATURES")

print("\nCreating derived features...")

# spray_angle: direction of batted ball using home plate reference (125.42, 125.42)
if "hc_x" in data.columns and "hc_y" in data.columns:
    data["spray_angle"] = (
        np.arctan2(data["hc_y"] - 125.42, data["hc_x"] - 125.42) * 180 / np.pi
    )
    print("  ✓ spray_angle: Direction of batted ball")

# horizontal_distance: how far left/right the ball went
if "hc_x" in data.columns:
    data["horizontal_distance"] = np.abs(data["hc_x"] - 125.42)
    print("  ✓ horizontal_distance: |hc_x - center|")

# pitch_distance_from_center: distance from middle of the strike zone
if "plate_x" in data.columns and "plate_z" in data.columns:
    data["pitch_distance_from_center"] = np.sqrt(
        data["plate_x"] ** 2 + (data["plate_z"] - 2.5) ** 2
    )
    print("  ✓ pitch_distance_from_center: distance from zone center")

# count: ball-strike count (categorical)
if "balls" in data.columns and "strikes" in data.columns:
    data["count"] = data["balls"].astype(str) + "-" + data["strikes"].astype(str)
    print("  ✓ count: 'balls-strikes' representation")

# runners_on: total number of baserunners
runner_columns = ["on_1b", "on_2b", "on_3b"]
if all(col in data.columns for col in runner_columns):
    data["runners_on"] = (~data[runner_columns].isna()).sum(axis=1)
    print("  ✓ runners_on: total base runners")

# launch_speed_x_angle: interaction between EV and LA
if "launch_speed" in data.columns and "launch_angle" in data.columns:
    data["launch_speed_x_angle"] = data["launch_speed"] * data["launch_angle"]
    print("  ✓ launch_speed_x_angle: launch_speed * launch_angle")

print(f"\n✓ Feature engineering complete")
print(f"  Total columns now: {len(data.columns)}")


SECTION 4: CREATING NEW FEATURES

Creating derived features...
  ✓ spray_angle: Direction of batted ball
  ✓ horizontal_distance: |hc_x - center|
  ✓ pitch_distance_from_center: distance from zone center
  ✓ count: 'balls-strikes' representation
  ✓ runners_on: total base runners
  ✓ launch_speed_x_angle: launch_speed * launch_angle

✓ Feature engineering complete
  Total columns now: 130


In [23]:

print("\nSECTION 5: SELECTING RELEVANT FEATURES FOR MODELING")

# Define the features you want to use (no 'events', no outcome columns here)
selected_features = [
    # Batted ball characteristics
    "launch_speed",      # Exit velocity in mph
    "launch_angle",      # Angle off the bat in degrees
    "hit_distance_sc",   # Projected hit distance
    "bb_type",           # Batted ball type (fly_ball, ground_ball, etc.)
    'attack_angle',
    'attack_direction',

    # Hit location
    "hc_x",              # Hit coordinate X
    "hc_y",              # Hit coordinate Y

    # Pitch characteristics
    "release_speed",     # Pitch velocity
    "pitch_type",        # Type of pitch (FF, SL, CH, etc.)
    "plate_x",           # Horizontal pitch location
    "plate_z",           # Vertical pitch location
    'arm_angle',
    'release_spin_rate',
    'spin_axis',

    # Game situation
    "balls",             # Ball count
    "strikes",           # Strike count
    "outs_when_up",      # Number of outs
    "inning",            # Inning number

    # Matchup information
    "stand",             # Batter stance (L/R)
    "p_throws",          # Pitcher handedness (L/R)


    # Engineered features
    "spray_angle",
    "horizontal_distance",
    "pitch_distance_from_center",
    "count",
    "runners_on",
    "launch_speed_x_angle",
]

# Keep only those features that actually exist in `data`
available_features = [col for col in selected_features if col in data.columns]
X = data[available_features].copy()
y = data["outcome_class"].values.astype(np.int64)

print(f"\n✓ Selected {len(available_features)} features")
print(f"  Features included: {', '.join(available_features)}")

# Identify categorical and numerical features
categorical_features = X.select_dtypes(include=["object"]).columns.tolist()
numerical_features   = X.select_dtypes(include=[np.number]).columns.tolist()

print(f"\nCategorical features ({len(categorical_features)}): {categorical_features}")
print(f"Numeric features ({len(numerical_features)}): {numerical_features}")
print(f"\nTarget (outcome_class) shape: {y.shape}")



SECTION 5: SELECTING RELEVANT FEATURES FOR MODELING

✓ Selected 27 features
  Features included: launch_speed, launch_angle, hit_distance_sc, bb_type, attack_angle, attack_direction, hc_x, hc_y, release_speed, pitch_type, plate_x, plate_z, arm_angle, release_spin_rate, spin_axis, balls, strikes, outs_when_up, inning, stand, p_throws, spray_angle, horizontal_distance, pitch_distance_from_center, count, runners_on, launch_speed_x_angle

Categorical features (5): ['bb_type', 'pitch_type', 'stand', 'p_throws', 'count']
Numeric features (22): ['launch_speed', 'launch_angle', 'hit_distance_sc', 'attack_angle', 'attack_direction', 'hc_x', 'hc_y', 'release_speed', 'plate_x', 'plate_z', 'arm_angle', 'release_spin_rate', 'spin_axis', 'balls', 'strikes', 'outs_when_up', 'inning', 'spray_angle', 'horizontal_distance', 'pitch_distance_from_center', 'runners_on', 'launch_speed_x_angle']

Target (outcome_class) shape: (537681,)


In [24]:
print("\nSECTION 6: TRAIN/VAL/TEST SPLIT")



# First: train+val vs test
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y,
    test_size=0.15,      # 15% test
    random_state=42,
    stratify=y
)

# Then: split temp into train and val
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp,
    test_size=0.1765,    # 0.1765 * 0.85 ≈ 0.15 → 70/15/15 overall
    random_state=42,
    stratify=y_temp
)

print("Split sizes:")
print("  X_train:", X_train.shape)
print("  X_val:  ", X_val.shape)
print("  X_test: ", X_test.shape)



SECTION 6: TRAIN/VAL/TEST SPLIT
Split sizes:
  X_train: (376362, 27)
  X_val:   (80666, 27)
  X_test:  (80653, 27)


In [25]:
print("\nSECTION 7: BUILDING PREPROCESSING PIPELINE (IMPUTE + SCALE/OHE)")

categorical_features = X_train.select_dtypes(include=["object"]).columns.tolist()
numeric_features     = X_train.select_dtypes(include=[np.number]).columns.tolist()

numeric_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

categorical_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),  # or "constant", fill_value="MISSING"
    ("onehot", OneHotEncoder(handle_unknown="ignore", sparse=False)),
])

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ],
    remainder="drop"
)

X_train_proc = preprocessor.fit_transform(X_train)
X_val_proc   = preprocessor.transform(X_val)
X_test_proc  = preprocessor.transform(X_test)

print("\nPreprocessed shapes:")
print("  X_train_proc:", X_train_proc.shape)
print("  X_val_proc:  ", X_val_proc.shape)
print("  X_test_proc: ", X_test_proc.shape)

input_dim = X_train_proc.shape[1]
print(f"\nModel input dimension: {input_dim}")



SECTION 7: BUILDING PREPROCESSING PIPELINE (IMPUTE + SCALE/OHE)

Preprocessed shapes:
  X_train_proc: (376362, 60)
  X_val_proc:   (80666, 60)
  X_test_proc:  (80653, 60)

Model input dimension: 60


In [26]:
print("\nSECTION 8: HYPERPARAMETER SEARCH WITH OPTUNA")

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

# ----- Tensors & Dataloaders -----

X_train_t = torch.tensor(X_train_proc, dtype=torch.float32)
X_val_t   = torch.tensor(X_val_proc,   dtype=torch.float32)
X_test_t  = torch.tensor(X_test_proc,  dtype=torch.float32)

y_train_t = torch.tensor(y_train, dtype=torch.long)
y_val_t   = torch.tensor(y_val,   dtype=torch.long)
y_test_t  = torch.tensor(y_test,  dtype=torch.long)

BATCH_SIZE = 1024

train_loader = DataLoader(
    TensorDataset(X_train_t, y_train_t),
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=False
)
val_loader = DataLoader(
    TensorDataset(X_val_t, y_val_t),
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False
)
test_loader = DataLoader(
    TensorDataset(X_test_t, y_test_t),
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False
)

# ----- Model definition -----

class MLP5(nn.Module):
    def __init__(self, in_dim: int, hidden_dims=None, dropout: float = 0.2, num_classes: int = 5):
        super().__init__()
        if hidden_dims is None:
            hidden_dims = [256, 128, 64]

        layers = []
        prev = in_dim
        for h in hidden_dims:
            layers.append(nn.Linear(prev, h))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            prev = h

        layers.append(nn.Linear(prev, num_classes))  # logits
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

# ----- Evaluation helper -----

from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

def evaluate_multiclass(loader, mdl):
    mdl.eval()
    all_y = []
    all_pred = []
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(DEVICE)
            yb = yb.to(DEVICE)
            logits = mdl(xb)
            preds = torch.argmax(logits, dim=1)
            all_y.append(yb.cpu().numpy())
            all_pred.append(preds.cpu().numpy())
    y_true = np.concatenate(all_y)
    y_pred = np.concatenate(all_pred)

    acc = accuracy_score(y_true, y_pred)
    macro_f1 = f1_score(y_true, y_pred, average="macro", zero_division=0)
    return {"acc": acc, "macro_f1": macro_f1, "y_true": y_true, "y_pred": y_pred}


print("\nRunning Optuna hyperparameter search...")

def objective(trial):
    # ---- Hyperparameters to search ----
    hidden1 = trial.suggest_int("hidden1", 128, 512)
    hidden2 = trial.suggest_int("hidden2", 64, 512)
    hidden3 = trial.suggest_int("hidden3", 32, 256)
    dropout = trial.suggest_float("dropout", 0.0, 0.5)
    lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)

    # ---- Build model for this trial ----
    model = MLP5(
        in_dim=X_train_proc.shape[1],
        hidden_dims=[hidden1, hidden2, hidden3],
        dropout=dropout,
        num_classes=5
    ).to(DEVICE)

    # Class weights for imbalance (computed from train labels)
    class_counts = np.bincount(y_train)
    num_classes_ = len(class_counts)
    class_weights = (class_counts.sum() / (num_classes_ * class_counts)).astype(np.float32)
    class_weights_t = torch.tensor(class_weights, dtype=torch.float32, device=DEVICE)

    criterion = nn.CrossEntropyLoss(weight=class_weights_t)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

    EPOCHS_SEARCH = 8  # small number of epochs per trial to keep search feasible

    for epoch in range(EPOCHS_SEARCH):
        model.train()
        for xb, yb in train_loader:
            xb = xb.to(DEVICE)
            yb = yb.to(DEVICE)

            optimizer.zero_grad(set_to_none=True)
            logits = model(xb)
            loss = criterion(logits, yb)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

    # Evaluate on validation set
    metrics = evaluate_multiclass(val_loader, model)
    val_macro_f1 = metrics["macro_f1"]

    # We want to maximize macro-F1
    return val_macro_f1

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)  # bump this on HPC (e.g., 50–200)

print("\nBest trial found by Optuna:")
print("  Value (val macro-F1):", study.best_trial.value)
print("  Params:", study.best_trial.params)

best_params = study.best_params

print("\nTraining final model with best hyperparameters...")

model = MLP5(
    in_dim=X_train_proc.shape[1],
    hidden_dims=[
        best_params["hidden1"],
        best_params["hidden2"],
        best_params["hidden3"],
    ],
    dropout=best_params["dropout"],
    num_classes=5,
).to(DEVICE)

# Recompute class weights for final training
class_counts = np.bincount(y_train)
num_classes_ = len(class_counts)
class_weights = (class_counts.sum() / (num_classes_ * class_counts)).astype(np.float32)
class_weights_t = torch.tensor(class_weights, dtype=torch.float32, device=DEVICE)

criterion = nn.CrossEntropyLoss(weight=class_weights_t)
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=best_params["lr"],
    weight_decay=best_params["weight_decay"],
)

EPOCHS = 40
PATIENCE = 6
best_val_f1 = -np.inf
best_state = None
pat = PATIENCE

history_epochs = []
history_train_loss = []
history_val_acc = []
history_val_macro_f1 = []


for epoch in range(1, EPOCHS + 1):
    model.train()
    running_loss = 0.0
    n_batches = 0

    for xb, yb in train_loader:
        xb = xb.to(DEVICE)
        yb = yb.to(DEVICE)

        optimizer.zero_grad(set_to_none=True)
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        running_loss += loss.item()
        n_batches += 1

    train_loss = running_loss / max(n_batches, 1)
    val_metrics = evaluate_multiclass(val_loader, model)

    print(
        f"Epoch {epoch:03d} | "
        f"train_loss={train_loss:.4f} | "
        f"val_acc={val_metrics['acc']:.4f} | "
        f"val_macro_f1={val_metrics['macro_f1']:.4f}"
    )

    history_epochs.append(epoch)
    history_train_loss.append(train_loss)
    history_val_acc.append(val_metrics["acc"])
    history_val_macro_f1.append(val_metrics["macro_f1"])

    # Early stopping on validation macro-F1
    if val_metrics["macro_f1"] > best_val_f1:
        best_val_f1 = val_metrics["macro_f1"]
        best_state = {
            "epoch": epoch,
            "model_state": model.state_dict(),
        }
        pat = PATIENCE
    else:
        pat -= 1
        if pat <= 0:
            print("Early stopping triggered.")
            break

# Load best model state
if best_state is not None:
    model.load_state_dict(best_state["model_state"])
    print(f"\nLoaded best model from epoch {best_state['epoch']} with val_macro_f1={best_val_f1:.4f}")
else:
    print("\nWarning: best_state is None; using last epoch model.")

[I 2026-01-13 11:20:16,430] A new study created in memory with name: no-name-b59c0669-4653-464a-88b5-f9051a7b359b



SECTION 8: HYPERPARAMETER SEARCH WITH OPTUNA
Using device: cpu

Running Optuna hyperparameter search...


[I 2026-01-13 11:20:38,715] Trial 0 finished with value: 0.5456559567360537 and parameters: {'hidden1': 286, 'hidden2': 259, 'hidden3': 134, 'dropout': 0.14656218940533366, 'lr': 3.087764792984312e-05, 'weight_decay': 1.0765871249202357e-05}. Best is trial 0 with value: 0.5456559567360537.
[I 2026-01-13 11:21:04,587] Trial 1 finished with value: 0.5052766220656038 and parameters: {'hidden1': 428, 'hidden2': 415, 'hidden3': 49, 'dropout': 0.14271776347500642, 'lr': 1.069695115282547e-05, 'weight_decay': 6.18247014184058e-05}. Best is trial 0 with value: 0.5456559567360537.
[I 2026-01-13 11:21:26,089] Trial 2 finished with value: 0.6339862328649466 and parameters: {'hidden1': 265, 'hidden2': 338, 'hidden3': 64, 'dropout': 0.3531781878078076, 'lr': 0.000933212824895327, 'weight_decay': 6.659613634825914e-05}. Best is trial 2 with value: 0.6339862328649466.
[I 2026-01-13 11:21:44,176] Trial 3 finished with value: 0.500228607507455 and parameters: {'hidden1': 134, 'hidden2': 211, 'hidden3':


Best trial found by Optuna:
  Value (val macro-F1): 0.6339862328649466
  Params: {'hidden1': 265, 'hidden2': 338, 'hidden3': 64, 'dropout': 0.3531781878078076, 'lr': 0.000933212824895327, 'weight_decay': 6.659613634825914e-05}

Training final model with best hyperparameters...
Epoch 001 | train_loss=0.9874 | val_acc=0.7711 | val_macro_f1=0.5582
Epoch 002 | train_loss=0.8252 | val_acc=0.7936 | val_macro_f1=0.5838
Epoch 003 | train_loss=0.7823 | val_acc=0.8187 | val_macro_f1=0.6070
Epoch 004 | train_loss=0.7562 | val_acc=0.8110 | val_macro_f1=0.6035
Epoch 005 | train_loss=0.7340 | val_acc=0.8130 | val_macro_f1=0.6096
Epoch 006 | train_loss=0.7193 | val_acc=0.8197 | val_macro_f1=0.6160
Epoch 007 | train_loss=0.7097 | val_acc=0.8142 | val_macro_f1=0.6057
Epoch 008 | train_loss=0.6993 | val_acc=0.8165 | val_macro_f1=0.6159
Epoch 009 | train_loss=0.6903 | val_acc=0.8113 | val_macro_f1=0.6175
Epoch 010 | train_loss=0.6871 | val_acc=0.8260 | val_macro_f1=0.6255
Epoch 011 | train_loss=0.6807 |

In [27]:
print("\nSECTION 9: FINAL TRAINING WITH BEST HYPERPARAMETERS")

print("\nTraining final model with best hyperparameters...")

model = MLP5(
    in_dim=X_train_proc.shape[1],
    hidden_dims=[
        best_params["hidden1"],
        best_params["hidden2"],
        best_params["hidden3"],
    ],
    dropout=best_params["dropout"],
    num_classes=5,  # or n_classes
).to(DEVICE)

# Recompute class weights for final training
class_counts = np.bincount(y_train)
num_classes_ = len(class_counts)
class_weights = (class_counts.sum() / (num_classes_ * class_counts)).astype(np.float32)
class_weights_t = torch.tensor(class_weights, dtype=torch.float32, device=DEVICE)

criterion = nn.CrossEntropyLoss(weight=class_weights_t)
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=best_params["lr"],
    weight_decay=best_params["weight_decay"],
)

EPOCHS = 40
PATIENCE = 6
best_val_f1 = -np.inf
best_state = None
pat = PATIENCE

history_epochs = []
history_train_loss = []
history_val_acc = []
history_val_macro_f1 = []

for epoch in range(1, EPOCHS + 1):
    model.train()
    running_loss = 0.0
    n_batches = 0

    for xb, yb in train_loader:
        xb = xb.to(DEVICE)
        yb = yb.to(DEVICE)

        optimizer.zero_grad(set_to_none=True)
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        running_loss += loss.item()
        n_batches += 1

    train_loss = running_loss / max(n_batches, 1)

    # Validation metrics
    val_metrics = evaluate_multiclass(val_loader, model)

    print(
        f"Epoch {epoch:03d} | "
        f"train_loss={train_loss:.4f} | "
        f"val_acc={val_metrics['acc']:.4f} | "
        f"val_macro_f1={val_metrics['macro_f1']:.4f}"
    )

    history_epochs.append(epoch)
    history_train_loss.append(train_loss)
    history_val_acc.append(val_metrics["acc"])
    history_val_macro_f1.append(val_metrics["macro_f1"])

    # Early stopping on validation macro-F1
    if val_metrics["macro_f1"] > best_val_f1:
        best_val_f1 = val_metrics["macro_f1"]
        best_state = {
            "epoch": epoch,
            "model_state": model.state_dict(),
        }
        pat = PATIENCE
    else:
        pat -= 1
        if pat <= 0:
            print("Early stopping triggered.")
            break

# Load best model state
if best_state is not None:
    model.load_state_dict(best_state["model_state"])
    print(f"\nLoaded best model from epoch {best_state['epoch']} with val_macro_f1={best_val_f1:.4f}")
else:
    print("\nWarning: best_state is None; using last epoch model.")



SECTION 9: FINAL TRAINING WITH BEST HYPERPARAMETERS

Training final model with best hyperparameters...
Epoch 001 | train_loss=0.9820 | val_acc=0.7621 | val_macro_f1=0.5589
Epoch 002 | train_loss=0.8313 | val_acc=0.7686 | val_macro_f1=0.5616
Epoch 003 | train_loss=0.7847 | val_acc=0.8101 | val_macro_f1=0.6036
Epoch 004 | train_loss=0.7568 | val_acc=0.8069 | val_macro_f1=0.6007
Epoch 005 | train_loss=0.7360 | val_acc=0.8285 | val_macro_f1=0.6245
Epoch 006 | train_loss=0.7222 | val_acc=0.8146 | val_macro_f1=0.6058
Epoch 007 | train_loss=0.7096 | val_acc=0.8319 | val_macro_f1=0.6213
Epoch 008 | train_loss=0.6990 | val_acc=0.8241 | val_macro_f1=0.6248
Epoch 009 | train_loss=0.6934 | val_acc=0.8133 | val_macro_f1=0.6068
Epoch 010 | train_loss=0.6836 | val_acc=0.8306 | val_macro_f1=0.6230
Epoch 011 | train_loss=0.6771 | val_acc=0.8224 | val_macro_f1=0.6196
Epoch 012 | train_loss=0.6703 | val_acc=0.8228 | val_macro_f1=0.6195
Epoch 013 | train_loss=0.6665 | val_acc=0.8177 | val_macro_f1=0.6223

In [28]:
print("\nSECTION 10: FINAL EVALUATION")

def evaluate_split(name, loader, model):
    metrics = evaluate_multiclass(loader, model)
    acc = metrics["acc"]
    f1 = metrics["macro_f1"]
    preds = metrics["y_pred"]
    labels = metrics["y_true"]
    print(f"{name} -> Acc: {acc:.3f}, Macro F1: {f1:.3f}")
    return acc, f1, preds, labels

train_metrics = evaluate_split("Train", train_loader, model)
val_metrics   = evaluate_split("Val",   val_loader,   model)
test_metrics  = evaluate_split("Test",  test_loader,  model)


SECTION 10: FINAL EVALUATION
Train -> Acc: 0.832, Macro F1: 0.643
Val -> Acc: 0.828, Macro F1: 0.629
Test -> Acc: 0.827, Macro F1: 0.630


In [29]:
print("\nSECTION 11: VISUALIZATIONS")

# 8.1 Loss curve
plt.figure()
plt.plot(history_epochs, history_train_loss, label="Train Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training Loss")
plt.legend()
plt.tight_layout()
plt.savefig("training_loss.png", dpi=150)
plt.close()

# 8.2 Validation metrics curves
plt.figure()
plt.plot(history_epochs, history_val_acc, label="Val Acc")
plt.plot(history_epochs, history_val_macro_f1, label="Val Macro F1")
plt.xlabel("Epoch")
plt.ylabel("Score")
plt.title("Validation Accuracy & Macro F1")
plt.legend()
plt.tight_layout()
plt.savefig("validation_metrics.png", dpi=150)
plt.close()

# 8.3 Confusion matrix on test
_, _, test_preds, test_labels = test_metrics
cm = confusion_matrix(test_labels, test_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
plt.figure()
disp.plot(values_format="d")
plt.title("Confusion Matrix - Test Set")
plt.tight_layout()
plt.savefig("confusion_matrix_test.png", dpi=150)
plt.close()

print("Saved plots:")
print("  training_loss.png")
print("  validation_metrics.png")
print("  confusion_matrix_test.png")

print("\nAll done.")


SECTION 11: VISUALIZATIONS
Saved plots:
  training_loss.png
  validation_metrics.png
  confusion_matrix_test.png

All done.


<Figure size 640x480 with 0 Axes>