## Hyperparameter Tuning for MLP (OPS Prediction)

In [9]:
# Imports
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from itertools import product
from tqdm import tqdm


In [10]:
# Load preprocessed dataset
df = pd.read_csv('../data/processed_player_data.csv')

features = df.columns.difference(['OPS_target']).tolist()
target = 'OPS_target'

X = df[features].fillna(0).values
y = df[target].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)


In [11]:
# Define a flexible MLP model
class PlayerMLP(nn.Module):
    def __init__(self, input_dim, hidden_dims, dropout_rate):
        super().__init__()
        layers = []
        last_dim = input_dim
        for dim in hidden_dims:
            layers.append(nn.Linear(last_dim, dim))
            layers.append(nn.BatchNorm1d(dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
            last_dim = dim
        layers.append(nn.Linear(last_dim, 1))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


In [12]:
# Define hyperparameter grid
hidden_layer_options = [
    [32],                  # very simple
    [64, 32],              # small + 2 layers
    [128, 64],             # medium + 2 layers
    [128, 64, 32],         # medium deep
    [256, 128, 64],        # large deep
    [256, 128, 64, 32],    # larger/deeper
    [512, 256],            # wide but shallow
    [512, 256, 128],       # wide + deep
    [512, 256, 128, 64],   # very expressive
    [1024, 512, 256]       # very large (if you want to try big models)
]
dropout_options = [0.1, 0.2, 0.3, 0.4]
lr_options = [0.01, 0.005, 0.001, 0.0005, 0.0001]
batch_size_options = [16, 32, 64, 128]

param_grid = list(product(hidden_layer_options, dropout_options, lr_options, batch_size_options))
print(f"Total combinations: {len(param_grid)}")


Total combinations: 800


In [13]:
# Training function
def train_model(hidden_dims, dropout_rate, lr, batch_size):
    model = PlayerMLP(X_train.shape[1], hidden_dims, dropout_rate)
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(50):
        model.train()
        permutation = torch.randperm(X_train.size()[0])
        for i in range(0, X_train.size()[0], batch_size):
            indices = permutation[i:i+batch_size]
            batch_x, batch_y = X_train[indices], y_train[indices]

            optimizer.zero_grad()
            output = model(batch_x)
            loss = loss_fn(output, batch_y)
            loss.backward()
            optimizer.step()

    model.eval()
    with torch.no_grad():
        y_pred = model(X_test).numpy().flatten()
        y_true = y_test.numpy().flatten()
        mae = mean_absolute_error(y_true, y_pred)
        rmse = mean_squared_error(y_true, y_pred) ** 0.5

    return mae, rmse


In [14]:
# Run tuning with progress bar
results = []

for hidden_dims, dropout, lr, batch_size in tqdm(param_grid):
    mae, rmse = train_model(hidden_dims, dropout, lr, batch_size)
    results.append({
        'hidden_dims': hidden_dims,
        'dropout': dropout,
        'lr': lr,
        'batch_size': batch_size,
        'mae': mae,
        'rmse': rmse
    })

results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by='rmse')
results_df.to_csv('../output/hyperparameter_results.csv', index=False)
results_df.head()


100%|██████████| 800/800 [02:51<00:00,  4.67it/s]


Unnamed: 0,hidden_dims,dropout,lr,batch_size,mae,rmse
40,[32],0.3,0.01,16,0.015753,0.019159
380,"[256, 128, 64]",0.4,0.01,16,0.017329,0.024747
200,"[128, 64]",0.3,0.01,16,0.019661,0.025532
420,"[256, 128, 64, 32]",0.2,0.01,16,0.019193,0.025747
280,"[128, 64, 32]",0.3,0.01,16,0.020477,0.027149
