### Imports

In [2]:
import pandas as pd
import numpy as np
import time

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset, Subset


from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GroupShuffleSplit, GroupKFold

import sys
from pathlib import Path
import importlib

# Make sure the code/ directory is on the import path
sys.path.append(str(Path("..").resolve()))

from Implementations import prepare_data
importlib.reload(prepare_data)

from Implementations.prepare_data import prepare_data


from Implementations import plot_style
importlib.reload(plot_style)
from Implementations.plot_style import set_latex_style, fig_ax, save_pdf

In [5]:
# Single source of truth for every seed used in this cell, so the split seed
# and the torch/NumPy seeds cannot drift apart.
SEED = 6114

# Load the processed extended feature table.
df = pd.read_parquet("../Data_Processed/player_extended_features.parquet")

# prepare_data is a project helper; only the keys read below are relied on.
data = prepare_data(df, test_size=0.2, seed=SEED)
print(data["summary"])

# Seed torch and NumPy *after* the split, exactly as before, so the global RNG
# state seen by model initialisation and DataLoader shuffling is unchanged.
torch.manual_seed(SEED)
np.random.seed(SEED)

device = "cpu"  # same choice as core for stability

# Feature matrices and flattened 1-D targets.
# NOTE(review): the "ylog_*" keys are presumably log-transformed targets —
# confirm in prepare_data.
X_train = data["X_train"]
X_test  = data["X_test"]
y_train = data["ylog_train"].ravel()
y_test  = data["ylog_test"].ravel()

# Not used in this cell; kept as notebook-level names.
feature_cols = data["feature_cols"]
groups_train = data["groups_train"]

# float32 tensors; targets get a trailing dimension so they match the
# model's single-column output.
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test_t  = torch.tensor(X_test,  dtype=torch.float32)
y_test_t  = torch.tensor(y_test,  dtype=torch.float32).unsqueeze(1)

train_ds_full = TensorDataset(X_train_t, y_train_t)
test_ds       = TensorDataset(X_test_t,  y_test_t)

# Training batches are shuffled (driven by the global torch RNG seeded above);
# the test loader keeps a fixed order and uses larger batches.
full_train_loader = DataLoader(train_ds_full, batch_size=256, shuffle=True,  num_workers=0)
test_loader       = DataLoader(test_ds,       batch_size=2048, shuffle=False, num_workers=0)


{'n_rows_total': 278558, 'n_rows_used': 275230, 'n_features': 208, 'test_size': 0.2, 'seed': 6114, 'numeric_only': True, 'drop_na': True, 'standardize': True, 'player_overlap_train_test': 0}


In [6]:
def make_activation(name: str):
    """Return the activation module *class* matching ``name``.

    The lookup is case-insensitive. The class itself is returned (not an
    instance), so callers create a fresh module with ``cls()``.

    Args:
        name: ``"relu"`` or ``"tanh"`` in any letter case.

    Returns:
        ``nn.ReLU`` or ``nn.Tanh``.

    Raises:
        ValueError: if ``name`` is not one of the supported activations.
    """
    registry = {"relu": nn.ReLU, "tanh": nn.Tanh}
    key = name.lower()
    if key not in registry:
        raise ValueError("activation must be 'relu' or 'tanh'")
    return registry[key]

class MLP(nn.Module):
    """Fully connected network with a single-output linear head.

    The network is ``n_layers`` hidden blocks, each
    ``Linear -> activation -> Dropout`` (Dropout only when ``dropout`` is a
    positive number), followed by a final ``Linear(..., 1)``.

    Args:
        d_in: number of input features.
        n_layers: number of hidden blocks; with 0 the model is a single
            ``Linear(d_in, 1)``.
        n_units: width of every hidden layer.
        activation: activation name passed to ``make_activation``.
        dropout: dropout probability; falsy or non-positive values add no
            Dropout modules.
    """

    def __init__(self, d_in: int, n_layers: int, n_units: int, activation: str, dropout: float = 0.1):
        super().__init__()
        activation_cls = make_activation(activation)

        # Layer widths from input to last hidden layer, e.g. [d_in, 64, 64].
        widths = [d_in] + [n_units] * n_layers

        modules = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            modules += [nn.Linear(fan_in, fan_out), activation_cls()]
            if dropout and dropout > 0:
                modules.append(nn.Dropout(dropout))

        # Output head maps the last width (d_in when there are no hidden layers) to 1.
        modules.append(nn.Linear(widths[-1], 1))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.net(x)

@torch.no_grad()
def eval_model(model: nn.Module, loader: DataLoader, device=None):
    """Evaluate ``model`` on every batch of ``loader`` and return ``(rmse, r2)``.

    The model is switched to eval mode and is left in eval mode afterwards.
    Gradients are disabled for the whole call by the decorator.

    Args:
        model: network to evaluate.
        loader: yields ``(inputs, targets)`` batches.
        device: device the inputs are moved to before the forward pass. When
            ``None`` (the default) it is taken from the model's first
            parameter, falling back to CPU for a parameter-free model, so the
            function no longer depends on a notebook-global ``device``.

    Returns:
        Tuple ``(rmse, r2)``: square root of scikit-learn's
        ``mean_squared_error`` and ``r2_score``, both computed on the
        flattened targets and predictions.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    preds, trues = [], []
    for xb, yb in loader:
        pred = model(xb.to(device)).cpu().numpy()
        preds.append(pred)
        # .cpu() is a no-op for CPU tensors and keeps this safe for GPU-resident targets.
        trues.append(yb.cpu().numpy())

    if not preds:
        # np.vstack would raise an opaque ValueError here; fail with a clear message instead.
        raise ValueError("loader yielded no batches; cannot compute metrics")

    yhat = np.vstack(preds).ravel()
    ytrue = np.vstack(trues).ravel()
    rmse = np.sqrt(mean_squared_error(ytrue, yhat))
    r2 = r2_score(ytrue, yhat)
    return rmse, r2



In [7]:
# Best architecture from core sweep (fixed)
ACT = "relu"
N_LAYERS = 2
N_UNITS = 64
EPOCHS = 30

# Same hyperparams as sweep
LR = 5e-4
WEIGHT_DECAY = 1e-5
DROPOUT = 0.1
CLIP_NORM = 5.0

model = MLP(
    d_in=X_train.shape[1],
    n_layers=N_LAYERS,
    n_units=N_UNITS,
    activation=ACT,
    dropout=DROPOUT
).to(device)

opt = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
loss_fn = nn.MSELoss()

# Train on the full training set for a fixed number of epochs (no validation
# split or early stopping in this cell).
for epoch in range(1, EPOCHS + 1):
    model.train()  # re-enable dropout at the start of each epoch
    for xb, yb in full_train_loader:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb)
        loss = loss_fn(pred, yb)

        opt.zero_grad()
        loss.backward()
        # Clip the global gradient norm before the optimizer step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_NORM)
        opt.step()

rmse_test_ext, r2_test_ext = eval_model(model, test_loader)

# Build the labels from the constants above so the report cannot disagree with
# the model that was actually trained.
act_label = make_activation(ACT).__name__          # e.g. "ReLU" / "Tanh"
layer_widths = [str(N_UNITS)] * N_LAYERS + ["1"]    # e.g. ["64", "64", "1"]
arch_plain = "-".join(layer_widths)                 # used in the printed summary
arch_dashed = "--".join(layer_widths)               # double-dash form stored in the result dict

print(
    f"FINAL MLP (extended) | {act_label} [{arch_plain}] | epochs={EPOCHS} | "
    f"TEST RMSE(log)={rmse_test_ext:.4f} | TEST R2={r2_test_ext:.4f}"
)

final_nn_extended = {
    "model": "NN",
    "features": "Extended",
    "activation": ACT,
    "architecture": f"[{arch_dashed}]",
    "epochs": EPOCHS,
    "rmse_test_log": float(rmse_test_ext),
    "r2_test": float(r2_test_ext),
}
final_nn_extended


FINAL MLP (extended) | ReLU [64-64-1] | epochs=30 | TEST RMSE(log)=0.9157 | TEST R2=0.6335


{'model': 'NN',
 'features': 'Extended',
 'activation': 'relu',
 'architecture': '[64--64--1]',
 'epochs': 30,
 'rmse_test_log': 0.9156683159695179,
 'r2_test': 0.6334825754165649}