# ML Cup

Import our own neural network code aswell as numpy and pandas for data handling and loading.

In [21]:
import numpy as np
import pandas as pd
from network import *
from sklearn.preprocessing import StandardScaler
from train_utils import *

Read the data into pandas dataframes, remove empty first column, remove ids, split into variables and class labels, convert to numpy arrays.

In [22]:
X_df = pd.read_csv("data/cup/ML-CUP25-TR.csv", sep=",", comment="#", header=None)
XBT_df = pd.read_csv("data/cup/ML-CUP25-TS.csv", sep=",", comment="#", header=None)

Y_df = X_df[[13, 14, 15, 16]]
X_df = X_df.drop(columns=[0, 13, 14, 15, 16])
XBT_df = XBT_df.drop(columns=[0])

print(X_df.shape)
print(X_df.head())
print(Y_df.shape)
print(Y_df.head())
print(XBT_df.shape)
print(XBT_df.head())

X = X_df.to_numpy()
Y = Y_df.to_numpy()
XBT = XBT_df.to_numpy()

(500, 12)
          1          2          3          4         5         6          7   \
0  -6.925642  -6.093158  -9.149763  -5.918488  4.391259 -1.059304  -5.031085   
1  -5.649870  -7.650998 -10.407383  -7.864047  3.790306 -1.673732  -8.493233   
2  15.985886  14.192953  24.466835  12.551305 -7.788409  0.557977  23.145951   
3  12.774004  10.156462  18.588934   8.346695 -5.245173 -0.199274  14.500231   
4  -4.019226  -4.043457  -5.095354  -3.147125  0.725466 -0.477673  -4.025913   

          8          9          10         11         12  
0  -6.932177  -5.805652   7.147028   4.555533  -5.694865  
1  -8.143588  -9.447557  10.790796   6.266211  -5.551301  
2  20.031774  14.516358 -21.024198 -10.410913  12.061133  
3  12.608063  12.411055 -15.479452  -8.871887   6.703585  
4  -0.995364  -3.491760   3.385533   1.838361  -4.271710  
(500, 4)
          13         14         15         16
0   6.554997  10.688732  15.416160  -7.535628
1  12.342252  -8.135250  23.787661  -3.270978
2  28.54

Scale the features.

In [23]:
X = StandardScaler().fit_transform(X)
XBT = StandardScaler().fit_transform(XBT)

X

array([[-0.88733074, -0.81159466, -0.73626082, ...,  0.73052709,
         0.83883143, -0.92278791],
       [-0.76318039, -0.96810341, -0.81269752, ...,  1.02119652,
         1.09256129, -0.9028031 ],
       [ 1.34227906,  1.22645517,  1.30691755, ..., -1.51673883,
        -1.38101066,  1.5489333 ],
       ...,
       [ 1.0711801 ,  1.07026272,  1.42800213, ..., -1.22023228,
        -1.42278381,  1.2695007 ],
       [-0.90892053, -1.26616062, -1.25202302, ...,  1.05068417,
         0.99456614, -1.03309211],
       [ 0.93705775,  0.62131415,  0.79163565, ..., -0.63697812,
        -0.57424952,  0.53928755]], shape=(500, 12))

## Initial Training
We first train with hould-out validation and hold-out test to see if training works at all.

Split data into train and validation sets, using 80% of the data for train and 20% for validation. The test set is already given as part of the dataset.

In [24]:
def split_data(X, Y, val_fraction=0.15, test_fraction=0.15, shuffle=True):
    n = X.shape[0]
    indices = np.arange(n)
    if shuffle:
        np.random.shuffle(indices)

    val_size = int(n * val_fraction)
    test_size = int(n * test_fraction)
    train_size = n - val_size - test_size

    train_indices = indices[:train_size]
    val_indices = indices[train_size:train_size + val_size]
    test_indices = indices[train_size + val_size:]

    X_train = X[train_indices]
    Y_train = Y[train_indices]
    X_val = X[val_indices]
    Y_val = Y[val_indices]
    X_test = X[test_indices]
    Y_test = Y[test_indices]

    return X_train, Y_train, X_val, Y_val, X_test, Y_test

XTr, YTr, XVl, YVl, XT, YT = split_data(X, Y)
print(XTr.shape, YTr.shape, XVl.shape, YVl.shape, XT.shape, YT.shape)

(350, 12) (350, 4) (75, 12) (75, 4) (75, 12) (75, 4)


Define datasets and dataloaders.

In [25]:
XTr_dl = DataLoader(Dataset(XTr, YTr), batch_size=32, shuffle=True)
XVl_dl = DataLoader(Dataset(XVl, YVl), batch_size=32, shuffle=False)
XT_dl = DataLoader(Dataset(XT, YT), batch_size=32, shuffle=False)

Train a small model to see if training works at all.

In [26]:
model = Model(
    LinearLayer(12, 32),
    ReLU(),
    LinearLayer(32, 32),
    ReLU(),
    LinearLayer(32, 4),
)
loss_fn = MSELoss()
optimizer = AdamWOptimizer(model, learning_rate=0.01, weight_decay=0.01)


for epoch in range(100):
    # Train
    train_total_n = 0
    train_losses = 0.0
    train_errors = 0.0
    for x_batch, y_batch in XTr_dl:
        y_pred = model.forward(x_batch)
        loss = loss_fn.forward(y_pred, y_batch)
        grad_loss = loss_fn.backward()
        model.backward(grad_loss)
        optimizer.step()
        train_total_n += y_batch.shape[0]
        train_losses += loss * y_batch.shape[0]
        result = np.sqrt(np.sum((y_pred - y_batch) ** 2, axis=1))
        train_errors += np.sum(result)

    # Validate
    val_total_n = 0
    val_losses = 0.0
    val_errors = 0.0
    for x_batch, y_batch in XVl_dl:
        y_pred = model.forward(x_batch)
        loss = loss_fn.forward(y_pred, y_batch)
        val_total_n += y_batch.shape[0]
        val_losses += loss * y_batch.shape[0]
        result = np.sqrt(np.sum((y_pred - y_batch) ** 2, axis=1))
        val_errors += np.sum(result)

    print(f"Epoch {epoch+1:03d} | Train Loss: {train_losses / train_total_n:.4f} | Train MEE: {train_errors / train_total_n:.4f} | Val Loss: {val_losses / val_total_n:.4f} | Val MEE: {val_errors / val_total_n:.4f}")

Epoch 001 | Train Loss: 302.2725 | Train MEE: 31.0522 | Val Loss: 281.3986 | Val MEE: 31.1349
Epoch 002 | Train Loss: 211.0254 | Train MEE: 26.2764 | Val Loss: 217.7835 | Val MEE: 27.5767
Epoch 003 | Train Loss: 192.8460 | Train MEE: 25.0608 | Val Loss: 215.0031 | Val MEE: 27.4665
Epoch 004 | Train Loss: 186.7045 | Train MEE: 24.6878 | Val Loss: 209.5161 | Val MEE: 27.0798
Epoch 005 | Train Loss: 182.9170 | Train MEE: 24.3624 | Val Loss: 207.6115 | Val MEE: 26.9665
Epoch 006 | Train Loss: 180.1972 | Train MEE: 24.1229 | Val Loss: 207.2831 | Val MEE: 26.9149
Epoch 007 | Train Loss: 178.1582 | Train MEE: 23.9810 | Val Loss: 202.0909 | Val MEE: 26.5973
Epoch 008 | Train Loss: 177.6372 | Train MEE: 23.8470 | Val Loss: 203.5404 | Val MEE: 26.5719
Epoch 009 | Train Loss: 175.5222 | Train MEE: 23.6984 | Val Loss: 196.2416 | Val MEE: 26.2165
Epoch 010 | Train Loss: 172.9185 | Train MEE: 23.4368 | Val Loss: 195.3089 | Val MEE: 26.0140
Epoch 011 | Train Loss: 169.1845 | Train MEE: 23.2056 | Val 

In [27]:
# Test
test_total_n = 0
test_losses = 0.0
test_errors = 0.0
for x_batch, y_batch in XT_dl:
    y_pred = model.forward(x_batch)
    loss = loss_fn.forward(y_pred, y_batch)
    test_total_n += y_batch.shape[0]
    test_losses += loss * y_batch.shape[0]
    result = np.sqrt(np.sum((y_pred - y_batch) ** 2, axis=1))
    test_errors += np.sum(result)

print(f"Test Loss: {test_losses / test_total_n:.4f} | Test MEE: {test_errors / test_total_n:.4f}")

Test Loss: 174.2556 | Test MEE: 23.4391


Because we did not scale the target variables, the MEE value is meaningless. We calculate what an blind model that would just output the standard deviation of the training data would end up having as MEE.

In [28]:
std_targets = np.std(YTr, axis=0)
mee = np.sqrt(np.sum(std_targets ** 2))
print(f"Blind model MEE: {mee}")

Blind model MEE: 37.359385103286634
