# Import required libraries

In [1]:
import pandas as pd

import torch
from torch.utils.data import TensorDataset, DataLoader

from datetime import datetime
import json

# custom utility functions
from modules.utils import Utils

# Preliminary Setup

In [2]:
# Project helper whose train / save_checkpoint / load_checkpoint methods are
# used below (implementation lives in modules.utils, not in this notebook).
utils = Utils()

# Seed PyTorch's global RNG so that a "Restart Kernel -> Run All" repeats the
# same random draws. NOTE(review): assumes Utils.train does not reseed
# internally — confirm against modules/utils.py.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


# Compute the mean and std of the training set (needed later to normalize the test set)

In [3]:
# Read the labelled training CSV; its per-feature mean/std are reused further
# down to normalise the test set.
train_csv_path = './data/train.csv'
df = pd.read_csv(train_csv_path)

In [4]:
# Features: every column except the row identifier and the target.
X_df = df.drop(['id', 'label'], axis=1)

# Target: the 'label' column on its own.
y_df = df.loc[:, 'label']

n_feature_cols = len(X_df.columns)
print("Number of feature columns:", n_feature_cols)

Number of feature columns: 500


In [5]:
# Feature matrix as float32 (rows = samples, columns = features).
X_tensor = torch.tensor(X_df.values, dtype=torch.float32)

# Convert the labels through their underlying NumPy array, exactly like the
# features above. Handing the pandas Series itself to torch.tensor makes
# PyTorch walk it as a generic Python sequence, which is slow and only works
# while the Series keeps a default 0..n-1 index.
y_tensor = torch.tensor(y_df.values, dtype=torch.long)

print(X_tensor.shape)
print(y_tensor.shape)

torch.Size([115406, 500])
torch.Size([115406])


In [6]:
# Standardise every feature column: subtract its mean, then divide by its
# population standard deviation (unbiased=False).
mean = X_tensor.mean(dim=0, keepdim=True)
std = X_tensor.std(dim=0, unbiased=False, keepdim=True)

# Constant columns have zero spread; give them a divisor of 1 so the division
# below stays finite.
std.masked_fill_(std == 0, 1.0)

X_tensor_norm = X_tensor.sub(mean).div(std)

print(X_tensor_norm.shape)
print(X_tensor_norm.mean(dim=0))  # per-column means, expected close to 0
print(X_tensor_norm.std(dim=0))   # per-column stds, expected close to 1

# Pair the normalised features with their labels for training.
dataset = TensorDataset(X_tensor_norm, y_tensor)

torch.Size([115406, 500])
tensor([-6.8555e-08, -4.5615e-08,  1.3685e-08,  2.0196e-08, -1.8378e-08,
        -7.7480e-08, -1.3883e-08, -2.7634e-08, -8.6405e-08, -8.4620e-09,
        -2.9088e-08, -1.9965e-08, -2.1915e-08,  3.9732e-08, -3.6889e-08,
         1.4544e-08,  3.8542e-08,  5.5532e-09,  3.7352e-08,  4.4128e-08,
        -1.9998e-09, -4.4822e-08,  2.4857e-08,  2.0229e-08, -1.3883e-08,
         4.0327e-09,  2.8030e-08,  3.0542e-08,  2.0890e-08, -8.6603e-09,
         2.8890e-08, -1.3288e-08, -3.3848e-08,  4.3500e-08, -1.8048e-08,
        -1.4263e-08,  1.2561e-09,  1.9833e-08, -2.1089e-08, -3.1468e-08,
         2.1287e-08, -9.4701e-09,  1.9833e-09,  1.8709e-08, -3.8277e-08,
         1.9568e-08, -3.9732e-08,  1.9172e-09, -5.5267e-08,  1.5271e-08,
        -7.8670e-09,  2.0031e-08, -6.3465e-09, -2.1485e-09, -8.1645e-09,
         2.6642e-08,  4.7202e-08,  6.0027e-08, -3.1865e-08, -1.6924e-08,
         3.4972e-08, -4.0988e-08, -4.4954e-09, -6.6407e-08,  8.7661e-08,
        -4.4095e-08, -2.3

# Retrain the model from Random Search on the FULL TRAIN SET

In [7]:
# Hyper-parameter sets behind each Kaggle submission (Medium MLP w/ modified
# architecture). Per the experiment notes, every run used grad_clip=True,
# gauss=True, patience=10.
# Previously these were three consecutive `params = {...}` assignments, of which
# only the last one took effect; they are kept here as named history instead.
PARAM_HISTORY = {
    # First submission: 0.827 on FULL TRAIN SET, 81.05% val acc.
    # Notes: batch size reduced from 768 to 512 and noise 0.65 >>> 81.11%
    # (81.05% val acc on laptop); resubmitted after training on the FULL TRAIN
    # SET instead of the train subset.
    'first': {'hidden_size': 4096, 'lr': 0.001, 'weight_decay': 0.1, 'batch_size': 512, 'init_type': 'xavier', 'dropout': 0.5, 'noise_std': 0.65, 'num_epochs': 100, 'warmup_epochs': 12},

    # Second submission: 0.834 on FULL TRAIN SET, 81.45% val acc.
    'second': {'hidden_size': 4096, 'lr': 0.005, 'weight_decay': 0.05, 'batch_size': 512, 'init_type': 'xavier', 'dropout': 0.5, 'noise_std': 0.6, 'num_epochs': 100, 'warmup_epochs': 12},

    # Third submission: 0.839 on FULL TRAIN SET, 81.93% val acc;
    # 82.65% val acc with label_smoothing=0.15, max_norm=10.0.
    # NOTE(review): label_smoothing / max_norm are not passed in the call below;
    # presumably they are set inside Utils.train — verify before relying on 82.65%.
    'third': {'hidden_size': 4096, 'lr': 0.005, 'weight_decay': 0.05, 'batch_size': 512, 'init_type': 'xavier', 'dropout': 0.5, 'noise_std': 0.6, 'num_epochs': 125, 'warmup_epochs': 12},
}

# Configuration actually trained in this run (copied so the history entry is
# never mutated through `params`).
params = dict(PARAM_HISTORY['third'])

# Retrain on the full (normalised) training set with the selected configuration.
model = utils.train(
    dataset, device, params=params, grad_clip=True, gauss=True
)

  0%|          | 0/125 [00:00<?, ?it/s]

### Save the model

In [8]:
# Stamp the checkpoint file name with the current local time (second
# resolution) so each run gets its own file.
saved_at = datetime.now()
timestamp = saved_at.strftime("%Y-%m-%d_%H-%M-%S")

# The 'FULL-' prefix marks a model trained on the full training set.
ckpt_path = f'./models/FULL-MediumMLP-model-ckpt-{timestamp}.pth'
utils.save_checkpoint(model, params, ckpt_path, full_train_dataset=True)

# Option 2 (alternative to retraining above): Load a model already trained on the FULL TRAIN SET

In [16]:
# This cell is an ALTERNATIVE to the retraining cell, not a follow-up step.
# Left unguarded, a top-to-bottom run would replace the freshly trained `model`
# and `params` with the older checkpoint below (or fail if the file is absent).
# Set the flag to True only when you want to predict with a saved model.
LOAD_SAVED_MODEL = False

# path should be ./models/FULL-<the rest>
path = './models/FULL-MediumMLP-model-ckpt-2025-11-26_18-42-01.pth'

if LOAD_SAVED_MODEL:
    # Rebinds both the model and the hyper-parameters stored with the checkpoint.
    model, params = utils.load_checkpoint(path, device, full_train_dataset=True)

# Initialize test set

In [9]:
# Keep the untouched frame around: its 'id' column is needed again when the
# submission table is built.
test_csv_path = './data/test.csv'
df_test_clean = pd.read_csv(test_csv_path)

# Model inputs are the feature columns only.
df_test = df_test_clean.drop('id', axis=1)
df_test.head(5)

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_490,feature_491,feature_492,feature_493,feature_494,feature_495,feature_496,feature_497,feature_498,feature_499
0,0.0,0.0,0.113592,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.170389,0.0,0.056796,0.0,0.056796,0.0,0.0,0.0
1,0.113228,0.0,0.0,0.0,0.0,0.113228,0.226455,0.0,0.0,0.113228,...,0.0,0.113228,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.104828,...,0.0,0.0,0.0,0.0,0.209657,0.104828,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.109764,0.0,0.0,0.0,0.109764,0.0,0.0


In [10]:
# Test features as float32, standardised with the TRAINING mean/std computed
# earlier (statistics are never re-estimated from the test set).
test_tensor = torch.tensor(df_test.to_numpy(), dtype=torch.float32)
test_tensor_norm = test_tensor.sub(mean).div(std)

print(test_tensor_norm.shape)
print(test_tensor_norm.mean(dim=0))  # near 0 (only approximately: stats come from train)
print(test_tensor_norm.std(dim=0))   # near 1 (only approximately: stats come from train)

torch.Size([49460, 500])
tensor([-5.3581e-04,  3.1035e-03,  1.0709e-02, -4.0309e-03,  6.5904e-03,
         1.3172e-03,  4.1022e-03, -1.2916e-03, -1.0826e-02, -2.2387e-04,
        -1.8983e-03, -2.9043e-03, -6.9165e-04, -7.7687e-03, -1.9963e-03,
        -3.7147e-03, -8.7191e-03,  3.6268e-03, -3.0633e-03, -1.7393e-03,
        -3.6079e-03, -3.8107e-03,  6.9915e-03,  7.1280e-03,  3.6513e-03,
         6.3325e-03, -4.6298e-03,  2.9175e-03, -3.7233e-03,  5.6264e-03,
         5.4351e-03,  4.6136e-03, -9.8161e-04,  3.4426e-03,  3.9129e-03,
        -6.3146e-03, -4.1739e-03, -6.3719e-03,  1.1527e-04, -7.9489e-03,
        -4.6441e-03,  6.3852e-03, -5.6621e-03,  2.5581e-03, -3.6894e-03,
        -6.6946e-03,  1.0695e-02,  4.9246e-03,  7.8550e-03,  1.5818e-03,
         4.2177e-03, -3.1513e-03, -5.2909e-03, -2.0714e-03,  6.0393e-04,
        -4.7644e-03,  4.5614e-03, -3.3923e-03, -7.5719e-03,  1.6721e-03,
         5.4259e-03, -1.4520e-02, -5.7779e-03, -8.1507e-03, -7.2291e-04,
        -2.5344e-03,  2.91

# Evaluate the model

In [11]:
# Wrap the NORMALIZED test tensor (not the raw one). shuffle=False keeps the
# row order, so predictions line up with the ids in df_test_clean.
test_set = TensorDataset(test_tensor_norm)
test_loader = DataLoader(test_set, batch_size=128, shuffle=False)

model.eval()  # evaluation mode (e.g. turns off dropout)
predictions = []

with torch.no_grad():  # inference only: no gradients needed
    for (X_batch,) in test_loader:  # TensorDataset yields 1-tuples; unpack them
        X_batch = X_batch.to(device)

        outputs = model(X_batch)
        # Index of the highest score per row = predicted class (0-49).
        # argmax is used instead of `_, preds = torch.max(...)` so that `_`
        # (IPython's last-output variable) is not overwritten.
        preds = outputs.argmax(dim=1)
        predictions.extend(preds.cpu().tolist())  # move to cpu >>> plain Python ints

preds_df = pd.DataFrame({
    'id': df_test_clean['id'].values,   # original test IDs
    'label': predictions                # predicted class labels
})
preds_df

Unnamed: 0,id,label
0,0,0
1,1,7
2,2,3
3,3,15
4,4,42
...,...,...
49455,49455,35
49456,49456,43
49457,49457,3
49458,49458,47


# Save predictions and hyperparameters

In [12]:
# One timestamp is shared by both files so a predictions CSV can be matched to
# the hyper-parameters that produced it.
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Predictions -> CSV (no index column).
filename = f"./submissions/MediumMLP-predictions-{timestamp}.csv"
preds_df.to_csv(filename, index=False)

# Hyper-parameters -> JSON, pretty-printed with a 4-space indent.
filename = f'./submissions/MediumMLP-params-{timestamp}.json'
with open(filename, mode='w') as f:
    f.write(json.dumps(params, indent=4))

# Read the file back as a quick check of what was written.
with open(filename, mode='r') as f:
    loaded_params = json.loads(f.read())

print(loaded_params)

{'hidden_size': 4096, 'lr': 0.005, 'weight_decay': 0.05, 'batch_size': 512, 'init_type': 'xavier', 'dropout': 0.5, 'noise_std': 0.6, 'num_epochs': 125, 'warmup_epochs': 12}


In [13]:
# How many test rows were assigned to each predicted class, listed in
# ascending label order (a quick look at the balance of the predictions).
label_frequencies = preds_df['label'].value_counts()
class_counts = label_frequencies.sort_index()
print(class_counts)

label
0     1011
1      915
2      982
3      988
4      981
5      981
6      937
7     1025
8     1013
9      848
10     922
11     932
12    1048
13     957
14    1021
15     921
16     963
17    1063
18    1006
19     965
20    1036
21     925
22    1062
23    1103
24    1006
25    1088
26     934
27    1037
28     949
29     991
30     906
31     951
32    1052
33     961
34    1009
35    1009
36     982
37     945
38     989
39    1025
40    1113
41     919
42    1019
43     945
44    1055
45     955
46    1032
47    1029
48     997
49     957
Name: count, dtype: int64
