In [1]:
import torch
import random
import pandas as pd
import numpy as np

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

#from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt



In [2]:
torch.manual_seed(123)

train_df = pd.read_parquet("../data/training_data.parquet")

# map labels to ints
classes = sorted(train_df["label"].unique())
label2idx = {c: i for i, c in enumerate(classes)}

train_df["y"] = train_df["label"].map(label2idx)

X = train_df.filter(like="pixel_").to_numpy().astype(np.float32)
y = train_df["y"].to_numpy().astype(np.int64)
# Arrays to tensors
X = torch.from_numpy(X)
y = torch.from_numpy(y)



In [3]:
class MyModel(nn.Module):
    def __init__(self, h1, h2, dropout):
        super().__init__()

        # Fully connected layers
        self.fc1 = nn.Linear(784, h1) # 784 input features (28x28 image flattened)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, 4)   # 4 output classes

        # Activation and dropout
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)

        x = self.relu(self.fc2(x))
        x = self.dropout(x) 

        x = self.fc3(x)
        return x

    

In [4]:
class EarlyStopping:
    def __init__(self, patience=5, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.best_model_state = None

    def __call__(self, val_loss, model):
        if self.best_loss is None or val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
            self.best_model_state = {k: v.clone() for k, v in model.state_dict().items()}
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

    def restore_best_weights(self, model):
        if self.best_model_state is not None:
            model.load_state_dict(self.best_model_state)

In [5]:
def sample_hparams(search_space, fixed):
    hparams = {}
    for key, values in search_space.items():
        hparams[key] = np.random.choice(values)
    hparams["batch_size"]=fixed["batch_size"]
    hparams["epochs"]=fixed["epochs"]
    return hparams

In [6]:
def train_fold(X_train, y_train, X_val, y_val, hparams):
    batch_size = hparams["batch_size"]
    droupout = hparams["dropout"]
    lr = hparams["lr"]
    weight_decay = hparams["weight_decay"]
    epochs = hparams["epochs"]

    # Datasets and data loaders
    train_dataset = TensorDataset(X_train.float(), y_train.long())
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    val_dataset = TensorDataset(X_val.float(), y_val.long())
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Instantiate model
    model = MyModel(h1=hparams["h1"], h2=hparams["h2"], dropout=droupout)
    # Loss 
    cost_fn = nn.CrossEntropyLoss()
    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Early Stopping 
    early_stopping = EarlyStopping(patience=5)
    epochs_run = 0

    for epoch in range(epochs):
        epochs_run += 1
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = cost_fn(y_pred, y_batch)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:

                logits = model(X_batch)
                loss = cost_fn(logits, y_batch)

                val_loss += loss.item()

                preds = logits.argmax(dim=1)
                correct += (preds == y_batch).sum().item()
                total += y_batch.size(0)

        val_loss /= len(val_loader) 
        val_accuracy = correct / total

        #--- Early Stopping --- 
        early_stopping(val_loss, model)

        if early_stopping.early_stop:
            #print(f"Stopping early at epoch {epoch+1}")
            break

    # This restores the best model
    early_stopping.restore_best_weights(model)
    return val_accuracy, epochs_run

In [7]:
# Random search space

# ORIGINAL RANDOM SEARCH
# search_space = {
# #     "lr" : [1e-4, 3e-4, 1e-3, 3e-3, 1e-2],
# #     "weight_decay" : [0.0, 1e-6, 1e-5, 1e-4, 1e-3],
# #     "dropout" : [0.0, 0.2, 0.3, 0.4, 0.5],
# #     "h1" : [64, 128, 256, 512],
# #     "h2" : [0, 32, 64, 128, 256]
# # }


# Focused search
search_space = {
    "lr": [1e-4, 3e-4, 1e-3],              
    "weight_decay": [1e-5, 1e-4, 1e-3],       
    "dropout": [0.2, 0.3, 0.4],          
    "h1": [256, 512],                      
    "h2": [64, 128, 256]                     
}

# Fixed hyperparameters
fixed = {
    "batch_size" : 64,
    "epochs" : 50
}

In [8]:
cv = StratifiedKFold(n_splits=5, shuffle=True)

n_trials = 15
results = []

for trial in range(n_trials):
    hparams = sample_hparams(search_space, fixed)

    fold_accuracies = []
    fold_epochs = []

    for fold, (train_idx, val_idx) in enumerate(cv.split(X,y), 1):
        X_train, y_train = X[train_idx], y[train_idx]
        X_val, y_val = X[val_idx], y[val_idx]

        accuracy, epochs_run = train_fold(X_train, y_train, X_val, y_val, hparams)
        fold_accuracies.append(accuracy)
        fold_epochs.append(epochs_run)
        #print(f"Fold {fold}: Validation Accuracy = {accuracy:.4f}")

    mean_accuracy = float(np.mean(fold_accuracies))
    mean_epochs = float(np.mean(fold_epochs))

    display_hparams = {
        k: (float(v) if isinstance(v, np.floating) else int(v) if isinstance(v, np.integer) else v)
        for k, v in hparams.items()
        if k not in ["batch_size", "epochs"]
    }

    results.append({
        "hparams": hparams,
        "mean_accuracy": mean_accuracy,
        "mean_epochs": mean_epochs,
    })

    print(
        f"Trial {trial+1}: Mean CV Accuracy = {mean_accuracy:.4f}, "
        f"Mean epochs until early stop = {mean_epochs:.1f}, "
        f"Hyperparams = {display_hparams}"
    )
    #print(f"Trial {trial+1}: Mean CV Accuracy: {mean_accuracy:.4f},\n Hyperparameters: {hparams}\n\n")
    
best = max(results, key=lambda x: x["mean_accuracy"])
print("\nBest config:")
disp_best = {
    k: (float(v) if isinstance(v, np.floating) else int(v) if isinstance(v, np.integer) else v)
    for k, v in best["hparams"].items()
    if k not in ["batch_size", "epochs"]
}
print(best["hparams"])
print(f"Best mean CV Accuracy: {best['mean_accuracy']:.4f}")
print(f"Average epochs until early stop = {best['mean_epochs']:.1f}")

Trial 1: Mean CV Accuracy = 0.9571, Mean epochs until early stop = 23.4, Hyperparams = {'lr': 0.0001, 'weight_decay': 0.0001, 'dropout': 0.2, 'h1': 256, 'h2': 64}
Trial 2: Mean CV Accuracy = 0.9572, Mean epochs until early stop = 21.0, Hyperparams = {'lr': 0.0003, 'weight_decay': 0.001, 'dropout': 0.4, 'h1': 512, 'h2': 64}
Trial 3: Mean CV Accuracy = 0.9575, Mean epochs until early stop = 16.4, Hyperparams = {'lr': 0.0003, 'weight_decay': 1e-05, 'dropout': 0.4, 'h1': 256, 'h2': 64}
Trial 4: Mean CV Accuracy = 0.9573, Mean epochs until early stop = 13.8, Hyperparams = {'lr': 0.0003, 'weight_decay': 1e-05, 'dropout': 0.3, 'h1': 256, 'h2': 128}
Trial 5: Mean CV Accuracy = 0.9571, Mean epochs until early stop = 13.2, Hyperparams = {'lr': 0.0003, 'weight_decay': 0.0001, 'dropout': 0.3, 'h1': 256, 'h2': 256}
Trial 6: Mean CV Accuracy = 0.9573, Mean epochs until early stop = 20.2, Hyperparams = {'lr': 0.0003, 'weight_decay': 0.001, 'dropout': 0.2, 'h1': 256, 'h2': 128}
Trial 7: Mean CV Accura