In [3]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score, log_loss
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tqdm import tqdm

import numpy as np
import pandas as pd
import warnings
from sklearn.exceptions import ConvergenceWarning

# Suppress every warning of category ConvergenceWarning for the rest of the session,
# so the training cells below do not print them.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [4]:
# Read the training CSV into a DataFrame (path is relative to the notebook's working directory)
train_csv_path = 'sign-language-mnist/sign_mnist_train.csv'
train_data = pd.read_csv(train_csv_path)


In [5]:
# Pull the target column out of the raw frame; everything else is a feature column
labels_all = train_data['label']
features_all = train_data.drop(columns=["label"]) / 255.0  # divide every feature value by 255

# Hold out 10% of the rows as a validation set.
# The split is stratified on the label and seeded so it is repeatable.
X_train, X_validate, y_train, y_validate = train_test_split(
    features_all,
    labels_all,
    test_size=0.1,
    random_state=42,
    stratify=labels_all,
)


In [6]:
# Sweep the regularization strength (MLPClassifier's `alpha`): fit one model per
# candidate value and score it on the held-out validation split.
lambda_hyperparameter = [0, 1e-4, 1e-3, 1e-2, 0.1]

# Per-candidate validation metrics, index-aligned with `lambda_hyperparameter`
val_accs = []
val_losses = []
f1_scores_list = []

for lam in lambda_hyperparameter:  # `lam` instead of the easily-misread `l`

    model = MLPClassifier(
        hidden_layer_sizes=(64,),
        activation='relu',
        solver='adam',
        learning_rate_init=0.001,
        alpha=lam,               # regularization strength under test
        early_stopping=True,     # use an internal validation split for early stopping
        validation_fraction=0.1, # 10% of X_train is held out internally by the model
        n_iter_no_change=10,     # patience for early stopping
        max_iter=200,
        random_state=42          # same seed for every candidate, so only alpha varies
    )

    # Fit on the training split; the model carves its own early-stopping
    # split out of X_train, separate from X_validate.
    model.fit(X_train, y_train)

    # Evaluate on the external validation split
    y_val_pred = model.predict(X_validate)
    y_val_proba = model.predict_proba(X_validate)

    val_acc = accuracy_score(y_validate, y_val_pred)
    f1_val = f1_score(y_validate, y_val_pred, average='weighted')
    # The columns of predict_proba are ordered by model.classes_. Passing that
    # ordering explicitly keeps the loss well-defined even if some class is
    # missing from y_validate (otherwise log_loss infers labels from y_true).
    val_loss = log_loss(y_validate, y_val_proba, labels=model.classes_)

    val_accs.append(val_acc)
    f1_scores_list.append(f1_val)
    val_losses.append(val_loss)

    print(
        f"Lambda={lam} | Acc={val_acc:.5f} | F1={f1_val:.5f} | "
        f"Val Loss={val_loss:.5f} | Iter={model.n_iter_}"
    )

Lambda=0 | Acc=0.99891 | F1=0.99891 | Val Loss=0.04663 | Iter=91
Lambda=0.0001 | Acc=0.99927 | F1=0.99927 | Val Loss=0.04217 | Iter=91
Lambda=0.001 | Acc=0.99745 | F1=0.99745 | Val Loss=0.06699 | Iter=80
Lambda=0.01 | Acc=0.99818 | F1=0.99818 | Val Loss=0.06443 | Iter=80
Lambda=0.1 | Acc=0.99782 | F1=0.99782 | Val Loss=0.08300 | Iter=105


In [8]:
# Pick the candidate with the highest validation accuracy
# (np.argmax returns the first index on ties).
best_idx = np.argmax(val_accs)
best_lambda = lambda_hyperparameter[best_idx]

# Metrics recorded for the winning candidate
best_acc = val_accs[best_idx]
best_f1 = f1_scores_list[best_idx]
best_loss = val_losses[best_idx]

# Emit the summary one line at a time, banner first
report_lines = [
    "\n=======================",
    " Best Regularization λ ",
    "=======================\n",
    f"Best λ: {best_lambda}",
    f"Validation Accuracy: {best_acc:.5f}",
    f"Validation F1-score: {best_f1:.5f}",
    f"Validation Loss: {best_loss:.5f}\n",
    "Selected based on highest validation Acc.",
]
for report_line in report_lines:
    print(report_line)


 Best Regularization λ 

Best λ: 0.0001
Validation Accuracy: 0.99927
Validation F1-score: 0.99927
Validation Loss: 0.04217

Selected based on highest validation Acc.
