- Runs muliple configurations
- Logs result
- Selects best config
- Retrains the optimized model automatically

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, LabelEncoder
from itertools import product
import matplotlib.pyplot as plt

%matplotlib inline

**Data preparation**

In [2]:
train_df = pd.read_csv("Fetus_trainingdata.csv")
test_df = pd.read_csv("Fetus_testingdata.csv")

X_train = train_df.drop('Outcome', axis=1)
y_train = train_df['Outcome']

X_test = test_df.drop('Outcome', axis=1)
y_test = test_df['Outcome']

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

train_dataset = list(zip(X_train, y_train))
test_dataset = list(zip(X_test, y_test))

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

**Define model class**

In [3]:
class Net_VaryLayers(nn.Module):
    def __init__(self, activation_fn=nn.ReLU, num_layers=2):
        super(Net_VaryLayers, self).__init__()
        layers = []
        input_dim = 16
        hidden_dim = 32     # Increase hidden units (neurons)

        # Build hidden layer
        for i in range(num_layers - 1):
            layers.append(nn.Linear(input_dim, hidden_dim))
            layers.append(activation_fn())
            input_dim = hidden_dim

        # Final layer to 5 output classes    
        self. output_layer = nn.Linear(input_dim, 5)
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        x = self.network(x)
        logits = self.output_layer(x)
        return logits

**Function to train and eval**

In [35]:
def train_test_model(config, train_loader, test_loader, epochs=50, patience=10):
    model = Net_VaryLayers(
        activation_fn=config["activation"],
        num_layers=config["num_layers"],
    )
    criterion = config["loss_fn"]()
    optimizer = optim.SGD(model.parameters(), lr=config["lr"], weight_decay=config["weight_decay"])

    training_losses, testing_losses = [], []
    training_accuracy, testing_accuracy = [], []

    # Early stopping parameters
    best_loss = np.inf
    best_epoch = 0
    patience_counter = 0
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        total_training_loss, training_error, total_training = 0, 0, 0

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            logits = model(X_batch)

            if isinstance(criterion, nn.MSELoss):
                y_onehot = nn.functional.one_hot(y_batch, num_classes=5).float()
                loss = criterion(torch.softmax(logits, dim=1), y_onehot)
            else:
                loss = criterion(logits, y_batch)

            loss.backward()
            optimizer.step()
            total_training_loss += loss.item()
            preds = torch.argmax(logits, dim=1)
            
            training_error = (preds != y_batch).sum().item()
            total_training += y_batch.size(0)

        avg_training_loss = total_training_loss / len(train_loader)
        training_accuracies = 100 * (1 - training_error / total_training)

        model.eval()
        total_testing_loss, testing_error, total_testing = 0, 0, 0

        with torch.no_grad():
            for X_test, y_test in test_loader:
                logits = model(X_test)

                if isinstance(criterion, nn.MSELoss):
                    y_onehot = nn.functional.one_hot(y_test, num_classes=5).float()
                    loss = criterion(torch.softmax(logits, dim=1), y_onehot)
                else:
                    loss = criterion(logits, y_test)

                total_training_loss += loss.item()    
                probs = torch.softmax(logits, dim=1)
                preds = torch.argmax(probs, dim=1)
                testing_error = (preds != y_test).sum().item()
                total_testing += y_test.size(0)

        avg_testing_loss = total_testing_loss / len(test_loader)
        testing_accuracies = 100 * (1 - testing_error / total_testing)

        training_losses.append(avg_training_loss)
        testing_losses.append(avg_testing_loss)
        training_accuracy.append(training_accuracies)
        testing_accuracy.append(testing_accuracies)

        print(f"Epoch [{epoch+1}/{epochs}] "
            f"Training Loss: {avg_training_loss:.4f} | Training Accuracy: {training_accuracies:.2f}% | "
            f"Testing Loss: {avg_testing_loss:.4f} | Testing Accuracy: {testing_accuracies:.2f}%")

        # Early stopping
        if avg_testing_loss < best_loss:
            best_loss = avg_testing_loss
            best_epoch = epoch
            best_model_state = model.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1

            if patience_counter >= patience:
                print(f"⏸️ Early stopping triggered at epoch {epoch+1} (best epoch: {best_epoch+1})")
                model.load_state_dict(best_model_state)
                break

    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    
    return {
        "training_losses": training_losses,
        "testing_losses": testing_losses,
        "training_accuracy": training_accuracy,
        "testing_accuracy": testing_accuracy,
        "final_training_losses": training_losses[-1],
        "final_testing_losses": testing_losses[-1],
        "final_training_accuracy": training_accuracy[-1],
        "final_testing_accuracy": testing_accuracy[-1],
        "num_params": num_params,
        "model": model
    }


**Hyperparameters grid search**

In [25]:
param_grid = {
    "lr": [0.01, 0.05, 0.1],
    "num_layers": [2, 3],
    "activation": [nn.ReLU, nn.Tanh],
    "loss_fn": [nn.CrossEntropyLoss, nn.MSELoss],
    "weight_decay": [0, 1e-4, 1e-3]
}

In [36]:
configs = [dict(zip(param_grid.keys(), v)) for v in product(*param_grid.values())]

In [37]:
results = []

for i, cfg in enumerate(configs):
    print(f"\nRunning configuration {i+1}/{len(configs)}: {cfg}")
    metrics = train_test_model(cfg, train_loader, test_loader, epochs=100, patience=10)
    metrics.update(cfg)
    results.append(metrics)


Running configuration 1/72: {'lr': 0.01, 'num_layers': 2, 'activation': <class 'torch.nn.modules.activation.ReLU'>, 'loss_fn': <class 'torch.nn.modules.loss.CrossEntropyLoss'>, 'weight_decay': 0}
Epoch [1/100] Training Loss: 1.6848 | Training Accuracy: 93.93% | Testing Loss: 0.0000 | Testing Accuracy: 72.41%
Epoch [2/100] Training Loss: 1.5776 | Training Accuracy: 94.51% | Testing Loss: 0.0000 | Testing Accuracy: 68.97%
Epoch [3/100] Training Loss: 1.4839 | Training Accuracy: 96.24% | Testing Loss: 0.0000 | Testing Accuracy: 75.86%
Epoch [4/100] Training Loss: 1.4049 | Training Accuracy: 97.11% | Testing Loss: 0.0000 | Testing Accuracy: 70.69%
Epoch [5/100] Training Loss: 1.3347 | Training Accuracy: 95.66% | Testing Loss: 0.0000 | Testing Accuracy: 77.59%
Epoch [6/100] Training Loss: 1.2744 | Training Accuracy: 97.40% | Testing Loss: 0.0000 | Testing Accuracy: 77.59%
Epoch [7/100] Training Loss: 1.2210 | Training Accuracy: 96.82% | Testing Loss: 0.0000 | Testing Accuracy: 74.14%
Epoch

In [38]:
results_df = pd.DataFrame(results)
best = results_df.sort_values("testing_accuracy", ascending=False)
print("\nBest Hyperparameters")
best


Best Hyperparameters


Unnamed: 0,training_losses,testing_losses,training_accuracy,testing_accuracy,final_training_losses,final_testing_losses,final_training_accuracy,final_testing_accuracy,num_params,model,lr,num_layers,activation,loss_fn,weight_decay
54,"[1.399248556657271, 1.0792704170400447, 0.9095...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[96.82080924855492, 96.53179190751445, 97.6878...","[91.37931034482759, 86.20689655172413, 89.6551...",0.632039,0.0,97.398844,84.482759,709,Net_VaryLayers(\n (output_layer): Linear(in_f...,0.10,2,<class 'torch.nn.modules.activation.Tanh'>,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,0.0000
56,"[1.4725830554962158, 1.1098876324566929, 0.908...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[97.10982658959537, 96.82080924855492, 96.8208...","[91.37931034482759, 81.03448275862068, 84.4827...",0.623138,0.0,97.687861,91.379310,709,Net_VaryLayers(\n (output_layer): Linear(in_f...,0.10,2,<class 'torch.nn.modules.activation.Tanh'>,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,0.0010
48,"[1.3200046907771716, 0.9351284449750726, 0.791...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[96.53179190751445, 97.6878612716763, 97.39884...","[89.65517241379311, 91.37931034482759, 89.6551...",0.585002,0.0,96.531792,87.931034,709,Net_VaryLayers(\n (output_layer): Linear(in_f...,0.10,2,<class 'torch.nn.modules.activation.ReLU'>,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,0.0000
49,"[1.3296779719266025, 0.9241747043349526, 0.790...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[95.95375722543352, 97.10982658959537, 96.5317...","[89.65517241379311, 89.65517241379311, 87.9310...",0.590746,0.0,96.820809,87.931034,709,Net_VaryLayers(\n (output_layer): Linear(in_f...,0.10,2,<class 'torch.nn.modules.activation.ReLU'>,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,0.0001
67,"[1.3872758150100708, 1.0268778204917908, 0.840...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[96.82080924855492, 96.82080924855492, 99.1329...","[89.65517241379311, 86.20689655172413, 87.9310...",0.628653,0.0,97.109827,87.931034,1765,Net_VaryLayers(\n (output_layer): Linear(in_f...,0.10,3,<class 'torch.nn.modules.activation.Tanh'>,<class 'torch.nn.modules.loss.CrossEntropyLoss'>,0.0001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34,"[0.1623770228841088, 0.16049356081269003, 0.15...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[95.66473988439307, 95.37572254335261, 93.6416...","[55.172413793103445, 58.62068965517242, 62.068...",0.145397,0.0,96.820809,67.241379,709,Net_VaryLayers(\n (output_layer): Linear(in_f...,0.05,2,<class 'torch.nn.modules.activation.Tanh'>,<class 'torch.nn.modules.loss.MSELoss'>,0.0001
40,"[0.16133203696120868, 0.15912924977866086, 0.1...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[92.48554913294798, 92.77456647398844, 94.2196...","[55.172413793103445, 56.896551724137936, 60.34...",0.142773,0.0,96.820809,81.034483,1765,Net_VaryLayers(\n (output_layer): Linear(in_f...,0.05,3,<class 'torch.nn.modules.activation.ReLU'>,<class 'torch.nn.modules.loss.MSELoss'>,0.0001
47,"[0.16127847676927393, 0.1591263250871138, 0.15...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[93.0635838150289, 93.0635838150289, 93.063583...","[55.172413793103445, 55.172413793103445, 65.51...",0.141799,0.0,96.531792,70.689655,1765,Net_VaryLayers(\n (output_layer): Linear(in_f...,0.05,3,<class 'torch.nn.modules.activation.Tanh'>,<class 'torch.nn.modules.loss.MSELoss'>,0.0010
16,"[0.16550593213601547, 0.1651673222129995, 0.16...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[92.48554913294798, 92.48554913294798, 92.4855...","[55.172413793103445, 55.172413793103445, 55.17...",0.162266,0.0,92.485549,60.344828,1765,Net_VaryLayers(\n (output_layer): Linear(in_f...,0.01,3,<class 'torch.nn.modules.activation.ReLU'>,<class 'torch.nn.modules.loss.MSELoss'>,0.0001


In [39]:
best_cfg = best.iloc[0].to_dict()
optimized_model = train_test_model(best_cfg, train_loader, test_loader, epochs=100, patience=10)["model"]
#torch.save(optimized_model.state_dict(), "optimized_model.pt")

Epoch [1/100] Training Loss: 1.4412 | Training Accuracy: 96.82% | Testing Loss: 0.0000 | Testing Accuracy: 81.03%
Epoch [2/100] Training Loss: 1.0954 | Training Accuracy: 96.24% | Testing Loss: 0.0000 | Testing Accuracy: 94.83%
Epoch [3/100] Training Loss: 0.9103 | Training Accuracy: 96.82% | Testing Loss: 0.0000 | Testing Accuracy: 87.93%
Epoch [4/100] Training Loss: 0.8035 | Training Accuracy: 97.40% | Testing Loss: 0.0000 | Testing Accuracy: 93.10%
Epoch [5/100] Training Loss: 0.7411 | Training Accuracy: 97.11% | Testing Loss: 0.0000 | Testing Accuracy: 89.66%
Epoch [6/100] Training Loss: 0.6996 | Training Accuracy: 97.11% | Testing Loss: 0.0000 | Testing Accuracy: 87.93%
Epoch [7/100] Training Loss: 0.6756 | Training Accuracy: 97.40% | Testing Loss: 0.0000 | Testing Accuracy: 86.21%
Epoch [8/100] Training Loss: 0.6569 | Training Accuracy: 98.55% | Testing Loss: 0.0000 | Testing Accuracy: 91.38%
Epoch [9/100] Training Loss: 0.6396 | Training Accuracy: 98.55% | Testing Loss: 0.0000 |

In [40]:
best.iloc[0]

training_losses            [1.399248556657271, 1.0792704170400447, 0.9095...
testing_losses             [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
training_accuracy          [96.82080924855492, 96.53179190751445, 97.6878...
testing_accuracy           [91.37931034482759, 86.20689655172413, 89.6551...
final_training_losses                                               0.632039
final_testing_losses                                                     0.0
final_training_accuracy                                            97.398844
final_testing_accuracy                                             84.482759
num_params                                                               709
model                      Net_VaryLayers(\n  (output_layer): Linear(in_f...
lr                                                                       0.1
num_layers                                                                 2
activation                        <class 'torch.nn.modules.activation.Tanh'>