In [51]:
from sklearn.preprocessing import (
    MaxAbsScaler,
    MinMaxScaler,
    Normalizer,
    PowerTransformer,
    QuantileTransformer,
    RobustScaler,
    StandardScaler,
    minmax_scale,
)
from sklearn.metrics import recall_score, accuracy_score,f1_score, precision_score, roc_auc_score
from sklearn.model_selection import train_test_split, StratifiedKFold
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

import optuna
import pandas as pd
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, roc_auc_score

In [52]:
randomState = 42
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
raw_dataset = pd.read_csv("./data/processed_data.csv") #data has X and Y
X = raw_dataset.drop(columns=["DR"])
Y = pd.DataFrame(raw_dataset["DR"])

#* 90/10 split for training and final test
X_FOR_FOLDS, X_FINAL_TEST, Y_FOR_FOLDS, Y_FINAL_TEST = train_test_split(X, Y, test_size=0.1, random_state=randomState, stratify=Y)

Using device: cpu


In [53]:
def FOLDS_GENERATOR(X, Y, normalisation_method=MinMaxScaler(), n_splits=5, randomState=None, oversample=False):
    
    """
    Generates stratified folds with specified normalization.
    
    For list of scalers, see:
    https://scikit-learn.org/stable/api/sklearn.preprocessing.html
    
    For more details on scaling and normalization effects, see:
    https://scikit-learn.org/stable/auto_examples/preprocessing/plot_all_scaling.html#
    
    normalisation_method should be an instance of a scaler, e.g.,
    - MinMaxScaler()
    - MaxAbsScaler()
    - Quantile_Transform(output_distribution='uniform')
    
    Returns a list of tuples, each containing:
    (X_train_scaled, X_test_scaled, Y_train, Y_test), representing data for each fold
    """
    kF = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=randomState)
    kFolds_list = []
    
    for fold, (train_idx, test_idx) in enumerate(kF.split(X, Y)):
        # Split the data into training and testing sets for this fold
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        Y_train, Y_test = Y.iloc[train_idx], Y.iloc[test_idx]
        
        # Fit the scaler on the training data and transform both train and test sets
        X_train_scaled = normalisation_method.fit_transform(X_train)
        X_test_scaled = normalisation_method.transform(X_test)
        
        if oversample:
            # Oversample the training data if needed (e.g., using SMOTE or similar techniques)
            # This is a placeholder; actual oversampling code should be implemented here
            # X_train_scaled....
            pass
        
        # Convert back to DataFrame to maintain column names
        X_train_scaled = pd.DataFrame(X_train_scaled, columns=X.columns, index=X_train.index)
        X_test_scaled = pd.DataFrame(X_test_scaled, columns=X.columns, index=X_test.index)
        
        # Ensure 'gender' is still binary (0 or 1)
        if X_train_scaled['Gender'].isin([0, 1]).all():
            kFolds_list.append((X_train_scaled, X_test_scaled, Y_train, Y_test))
        else:
            print("Warning: 'gender' column contains unexpected values after scaling.") 
               
        print(f"Fold: {fold+1}, Train: {kFolds_list[fold][0].shape}, Test: {kFolds_list[fold][1].shape}")   
    return kFolds_list

def init_weights(model): #tested already
    if isinstance(model, nn.Linear):  # Apply only to linear layers
        nn.init.xavier_uniform_(model.weight)
        if model.bias is not None:
            nn.init.zeros_(model.bias)
            
def fold_to_dataloader_tensor(train_x, test_x, train_y, test_y, batch_size=64, device=device):
    train_dataset = TensorDataset(
        torch.tensor(train_x.values,dtype=torch.float32).to(device), 
        torch.tensor(train_y.values,dtype=torch.float32).to(device))
    val_dataset = TensorDataset(
        torch.tensor(test_x.values,dtype=torch.float32).to(device), 
        torch.tensor(test_y.values,dtype=torch.float32).to(device))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=len(val_dataset), shuffle=False)
    return train_loader, val_loader 

In [54]:
kFolds = FOLDS_GENERATOR(X_FOR_FOLDS, Y_FOR_FOLDS, normalisation_method=MinMaxScaler(), n_splits=5, randomState=randomState)

Fold: 1, Train: (4593, 28), Test: (1149, 28)
Fold: 2, Train: (4593, 28), Test: (1149, 28)
Fold: 3, Train: (4594, 28), Test: (1148, 28)
Fold: 4, Train: (4594, 28), Test: (1148, 28)
Fold: 5, Train: (4594, 28), Test: (1148, 28)


In [55]:
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from modularModels1 import BlockMaker, modularNN, BasicModel
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using", device)

def init_weights(model): #tested already
    if isinstance(model, nn.Linear):  # Apply only to linear layers
        nn.init.xavier_uniform_(model.weight)
        if model.bias is not None:
            nn.init.zeros_(model.bias)
            
def fold_to_dataloader_tensor(train_x, test_x, train_y, test_y, batch_size=64, device=device):
    train_dataset = TensorDataset(
        torch.tensor(train_x.values,dtype=torch.float32).to(device), 
        torch.tensor(train_y.values,dtype=torch.float32).to(device))
    val_dataset = TensorDataset(
        torch.tensor(test_x.values,dtype=torch.float32).to(device), 
        torch.tensor(test_y.values,dtype=torch.float32).to(device))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=len(val_dataset), shuffle=False)
    return train_loader, val_loader 


def get_feature_count(loader):
    """returns the number of features in the dataset"""
    return next(iter(loader))[0].shape[1]

Using cpu


In [56]:
from Criterion_Models import *
def criterion_mapping(criterion_choice:str, pos_weight:float=None):
    """
    Feel free to add any custom loss functions here.
    returns function for criterion
    """
    if criterion_choice == "FocalLoss":
        return FocalLoss()
    elif criterion_choice == "DiceLoss":
        return DiceLoss()
    elif criterion_choice == "BCEWithLogitsLoss":
        return nn.BCEWithLogitsLoss(pos_weight=torch.tensor([pos_weight])) if pos_weight else nn.BCEWithLogitsLoss()
    return nn.BCEWithLogitsLoss() 

In [57]:
class BinaryClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, dropout):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1),
            # nn.Sigmoid()
   
        )

    def forward(self, x):
        return self.net(x)
    
    def last_layer(self):
        return self.net[-1]

In [None]:
# test_model = BinaryClassifier(input_dim=get_feature_count(train_loader), hidden_dim=64, dropout=0.5).to(device)
# print(get_feature_count(train_loader))

28


In [None]:
def train_and_evaluate(model, criterion, optimiser, scheduler, train_loader, val_loader, epochs=20, patience=5, device=device):
    if isinstance(model.last_layer(), nn.Sigmoid) and isinstance(criterion, nn.BCEWithLogitsLoss):
        raise ValueError("Model output is Sigmoid but criterion is BCEWithLogitsLoss. Please check your model and criterion compatibility.")

    
    accuracy_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    auc_list = []

    best_val_loss = float('inf')
    best_model_state = None
    wait = 0

    #* Set model to training mode: essential for dropout and batch norm layers
    model.train()
    #* Epoch Training loop for this fold
    for epoch in range(1,epochs+1):
        running_loss = 0.0 #? loss for this epoch
        #* Mini-batch training loop
        for batch, (inputs, labels) in enumerate(train_loader,start=1):
            optimiser.zero_grad() #? Zero the gradients
            outputs = model(inputs) #? Forward pass through the model
            loss = criterion(outputs, labels) #? Calculate loss
            loss.backward() #? Backpropagation
            running_loss += loss.item()
            optimiser.step() #? Update weights
            if scheduler:
                scheduler.step()
                
        train_loss = running_loss / len(train_loader)
        print(f"Epoch: {epoch}, training loss: {train_loss:.4f}")
    
        #* Now we evaluate the model on the validation set, to track training vs validation loss
        model.eval() #? Set model to evaluation mode
        with torch.no_grad(): #? No need to track gradients during evaluation
            val_loss = 0.0    
            for batch, (inputs, labels) in enumerate(val_loader,start=1):#! one pass because val_loader batch size is all, if you want to do it in mini-batches, you MUST change the metric calculations to accept mini-batches
                outputs = model(inputs)
                if isinstance(model.last_layer(), nn.Sigmoid):
                    predictions = (outputs > 0.5).float().cpu() #? assume model output is 1s and 0s
                else: #? if model output is logits, convert to binary predictions
                    predictions = (torch.sigmoid(outputs) > 0.5).float().cpu()
                labels = labels.cpu() 
                loss = criterion(predictions, labels)
                val_loss += loss.item() #? Calculate loss
                avg_val_loss = val_loss / len(val_loader)
                print(f"Epoch {epoch}, Val Loss: {avg_val_loss:.4f}")
        
                # Early stopping
                if avg_val_loss < best_val_loss:
                    best_val_loss = avg_val_loss
                    best_model_state = model.state_dict()
                    wait = 0
                else:
                    wait += 1
                    if wait >= patience:
                        print(f"Early stopping triggered at epoch {epoch+1}")
                        break
    
    #* Use best model to calculate metrics on the validation set
    #! must be outside epoch loop, it comes after the training and cv loop
    model.load_state_dict(best_model_state) #? Load the best model state
    with torch.no_grad():
        for batch, (inputs, labels) in enumerate(val_loader,start=1):#! one pass because val_loader batch size is all, if you want to do it in mini-batches, you MUST change the metric calculations to accept mini-batches
                outputs = model(inputs)
                if isinstance(model.last_layer(), nn.Sigmoid):
                    predictions = (outputs > 0.5).float().cpu() #? assume model output is 1s and 0s
                else: #? if model output is logits, convert to binary predictions
                    predictions = (torch.sigmoid(outputs) > 0.5).float().cpu()
                labels = labels.cpu() 
                loss = criterion(predictions, labels)
                val_loss += loss.item() #? Calculate loss
                
    #! The following should have length equal to fold number           
    accuracy_list.append(accuracy_score(labels, predictions)) 
    precision_list.append(precision_score(labels, predictions, pos_label=1, zero_division=0)) 
    recall_list.append(recall_score(labels, predictions, pos_label=1))
    f1_list.append(f1_score(labels, predictions, pos_label=1))
    auc_list.append(roc_auc_score(labels, predictions)) 

    return model, accuracy_list, precision_list, recall_list, f1_list, auc_list 


In [None]:
def objective(trial):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Model hyperparameters (first-level optimization)
    hidden_dim = trial.suggest_int("hidden_dim", 16, 128)
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    initial_lr = trial.suggest_float("initial_lr", 1e-5, 1e-3, log=True)
    max_lr = trial.suggest_float("max_lr", 1e-3, 1e-1, log=True)
    
    # Loss function hyperparameters
    criterion_choice = trial.suggest_categorical("criterion", ["BCEWithLogitsLoss", "FocalLoss", "DiceLoss"])
    
    # Hyperparameter exploration optimization
    if criterion_choice == "BCEWithLogitsLoss":
        pos_weight = trial.suggest_int("pos_weight", 1, 10)
    else:
        pos_weight = None
    
    # Initialize lists for metrics across folds
    accuracy_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    auc_list = []

    # Cross-validation loop
    for fold, (train_x, test_x, train_y, test_y) in enumerate(kFolds, start=1):
        # Create DataLoader for current fold
        train_loader, val_loader = fold_to_dataloader_tensor(train_x, test_x, train_y, test_y, batch_size=64, device=device)
        # Calculate steps_per_epoch from the current fold's train_loader
        train_loader_len = len(train_loader)
        
        # Instantiate and initialize the model
        model = BinaryClassifier(input_dim=get_feature_count(train_loader), hidden_dim=hidden_dim, dropout=dropout)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        model.apply(init_weights)
        
        # Map the choice to the actual loss function
        criterion = criterion_mapping(criterion_choice, pos_weight).to(device)
        optimiser = optim.Adam(model.parameters(), lr=initial_lr)
        
        # Initialize scheduler
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimiser,
            max_lr=max_lr,
            steps_per_epoch=train_loader_len,
            epochs=100,
            anneal_strategy='linear'
        )
        print(f"Fold {fold}:")
        # Train and evaluate the model on the current fold
        model, accuracy, precision, recall, f1, auc = train_and_evaluate(
            model, criterion, optimiser, scheduler, train_loader, val_loader, epochs=1000, patience=10, device=device
        )

        # Append the metrics from the current fold
        accuracy_list.append(accuracy)
        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)
        auc_list.append(auc)

    # Calculate the average metrics across all folds
    avg_accuracy = np.sum(accuracy_list) / len(accuracy_list)
    avg_precision = np.sum(precision_list) / len(precision_list)
    avg_recall = np.sum(recall_list) / len(recall_list)
    avg_f1 = np.sum(f1_list) / len(f1_list)
    avg_auc = np.sum(auc_list) / len(auc_list)

    # Combine metrics into a single "score"
    combined_score = (avg_f1 + avg_precision + avg_recall + avg_accuracy + avg_auc) / 5

    return combined_score


In [71]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=3)  # You can adjust the number of trials

print("Best trial:")
trial = study.best_trial
print(f"  Combined score: {trial.value}")
print("  Best hyperparameters:")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[I 2025-04-06 13:09:57,809] A new study created in memory with name: no-name-b50dd234-8af0-4c36-8a99-a4abc7af430b


Epoch: 1, training loss: 0.0305
Epoch 1, Val Loss: 0.0433
Epoch: 2, training loss: 0.0217
Epoch 2, Val Loss: 0.0433
Epoch: 3, training loss: 0.0210
Epoch 3, Val Loss: 0.0433
Epoch: 4, training loss: 0.0204
Epoch 4, Val Loss: 0.0433
Epoch: 5, training loss: 0.0198
Epoch 5, Val Loss: 0.0433
Epoch: 6, training loss: 0.0195
Epoch 6, Val Loss: 0.0434
Early stopping triggered at epoch 7
Epoch: 7, training loss: 0.0193
Epoch 7, Val Loss: 0.0434
Early stopping triggered at epoch 8
Epoch: 8, training loss: 0.0193
Epoch 8, Val Loss: 0.0438
Early stopping triggered at epoch 9
Epoch: 9, training loss: 0.0192
Epoch 9, Val Loss: 0.0439
Early stopping triggered at epoch 10
Epoch: 10, training loss: 0.0192
Epoch 10, Val Loss: 0.0437
Early stopping triggered at epoch 11
Epoch: 11, training loss: 0.0190
Epoch 11, Val Loss: 0.0435
Early stopping triggered at epoch 12
Epoch: 12, training loss: 0.0190
Epoch 12, Val Loss: 0.0435
Early stopping triggered at epoch 13
Epoch: 13, training loss: 0.0191
Epoch 13,

[I 2025-04-06 13:10:36,203] Trial 0 finished with value: 0.4271857155450924 and parameters: {'hidden_dim': 49, 'dropout': 0.12232693384573157, 'initial_lr': 0.0001237069033410854, 'max_lr': 0.00818457543768845, 'criterion': 'FocalLoss'}. Best is trial 0 with value: 0.4271857155450924.


Epoch: 1, training loss: 0.8091
Epoch 1, Val Loss: 0.7945
Epoch: 2, training loss: 0.7700
Epoch 2, Val Loss: 0.7686
Epoch: 3, training loss: 0.6546
Epoch 3, Val Loss: 0.9062
Epoch: 4, training loss: 0.8113
Epoch 4, Val Loss: 0.9516
Epoch: 5, training loss: 0.8109
Epoch 5, Val Loss: 0.9512
Epoch: 6, training loss: 0.8102
Epoch 6, Val Loss: 0.9508
Epoch: 7, training loss: 0.8109
Epoch 7, Val Loss: 0.9669
Early stopping triggered at epoch 8
Epoch: 8, training loss: 0.8130
Epoch 8, Val Loss: 0.9669
Early stopping triggered at epoch 9
Epoch: 9, training loss: 0.8053
Epoch 9, Val Loss: 0.9833
Early stopping triggered at epoch 10
Epoch: 10, training loss: 0.8080
Epoch 10, Val Loss: 0.9832
Early stopping triggered at epoch 11
Epoch: 11, training loss: 0.8039
Epoch 11, Val Loss: 0.9831
Early stopping triggered at epoch 12
Epoch: 12, training loss: 0.8013
Epoch 12, Val Loss: 0.9829
Early stopping triggered at epoch 13
Epoch: 13, training loss: 0.8027
Epoch 13, Val Loss: 0.9829
Early stopping tri

[I 2025-04-06 13:11:17,138] Trial 1 finished with value: 0.3610518664909365 and parameters: {'hidden_dim': 21, 'dropout': 0.27619523457000617, 'initial_lr': 0.00034003781248143354, 'max_lr': 0.008585521268761685, 'criterion': 'DiceLoss'}. Best is trial 0 with value: 0.4271857155450924.


Epoch: 98, training loss: 0.7448
Epoch 98, Val Loss: 0.9355
Early stopping triggered at epoch 99
Epoch: 99, training loss: 0.7439
Epoch 99, Val Loss: 0.9355
Early stopping triggered at epoch 100
Epoch: 100, training loss: 0.7445
Epoch 100, Val Loss: 0.9355
Early stopping triggered at epoch 101
Epoch: 1, training loss: 0.0319
Epoch 1, Val Loss: 0.0433
Epoch: 2, training loss: 0.0226
Epoch 2, Val Loss: 0.0433
Epoch: 3, training loss: 0.0218
Epoch 3, Val Loss: 0.0433
Epoch: 4, training loss: 0.0211
Epoch 4, Val Loss: 0.0433
Epoch: 5, training loss: 0.0205
Epoch 5, Val Loss: 0.0433
Epoch: 6, training loss: 0.0200
Epoch 6, Val Loss: 0.0434
Early stopping triggered at epoch 7
Epoch: 7, training loss: 0.0196
Epoch 7, Val Loss: 0.0434
Early stopping triggered at epoch 8
Epoch: 8, training loss: 0.0195
Epoch 8, Val Loss: 0.0437
Early stopping triggered at epoch 9
Epoch: 9, training loss: 0.0193
Epoch 9, Val Loss: 0.0435
Early stopping triggered at epoch 10
Epoch: 10, training loss: 0.0193
Epoch

[I 2025-04-06 13:11:55,446] Trial 2 finished with value: 0.42004642322622165 and parameters: {'hidden_dim': 56, 'dropout': 0.45006601765343024, 'initial_lr': 7.974732720846649e-05, 'max_lr': 0.005485062834975952, 'criterion': 'FocalLoss'}. Best is trial 0 with value: 0.4271857155450924.


Best trial:
  Combined score: 0.4271857155450924
  Best hyperparameters:
    hidden_dim: 49
    dropout: 0.12232693384573157
    initial_lr: 0.0001237069033410854
    max_lr: 0.00818457543768845
    criterion: FocalLoss
