# Optuna Trials For Baseline Model

In [18]:
# Standard library imports
import datetime
import os
from collections import deque

# Third-party imports
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from torchsummary import summary
from tqdm import tqdm

if os.path.exists('/workspace/data'):
    # Load the dictionary of DataFrames from the pickle
    data_path = '/workspace/data/'
else:
    data_path = '../data/'
    
if torch.cuda.is_available() == False:
    RuntimeError("GPU detected: False")
else:
    print("The GPU is detected.")


The GPU is detected.


## Create the model
We make a basic NN for binary classification that takes as input a list of integers that correspond to the out_features of each linear layer. 

In [3]:
import torch.nn as nn

class Model(nn.Module):
    def __init__(self, in_features, out_features):
        """Initializes the model layers.

        Args:
            in_features (int): The number of input features of the dataset.
            out_features (list): The number of units in each linear layer.
        """
        # Call the parent class (nn.Module) initializer first
        super(Model, self).__init__()
        
        layers = []
        input_dropout = 0.2
        dropout = 0.3

        # Input dropout layer
        layers.append(nn.Dropout(input_dropout))
    
        # Build layers dynamically
        for out_feature in out_features:
            layers.append(nn.Linear(in_features, out_feature))
            layers.append(nn.BatchNorm1d(out_feature))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            in_features = out_feature
        
        # Final output layer for binary classification (with 1 output node)
        layers.append(nn.Linear(in_features, 1))
        
        # Store the sequence of layers
        self.sequential = nn.Sequential(*layers)
        
    def forward(self, x):
        """Forward pass of the model."""
        return self.sequential(x)


In [5]:
df = pd.read_csv("data.csv")

features = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
X = df[features].values  # Convert to numpy array
y = df['label'].astype(float).values  # Convert to numpy array
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.7)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, train_size=.5)

# Convert the splits to PyTorch tensors and reshape y to be 2D
X_train, y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test, y_test = torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)
X_val, y_val = torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)


In [6]:
num_workers = 16
batch_size = 8

def prepare_data_loaders(X_train, y_train, X_test, y_test, X_val, y_val, batch_size=batch_size, num_workers=num_workers):
    # Convert datasets to TensorDataset (pairs features and labels)
    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)
    val_dataset = TensorDataset(X_val, y_val)
    
    # Create DataLoader objects for train, test, and validation datasets
    loaders = {
        "train": DataLoader(train_dataset, batch_size=batch_size,drop_last=True, num_workers=num_workers, shuffle=True, pin_memory=True, persistent_workers=True),
        "test": DataLoader(test_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True, pin_memory=True, persistent_workers=True),
        "val": DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True, pin_memory=True, persistent_workers=True),
    }
    return loaders

## Train & Test Functions
Here we have basic train and test functions.

In [None]:
def train_epoch(model, loaders, criterion, optimizer, num_epochs, epoch, device):
    model.train()
    
    # Use tqdm to display progress bar for the training loop
    train_loader_tqdm = tqdm(loaders['train'], desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')
    
    running_loss = deque(maxlen=10000)
    
    # Train epoch
    for X_train, y_train in train_loader_tqdm:
        optimizer.zero_grad()
        
        X_train_gpu = X_train.to(device)
        y_train_gpu = y_train.to(device)
        
        output_gpu = model(X_train_gpu)
        
        loss = criterion(output_gpu, y_train_gpu)
        running_loss.append(loss.item())  # Store loss for averaging
        
        loss.backward()
        optimizer.step()
        
        # Calculate and set the average loss for the tqdm progress bar
        avg_loss = sum(running_loss) / len(running_loss) if len(running_loss) > 0 else 0
        train_loader_tqdm.set_postfix(loss=f"{avg_loss:.4f}")

    return

def test_model(model, loaders, criterion, device, num_epochs, epoch, loader='test'):
    # Validate epoch:
    model.eval()
    test_loader_tqdm = tqdm(loaders[loader], desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')
    test_loss = []
    num_tested = []
    correct_pred = 0
    
    with torch.no_grad():
        for X_test, y_test in test_loader_tqdm:
            X_test_gpu = X_test.to(device)
            y_test_gpu = y_test.to(device)
            
            output_gpu = model(X_test_gpu)
            
            # Accumulate test loss
            test_loss.append(criterion(output_gpu, y_test_gpu).item() * X_test.shape[0])
            num_tested.append(X_test.shape[0])
            
            # Calculate number of correct predictions for binary classification
            correct_pred += torch.sum(((nn.Sigmoid()(output_gpu) > 0.5) == y_test_gpu).float()).item()
            
            test_loader_tqdm.set_postfix(loss=f"{sum(test_loss) / sum(num_tested):.4f}", acc=f"{correct_pred / sum(num_tested):.4f}")
        
        # Calculate average loss and accuracy
        avg_loss = sum(test_loss) / sum(num_tested)
        accuracy = correct_pred / sum(num_tested)
        
        
    
    return avg_loss, accuracy

## Train & Test Functions
Here we have the same train and test functions as above, but without the progress bars.

In [8]:
def train_epoch(model, loaders, criterion, optimizer, num_epochs, epoch, device):
    model.train()
    
    # Use tqdm to display progress bar for the training loop
    # train_loader_tqdm = tqdm(loaders['train'], desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')
    
    running_loss = deque(maxlen=10000)
    
    # Train epoch
    for X_train, y_train in loaders['train']:
        optimizer.zero_grad()
        
        X_train_gpu = X_train.to(device)
        y_train_gpu = y_train.to(device)
        
        output_gpu = model(X_train_gpu)
        
        loss = criterion(output_gpu, y_train_gpu)
        running_loss.append(loss.item())  # Store loss for averaging
        
        loss.backward()
        optimizer.step()
        
        # Calculate and set the average loss for the tqdm progress bar
        avg_loss = sum(running_loss) / len(running_loss) if len(running_loss) > 0 else 0
        # train_loader_tqdm.set_postfix(loss=f"{avg_loss:.4f}")

    return

def test_model(model, loaders, criterion, device, num_epochs, epoch, loader='test'):
    # Validate epoch:
    model.eval()
    # test_loader_tqdm = tqdm(loaders[loader], desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')
    test_loss = []
    num_tested = []
    correct_pred = 0
    
    with torch.no_grad():
        for X_test, y_test in loaders[loader]:
            X_test_gpu = X_test.to(device)
            y_test_gpu = y_test.to(device)
            
            output_gpu = model(X_test_gpu)
            
            # Accumulate test loss
            test_loss.append(criterion(output_gpu, y_test_gpu).item() * X_test.shape[0])  # Fix here
            num_tested.append(X_test.shape[0])  # Fix here
            
            # Calculate number of correct predictions for binary classification
            correct_pred += torch.sum(((nn.Sigmoid()(output_gpu) > 0.5) == y_test_gpu).float()).item()
            
            # test_loader_tqdm.set_postfix(loss=f"{sum(test_loss) / sum(num_tested):.4f}", acc=f"{correct_pred / sum(num_tested):.4f}")
        
        # Calculate average loss and accuracy
        avg_loss = sum(test_loss) / sum(num_tested)
        accuracy = correct_pred / sum(num_tested)
        
        
    
    return avg_loss, accuracy


In [9]:
def objective(trial, num_layers, min_out, max_out, in_features, loaders, study_name):
    # Generate the output features for each layer using trial suggestions
    out_features = []
    for i in range(num_layers):
        out_features.append(trial.suggest_int(f"out_features_layer_{i}", min_out, max_out))
    
    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Create model and move to device
    model = Model(in_features, out_features).to(device)
    
    # Initialize optimizer and loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
    criterion = nn.BCEWithLogitsLoss()
    
    num_epochs = 100
    # Training loop for num_epochs
    for epoch in range(num_epochs):
        train_epoch(model, loaders, criterion, optimizer, num_epochs, epoch, device)
        test_loss, test_accuracy = test_model(model, loaders, criterion, device, num_epochs, epoch, loader='test')

    # Return the test loss to be minimized
    return test_loss

In [10]:
loaders = prepare_data_loaders(X_train, y_train, X_test, y_test, X_val, y_val, batch_size=batch_size, num_workers=num_workers)

# Define the parameters for the study
study_name = "Baseline"               # Name of your study
num_layers = 5                        # Example number of layers
min_out = 32                          # Minimum number of units per layer
max_out = 512                         # Maximum number of units per layer
in_features = X_train.shape[1]        # Set in_features to match the actual number of features in your data

study = optuna.create_study(study_name=study_name, direction='minimize')

# Define the objective function and run the optimization
study.optimize(lambda trial: objective(trial, num_layers, min_out, max_out, in_features, loaders, study_name), 
               n_trials=5, show_progress_bar=True)  # You can specify how many trials you want

# Print the best parameters found by the study
print(f"Best parameters: {study.best_params}")
print(f"Best trial: {study.best_trial}")





[I 2024-10-20 15:57:53,545] A new study created in memory with name: Baseline


  0%|          | 0/5 [00:00<?, ?it/s]

[I 2024-10-20 15:58:05,133] Trial 0 finished with value: 0.5822103241215582 and parameters: {'out_features_layer_0': 449, 'out_features_layer_1': 108, 'out_features_layer_2': 135, 'out_features_layer_3': 44, 'out_features_layer_4': 419}. Best is trial 0 with value: 0.5822103241215582.
[I 2024-10-20 15:58:15,732] Trial 1 finished with value: 0.541307588763859 and parameters: {'out_features_layer_0': 340, 'out_features_layer_1': 381, 'out_features_layer_2': 370, 'out_features_layer_3': 220, 'out_features_layer_4': 447}. Best is trial 1 with value: 0.541307588763859.
[I 2024-10-20 15:58:26,412] Trial 2 finished with value: 0.5368761085945627 and parameters: {'out_features_layer_0': 241, 'out_features_layer_1': 372, 'out_features_layer_2': 372, 'out_features_layer_3': 365, 'out_features_layer_4': 179}. Best is trial 2 with value: 0.5368761085945627.
[I 2024-10-20 15:58:37,101] Trial 3 finished with value: 0.5360770987427753 and parameters: {'out_features_layer_0': 231, 'out_features_layer_