**Import Libraries and Load Cleaned Data**

In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.utils.class_weight import compute_class_weight

# Load the cleaned dataset from a path relative to the notebook's working directory.
# Later cells use the 'Diabetes_012' column as the target and every other column as a feature.
df_cleaned = pd.read_csv('../data/cleaned_diabetes_dataset.csv')

**Split Data**

In [3]:
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset, DataLoader

# Convert DataFrame to PyTorch tensors: 'Diabetes_012' is the target, all other columns are features
X = torch.tensor(df_cleaned.drop('Diabetes_012', axis=1).values, dtype=torch.float32)
y = torch.tensor(df_cleaned['Diabetes_012'].values, dtype=torch.long)

# Split data into training and remaining dataset with 70% and 30% respectively.
# Stratify on the labels so each split keeps the same class proportions; with an
# imbalanced target a purely random split can under-represent the rare classes.
# (Stratification requires at least two samples of every class.)
X_train, X_remaining, y_train, y_remaining = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y.numpy()
)

# Split remaining dataset into validation and testing sets equally (50% each of the remaining data),
# again stratified so validation and test metrics are computed on comparable class mixes.
X_val, X_test, y_val, y_test = train_test_split(
    X_remaining, y_remaining, test_size=0.5, random_state=42, stratify=y_remaining.numpy()
)

# Create DataLoaders for each set
train_data = TensorDataset(X_train, y_train)
val_data = TensorDataset(X_val, y_val)
test_data = TensorDataset(X_test, y_test)

# Only the training loader shuffles; evaluation order does not affect the metrics.
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_loader = DataLoader(val_data, batch_size=64, shuffle=False)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)


**Minority Class Improvement**

In [4]:
# Pick the compute device once; the model and the loss weights are both placed on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Weight each class by the 'balanced' heuristic computed from the training labels,
# so that mistakes on under-represented classes cost more in the loss.
train_labels = y_train.numpy()
balanced_weights = compute_class_weight(
    'balanced',
    classes=np.unique(train_labels),
    y=train_labels,
)
class_weights = torch.tensor(balanced_weights, dtype=torch.float32).to(device)

**NN Architecture**

In [5]:
class NeuralNet(nn.Module):
    """Fully connected classifier: three ReLU-activated hidden layers and a linear output.

    Args:
        input_size: number of input features per sample.
        hidden_size: width shared by all three hidden layers.
        num_classes: number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Layers are created in fc1..fc4 order so parameter names and
        # initialisation order stay stable.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return raw (un-normalised) class logits for the batch `x`."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        # No activation on the output layer: logits are returned as-is.
        return self.fc4(hidden)


def train_model(params):
    """Train a fresh NeuralNet on `train_loader` and return it.

    Args:
        params: dict with keys 'hidden_size', 'learning_rate' and 'num_epochs'.

    Returns:
        The trained model, located on `device`.

    Relies on the notebook-level names `X_train`, `train_loader`, `class_weights`
    and `device`. The network always has 3 output classes.
    """
    n_features = X_train.shape[1]
    net = NeuralNet(n_features, params['hidden_size'], 3).to(device)
    # Class-weighted cross entropy makes errors on rarer classes cost more.
    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
    adam = optim.Adam(net.parameters(), lr=params['learning_rate'])

    for _ in range(params['num_epochs']):
        net.train()
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            batch_loss = loss_fn(net(batch_x), batch_y)

            # Clear stale gradients before back-propagating this batch.
            adam.zero_grad()
            batch_loss.backward()
            adam.step()

    return net


**Hyperparameter Tuning**

In [None]:
# Exhaustive grid search: every combination below is trained once and scored on the
# validation set; the combination with the highest validation accuracy is kept.
hyperparams = {
    'learning_rate': [0.001, 0.01, 0.1],
    'hidden_size': [50, 100, 150],
    'num_epochs': [5, 10, 15]
}

best_accuracy = 0
best_params = {}

# Calculate the total number of iterations
total_iterations = len(hyperparams['learning_rate']) * len(hyperparams['hidden_size']) * len(hyperparams['num_epochs'])

with tqdm(total=total_iterations, desc="Hyperparameter Tuning") as pbar:
    for lr in hyperparams['learning_rate']:
        for hidden_size in hyperparams['hidden_size']:
            for num_epochs in hyperparams['num_epochs']:
                # A new dict per combination, so storing it in best_params is safe.
                params = {'learning_rate': lr, 'hidden_size': hidden_size, 'num_epochs': num_epochs}

                start_time = time.time()  # Start time for the current set of parameters
                model = train_model(params)

                # Evaluate the model on the validation set
                model.eval()
                correct = 0
                total = 0
                with torch.no_grad():
                    for inputs, labels in val_loader:
                        # train_model returns the model on `device`, while the loader
                        # yields CPU tensors; move the batch or the forward pass fails on CUDA.
                        inputs, labels = inputs.to(device), labels.to(device)
                        outputs = model(inputs)
                        predicted = outputs.argmax(dim=1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()
                accuracy = correct / total

                elapsed_time = time.time() - start_time  # Calculate elapsed time

                # Update best accuracy and parameters if current model is better.
                # NOTE(review): selection uses plain accuracy; with an imbalanced target a
                # macro-averaged metric may be a better criterion — confirm intent.
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_params = params

                # Update progress bar
                pbar.update(1)
                pbar.set_postfix_str(f"LR: {lr}, Hidden: {hidden_size}, Epochs: {num_epochs}, Acc: {accuracy:.4f}, Time: {elapsed_time:.2f}s")


Hyperparameter Tuning:  19%|████████████▌                                                       | 5/27 [04:09<19:24, 52.91s/it, LR: 0.001, Hidden: 100, Epochs: 10, Acc: 0.6594, Time: 61.33s]

**Train Best Model**

In [None]:
# Retrain from scratch with the winning hyperparameters: train_model builds a new
# network on every call, so this is not the same instance scored during tuning.
# best_params is filled in by the tuning cell, which must have been run first.
best_model = train_model(best_params)

**Model Evaluation**

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def evaluate_model(model, test_loader):
    """Compute classification metrics for `model` over every batch in `test_loader`.

    Args:
        model: a trained torch module that returns per-class scores of shape
            (batch, num_classes).
        test_loader: iterable yielding (inputs, labels) tensor pairs.

    Returns:
        Tuple (accuracy, precision, recall, f1). Precision, recall and F1 are
        macro-averaged over classes; a class with no predicted samples contributes 0
        instead of raising a warning (zero_division=0).
    """
    model.eval()
    y_true = []
    y_pred = []

    # Run inference on whatever device the model's weights live on. The loader yields
    # CPU tensors, so without this move a CUDA model raises a device-mismatch error.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs.to(model_device))
            predicted = outputs.argmax(dim=1)
            # Collect plain Python ints on the host for the sklearn metric functions.
            y_true.extend(labels.cpu().tolist())
            y_pred.extend(predicted.cpu().tolist())

    # Calculate evaluation metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro', zero_division=0)

    return accuracy, precision, recall, f1

# Score the retrained best_model on the held-out test split, then report each metric
# on its own line.
accuracy, precision, recall, f1 = evaluate_model(best_model, test_loader)

print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    sep="\n",
)
