# **0.0 DATA PROCESSING**

In [172]:
!git clone https://github.com/edwinkmusaasizi/Machine-Learning.git

Cloning into 'Machine-Learning'...
remote: Enumerating objects: 69, done.[K
remote: Counting objects: 100% (69/69), done.[K
remote: Compressing objects: 100% (65/65), done.[K
remote: Total 69 (delta 31), reused 7 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (69/69), 467.68 KiB | 4.37 MiB/s, done.
Resolving deltas: 100% (31/31), done.


In [173]:
%cd Machine-Learning
%cd data
%cd interim
!ls

/content/Machine-Learning/data/interim/Machine-Learning/data/interim/Machine-Learning/data/interim/Machine-Learning
/content/Machine-Learning/data/interim/Machine-Learning/data/interim/Machine-Learning/data/interim/Machine-Learning/data
/content/Machine-Learning/data/interim/Machine-Learning/data/interim/Machine-Learning/data/interim/Machine-Learning/data/interim
cleaned_mental_health_data.csv


0.1 Data Processing

In [174]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load data
df = pd.read_csv("cleaned_mental_health_data.csv")

# Define adherence labels based on questionnaire responses
non_adherence_columns = [
    "Do you ever forget to take your medication?",
    "Are you careless at times about taking your medication?",
    "When you feel better, do you sometimes stop taking your medication?",
    "Sometimes if you feel worse when you take the medication, do you stop taking it?",
    "I take my medication only when I am sick"
]

df["adherence"] = np.where(df[non_adherence_columns].eq("Yes").any(axis=1), 0, 1)

# Drop redundant columns
df = df.drop(columns=non_adherence_columns + ["If you have any further comments about medication or this questionnaire, please write them below"])

# Identify all categorical columns
categorical_cols = df.select_dtypes(include=['object', 'category']).columns
print("Categorical columns to encode:", categorical_cols)

# Encode all categorical features
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))

# Split features and labels
X = df.drop(columns="adherence").values
y = df["adherence"].values

# Split data into train, validation, test (70-15-15)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

# Apply SMOTE to only the training set
from imblearn.over_sampling import SMOTE

# Apply SMOTE only to the training data
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Check new class distribution
from collections import Counter
print("New class distribution:", Counter(y_train_resampled))

# Normalize numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors
train_dataset = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
val_dataset = TensorDataset(torch.FloatTensor(X_val), torch.FloatTensor(y_val))
test_dataset = TensorDataset(torch.FloatTensor(X_test), torch.FloatTensor(y_test))

# Create DataLoaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

Categorical columns to encode: Index(['sex', 'Religion', 'marital status', 'education status', 'residence',
       'substance use', 'comorbidity',
       'It is unnatural for my mind and body to be controlled by medication?',
       'My thoughts are clearer on medication',
       'By staying on medication, I can prevent getting sick',
       'I feel weird, like a ‘zombie’ on medication',
       'Medication makes me feel tired and sluggish',
       'Some of your symptoms are made by your mind.', 'You are mentally well',
       'You do not need medication', 'Your stay in the hospital is necessary',
       'The doctor is right in prescribing medication for you.',
       'You do not need to be seen by a doctor or psychiatrist',
       'If someone said you have a nervous or mental illness, they would be right',
       'None of the unusual things you are experiencing are due to an illness.',
       '. Loss of energy or drive', 'Feeling unmotivated or numb',
       'Daytime sedation or drowsi

In [175]:
# Check class distribution in the training set
class_distribution = np.bincount(y_train)
print("Class Distribution in Training Set:")
print(f"Class 0 (Non-Adherent): {class_distribution[0]}")
print(f"Class 1 (Adherent): {class_distribution[1]}")

Class Distribution in Training Set:
Class 0 (Non-Adherent): 55
Class 1 (Adherent): 26


Implement class weight

In [176]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import pandas as pd

# Compute class weights
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weights = torch.tensor(class_weights, dtype=torch.float32)

# Define loss function with class weights
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Example model (Assuming a simple neural network)
class AdherenceModel(nn.Module):
    def __init__(self, input_size):
        super(AdherenceModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 2)  # Output has 2 classes (0 and 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)  # No softmax needed for CrossEntropyLoss
        return x

# Initialize model
input_size = X_train.shape[1]  # Number of features
model = AdherenceModel(input_size)

# Define optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop (simplified)
for epoch in range(10):  # Adjust epochs as needed
    model.train()
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        batch_y = batch_y.long()  # Convert to long for CrossEntropyLoss
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")



Epoch 1, Loss: 0.6953945159912109
Epoch 2, Loss: 0.7195978164672852
Epoch 3, Loss: 0.63300621509552
Epoch 4, Loss: 0.6330546736717224
Epoch 5, Loss: 0.6474331021308899
Epoch 6, Loss: 0.5303978323936462
Epoch 7, Loss: 0.5786312818527222
Epoch 8, Loss: 0.5541043281555176
Epoch 9, Loss: 0.5522679090499878
Epoch 10, Loss: 0.46876275539398193


# MODEL IMPLEMANTION

##Implement the Feedforward Neural Network

1 Implementation

In [177]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the first model architecture (Feedforward Neural Network)
class FeedForwardNN(nn.Module):
    def __init__(self, input_dim):
        super(FeedForwardNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)  # First hidden layer
        self.fc2 = nn.Linear(64, 32)         # Second hidden layer
        self.fc3 = nn.Linear(32, 1)          # Output layer
        self.sigmoid = nn.Sigmoid()          # Sigmoid activation for binary classification

    def forward(self, x):
        x = torch.relu(self.fc1(x))           # ReLU activation
        x = torch.relu(self.fc2(x))           # ReLU activation
        x = self.sigmoid(self.fc3(x))         # Output layer with sigmoid activation
        return x

# Example input dimension (you should use the actual number of features)
input_dim = X_train.shape[1]  # assuming X_train is already defined
model = FeedForwardNN(input_dim)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.BCELoss()  # Binary Cross Entropy for binary classification

# Training loop
def train_model(model, train_loader, val_loader, optimizer, loss_fn, epochs=100):
    best_val_loss = float('inf')

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = loss_fn(outputs.squeeze(), labels)  # Squeeze to make outputs the same shape as labels
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

            # Calculate accuracy
            predicted = (outputs > 0.5).float()
            correct += (predicted.squeeze() == labels).sum().item()
            total += labels.size(0)

        # Validation step
        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                val_loss += loss_fn(outputs.squeeze(), labels).item()

                predicted = (outputs > 0.5).float()
                val_correct += (predicted.squeeze() == labels).sum().item()
                val_total += labels.size(0)

        # Calculate metrics
        train_loss = train_loss / len(train_loader)
        train_accuracy = correct / total
        val_loss = val_loss / len(val_loader)
        val_accuracy = val_correct / val_total

        print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")

        # Save model if it's the best validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), "best_feedforward_model.pth")

# Move the model to the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Train the model
train_model(model, train_loader, val_loader, optimizer, loss_fn)


Epoch 1/100, Train Loss: 0.6768, Train Accuracy: 0.6667, Val Loss: 0.6705, Val Accuracy: 0.7222
Epoch 2/100, Train Loss: 0.6617, Train Accuracy: 0.6790, Val Loss: 0.6587, Val Accuracy: 0.6667
Epoch 3/100, Train Loss: 0.6498, Train Accuracy: 0.6790, Val Loss: 0.6482, Val Accuracy: 0.6667
Epoch 4/100, Train Loss: 0.6388, Train Accuracy: 0.6790, Val Loss: 0.6389, Val Accuracy: 0.6667
Epoch 5/100, Train Loss: 0.6285, Train Accuracy: 0.6790, Val Loss: 0.6297, Val Accuracy: 0.6667
Epoch 6/100, Train Loss: 0.6125, Train Accuracy: 0.6790, Val Loss: 0.6206, Val Accuracy: 0.6667
Epoch 7/100, Train Loss: 0.5985, Train Accuracy: 0.6790, Val Loss: 0.6110, Val Accuracy: 0.6667
Epoch 8/100, Train Loss: 0.5843, Train Accuracy: 0.6790, Val Loss: 0.6006, Val Accuracy: 0.6667
Epoch 9/100, Train Loss: 0.5823, Train Accuracy: 0.6914, Val Loss: 0.5890, Val Accuracy: 0.6667
Epoch 10/100, Train Loss: 0.5599, Train Accuracy: 0.7037, Val Loss: 0.5770, Val Accuracy: 0.7222
Epoch 11/100, Train Loss: 0.5244, Train

2. Prepare Data

In [178]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Load data
df = pd.read_csv("cleaned_mental_health_data.csv")

# Define adherence labels based on questionnaire responses
non_adherence_columns = [
    "Do you ever forget to take your medication?",
    "Are you careless at times about taking your medication?",
    "When you feel better, do you sometimes stop taking your medication?",
    "Sometimes if you feel worse when you take the medication, do you stop taking it?",
    "I take my medication only when I am sick"
]

df["adherence"] = np.where(df[non_adherence_columns].eq("Yes").any(axis=1), 0, 1)

# Drop redundant columns
df = df.drop(columns=non_adherence_columns + ["If you have any further comments about medication or this questionnaire, please write them below"])

# Identify all categorical columns
categorical_cols = df.select_dtypes(include=['object', 'category']).columns
print("Categorical columns to encode:", categorical_cols)

# Encode all categorical features
from sklearn.preprocessing import LabelEncoder
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))

# Split features and labels
X = df.drop(columns="adherence").values
y = df["adherence"].values

# Split data into train, validation, test (70-15-15)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

# Apply SMOTE to only the training set
from imblearn.over_sampling import SMOTE

# Apply SMOTE only to the training data
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Check new class distribution
from collections import Counter
print("New class distribution:", Counter(y_train_resampled))

# Normalize numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_resampled)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Check the shape of the data to ensure it matches
print("Shape of X_train_scaled:", X_train_scaled.shape)
print("Shape of y_train_resampled:", y_train_resampled.shape)

# Ensure that X_train_scaled and y_train_resampled have the same number of samples
assert X_train_scaled.shape[0] == y_train_resampled.shape[0], "Mismatch in number of samples between X_train_scaled and y_train_resampled"

# Convert to torch tensors
train_tensor = TensorDataset(torch.tensor(X_train_scaled, dtype=torch.float32),
                              torch.tensor(y_train_resampled, dtype=torch.float32))
val_tensor = TensorDataset(torch.tensor(X_val_scaled, dtype=torch.float32),
                            torch.tensor(y_val, dtype=torch.float32))
test_tensor = TensorDataset(torch.tensor(X_test_scaled, dtype=torch.float32),
                             torch.tensor(y_test, dtype=torch.float32))

# Create DataLoaders
batch_size = 32
train_loader = DataLoader(train_tensor, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_tensor, batch_size=batch_size)
test_loader = DataLoader(test_tensor, batch_size=batch_size)


Categorical columns to encode: Index(['sex', 'Religion', 'marital status', 'education status', 'residence',
       'substance use', 'comorbidity',
       'It is unnatural for my mind and body to be controlled by medication?',
       'My thoughts are clearer on medication',
       'By staying on medication, I can prevent getting sick',
       'I feel weird, like a ‘zombie’ on medication',
       'Medication makes me feel tired and sluggish',
       'Some of your symptoms are made by your mind.', 'You are mentally well',
       'You do not need medication', 'Your stay in the hospital is necessary',
       'The doctor is right in prescribing medication for you.',
       'You do not need to be seen by a doctor or psychiatrist',
       'If someone said you have a nervous or mental illness, they would be right',
       'None of the unusual things you are experiencing are due to an illness.',
       '. Loss of energy or drive', 'Feeling unmotivated or numb',
       'Daytime sedation or drowsi

3 Model initialisation

In [180]:
# Initialize the FeedForwardNN model
input_dim = X_train.shape[1]  # number of features in your dataset
model = FeedForwardNN(input_dim)

# Set device to GPU if available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.BCELoss()  # Binary Cross Entropy Loss for binary classification


4 TRaining the model

In [181]:
# Training the model
train_model(model, train_loader, val_loader, optimizer, loss_fn, epochs=100)


Epoch 1/100, Train Loss: 0.6922, Train Accuracy: 0.4909, Val Loss: 0.6916, Val Accuracy: 0.5000
Epoch 2/100, Train Loss: 0.6693, Train Accuracy: 0.6273, Val Loss: 0.6799, Val Accuracy: 0.6667
Epoch 3/100, Train Loss: 0.6566, Train Accuracy: 0.7273, Val Loss: 0.6689, Val Accuracy: 0.6667
Epoch 4/100, Train Loss: 0.6389, Train Accuracy: 0.8545, Val Loss: 0.6591, Val Accuracy: 0.7222
Epoch 5/100, Train Loss: 0.6140, Train Accuracy: 0.8636, Val Loss: 0.6487, Val Accuracy: 0.7222
Epoch 6/100, Train Loss: 0.5909, Train Accuracy: 0.8636, Val Loss: 0.6374, Val Accuracy: 0.7222
Epoch 7/100, Train Loss: 0.5766, Train Accuracy: 0.8818, Val Loss: 0.6254, Val Accuracy: 0.7222
Epoch 8/100, Train Loss: 0.5432, Train Accuracy: 0.9000, Val Loss: 0.6130, Val Accuracy: 0.7222
Epoch 9/100, Train Loss: 0.5226, Train Accuracy: 0.8909, Val Loss: 0.6015, Val Accuracy: 0.6667
Epoch 10/100, Train Loss: 0.4990, Train Accuracy: 0.8818, Val Loss: 0.5928, Val Accuracy: 0.6667
Epoch 11/100, Train Loss: 0.4689, Train

5 Evaluation

In [None]:
import time
import torch
import torch.optim as optim
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# Define your model (for example, using a simple neural network)
class SimpleNN(torch.nn.Module):
    def __init__(self, input_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = torch.nn.Linear(input_dim, 64)
        self.fc2 = torch.nn.Linear(64, 32)
        self.fc3 = torch.nn.Linear(32, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

# Initialize model, optimizer, and loss function
input_dim = X_train_scaled.shape[1]
model = SimpleNN(input_dim)
learning_rate = 0.001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.BCELoss()

# Start tracking training time
start_time = time.time()

# Training loop
epochs = 10
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        # Move data to GPU if available
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Update metrics
        running_loss += loss.item()
        predicted = (outputs.squeeze() > 0.5).float()
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = 100 * correct / total

    # Evaluation on validation set after each epoch
    model.eval()
    val_preds, val_labels = [], []
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            val_preds.append(outputs.squeeze().cpu().numpy())
            val_labels.append(labels.cpu().numpy())

    val_preds = np.concatenate(val_preds)
    val_labels = np.concatenate(val_labels)

    # Calculate metrics
    val_precision = precision_score(val_labels, (val_preds > 0.5).astype(int))
    val_recall = recall_score(val_labels, (val_preds > 0.5).astype(int))
    val_f1 = f1_score(val_labels, (val_preds > 0.5).astype(int))
    val_auc = roc_auc_score(val_labels, val_preds)

    # Print metrics
    print(f"Epoch {epoch+1}/{epochs} | Loss: {epoch_loss:.4f} | Accuracy: {epoch_accuracy:.2f}%")
    print(f"Validation Precision: {val_precision:.4f} | Validation Recall: {val_recall:.4f}")
    print(f"Validation F1-Score: {val_f1:.4f} | Validation AUC: {val_auc:.4f}")

# End training time
end_time = time.time()
training_time = end_time - start_time

# Final evaluation on test set
model.eval()
test_preds, test_labels = [], []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        test_preds.append(outputs.squeeze().cpu().numpy())
        test_labels.append(labels.cpu().numpy())

test_preds = np.concatenate(test_preds)
test_labels = np.concatenate(test_labels)

# Calculate final test metrics
test_precision = precision_score(test_labels, (test_preds > 0.5).astype(int))
test_recall = recall_score(test_labels, (test_preds > 0.5).astype(int))
test_f1 = f1_score(test_labels, (test_preds > 0.5).astype(int))
test_auc = roc_auc_score(test_labels, test_preds)

# Print test metrics
print("\nTest Metrics:")
print(f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")
print(f"Test F1-Score: {test_f1:.4f} | Test AUC: {test_auc:.4f}")

# Print learning rate and training time
print(f"\nLearning Rate: {learning_rate}")
print(f"Training Time: {training_time:.2f} seconds")


Epoch 1/10 | Loss: 0.6937 | Accuracy: 49.09%
Validation Precision: 0.3333 | Validation Recall: 1.0000
Validation F1-Score: 0.5000 | Validation AUC: 0.5694
Epoch 2/10 | Loss: 0.6743 | Accuracy: 60.91%
Validation Precision: 0.4167 | Validation Recall: 0.8333
Validation F1-Score: 0.5556 | Validation AUC: 0.6944
Epoch 3/10 | Loss: 0.6608 | Accuracy: 72.73%
Validation Precision: 0.5000 | Validation Recall: 0.8333
Validation F1-Score: 0.6250 | Validation AUC: 0.7500
Epoch 4/10 | Loss: 0.6394 | Accuracy: 75.45%
Validation Precision: 0.6250 | Validation Recall: 0.8333
Validation F1-Score: 0.7143 | Validation AUC: 0.6944
Epoch 5/10 | Loss: 0.6275 | Accuracy: 79.09%
Validation Precision: 0.4000 | Validation Recall: 0.3333
Validation F1-Score: 0.3636 | Validation AUC: 0.6806
Epoch 6/10 | Loss: 0.6048 | Accuracy: 81.82%
Validation Precision: 0.4000 | Validation Recall: 0.3333
Validation F1-Score: 0.3636 | Validation AUC: 0.6667
Epoch 7/10 | Loss: 0.5870 | Accuracy: 82.73%
Validation Precision: 0.4

Saving best model

In [None]:
# Load the best model for inference
best_model = FeedForwardNN(input_dim)
best_model.load_state_dict(torch.load("best_feedforward_model.pth"))
best_model.to(device)


  best_model.load_state_dict(torch.load("best_feedforward_model.pth"))


FeedForwardNN(
  (fc1): Linear(in_features=59, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [184]:
ff_model = FeedForwardNN(input_dim)  # Redefine model
ff_model.load_state_dict(torch.load("best_feedforward_model.pth"))  # Load weights
ff_model.to(device)
ff_model.eval()  # Set to evaluation mode


  ff_model.load_state_dict(torch.load("best_feedforward_model.pth"))  # Load weights


FeedForwardNN(
  (fc1): Linear(in_features=59, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [185]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# Calculate metrics
ffnn_precision = precision_score(y_test, y_pred_ffnn)
ffnn_recall = recall_score(y_test, y_pred_ffnn)
ffnn_f1 = f1_score(y_test, y_pred_ffnn)
ffnn_auc = roc_auc_score(y_test, y_prob_ffnn)

# Store training time if you recorded it during training
ffnn_time = ff_training_time  # Ensure this is properly stored

# Print results
print(f"FastForward Neural Network Evaluation:")
print(f"Precision: {ffnn_precision:.4f} | Recall: {ffnn_recall:.4f}")
print(f"F1-Score: {ffnn_f1:.4f} | AUC: {ffnn_auc:.4f}")
print(f"Training Time: {ffnn_time:.2f} seconds")


NameError: name 'y_pred_ffnn' is not defined

## Implement Logisitic Regretion

1 Import Libraries

In [None]:
from sklearn.linear_model import LogisticRegression
import time
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Assuming data preprocessing has been done and the data is available in these variables
# X_train, X_val, X_test, y_train, y_val, y_test
# Scaling features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)


2. Training

In [None]:
# Initialize the Logistic Regression model
logreg_model = LogisticRegression(random_state=42, max_iter=1000)

# Record the start time for training
start_time = time.time()

# Train the model
logreg_model.fit(X_train_scaled, y_train)

# Record the end time for training
end_time = time.time()

# Calculate the training time
training_time = end_time - start_time


3. Evaluation

In [None]:
# Predict on train, validation, and test sets
y_train_pred = logreg_model.predict(X_train_scaled)
y_val_pred = logreg_model.predict(X_val_scaled)
y_test_pred = logreg_model.predict(X_test_scaled)

# Calculate metrics for training, validation, and test sets
train_precision = precision_score(y_train, y_train_pred)
train_recall = recall_score(y_train, y_train_pred)
train_f1 = f1_score(y_train, y_train_pred)
train_auc = roc_auc_score(y_train, y_train_pred)

val_precision = precision_score(y_val, y_val_pred)
val_recall = recall_score(y_val, y_val_pred)
val_f1 = f1_score(y_val, y_val_pred)
val_auc = roc_auc_score(y_val, y_val_pred)

test_precision = precision_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred)
test_auc = roc_auc_score(y_test, y_test_pred)

# Print the metrics and training time
print(f"Training time: {training_time:.2f} seconds")
print(f"Training Precision: {train_precision:.4f} | Training Recall: {train_recall:.4f} | Training F1-Score: {train_f1:.4f} | Training AUC: {train_auc:.4f}")
print(f"Validation Precision: {val_precision:.4f} | Validation Recall: {val_recall:.4f} | Validation F1-Score: {val_f1:.4f} | Validation AUC: {val_auc:.4f}")
print(f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f} | Test F1-Score: {test_f1:.4f} | Test AUC: {test_auc:.4f}")


Training time: 0.07 seconds
Training Precision: 1.0000 | Training Recall: 1.0000 | Training F1-Score: 1.0000 | Training AUC: 1.0000
Validation Precision: 1.0000 | Validation Recall: 0.5000 | Validation F1-Score: 0.6667 | Validation AUC: 0.7500
Test Precision: 0.2500 | Test Recall: 0.1667 | Test F1-Score: 0.2000 | Test AUC: 0.4583


## Implementing Support Vector Machine

In [None]:
from sklearn.svm import SVC
import time
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Assuming data preprocessing has been done and the data is available in these variables
# X_train, X_val, X_test, y_train, y_val, y_test
# Scaling features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)


# Initialize the SVM model with a radial basis function kernel
svm_model = SVC(probability=True, random_state=42)

# Record the start time for training
start_time = time.time()

# Train the model
svm_model.fit(X_train_scaled, y_train)

# Record the end time for training
end_time = time.time()

# Calculate the training time
training_time = end_time - start_time


Evaluation

In [None]:
# Predict on train, validation, and test sets
y_train_pred = svm_model.predict(X_train_scaled)
y_val_pred = svm_model.predict(X_val_scaled)
y_test_pred = svm_model.predict(X_test_scaled)

# Calculate metrics for training, validation, and test sets
train_precision = precision_score(y_train, y_train_pred)
train_recall = recall_score(y_train, y_train_pred)
train_f1 = f1_score(y_train, y_train_pred)
train_auc = roc_auc_score(y_train, svm_model.predict_proba(X_train_scaled)[:, 1])

val_precision = precision_score(y_val, y_val_pred)
val_recall = recall_score(y_val, y_val_pred)
val_f1 = f1_score(y_val, y_val_pred)
val_auc = roc_auc_score(y_val, svm_model.predict_proba(X_val_scaled)[:, 1])

test_precision = precision_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred)
test_auc = roc_auc_score(y_test, svm_model.predict_proba(X_test_scaled)[:, 1])

# Print the metrics and training time
print(f"Training time: {training_time:.2f} seconds")
print(f"Training Precision: {train_precision:.4f} | Training Recall: {train_recall:.4f} | Training F1-Score: {train_f1:.4f} | Training AUC: {train_auc:.4f}")
print(f"Validation Precision: {val_precision:.4f} | Validation Recall: {val_recall:.4f} | Validation F1-Score: {val_f1:.4f} | Validation AUC: {val_auc:.4f}")
print(f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f} | Test F1-Score: {test_f1:.4f} | Test AUC: {test_auc:.4f}")


Training time: 0.01 seconds
Training Precision: 1.0000 | Training Recall: 0.7308 | Training F1-Score: 0.8444 | Training AUC: 1.0000
Validation Precision: 1.0000 | Validation Recall: 0.1667 | Validation F1-Score: 0.2857 | Validation AUC: 0.6389
Test Precision: 0.0000 | Test Recall: 0.0000 | Test F1-Score: 0.0000 | Test AUC: 0.4167


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Implementing Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
import time
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

# Assuming data preprocessing has been done and the data is available in these variables
# X_train, X_val, X_test, y_train, y_val, y_test
# Scaling features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

#TRAINING

# Initialize the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Record the start time for training
start_time = time.time()

# Train the model
rf_model.fit(X_train_scaled, y_train)

# Record the end time for training
end_time = time.time()

# Calculate the training time
training_time = end_time - start_time

#EVALUATION

# Predict on train, validation, and test sets
y_train_pred = rf_model.predict(X_train_scaled)
y_val_pred = rf_model.predict(X_val_scaled)
y_test_pred = rf_model.predict(X_test_scaled)

# Calculate metrics for training, validation, and test sets
train_precision = precision_score(y_train, y_train_pred)
train_recall = recall_score(y_train, y_train_pred)
train_f1 = f1_score(y_train, y_train_pred)
train_auc = roc_auc_score(y_train, rf_model.predict_proba(X_train_scaled)[:, 1])

val_precision = precision_score(y_val, y_val_pred)
val_recall = recall_score(y_val, y_val_pred)
val_f1 = f1_score(y_val, y_val_pred)
val_auc = roc_auc_score(y_val, rf_model.predict_proba(X_val_scaled)[:, 1])

test_precision = precision_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred)
test_auc = roc_auc_score(y_test, rf_model.predict_proba(X_test_scaled)[:, 1])

# Print the metrics and training time
print(f"Training time: {training_time:.2f} seconds")
print(f"Training Precision: {train_precision:.4f} | Training Recall: {train_recall:.4f} | Training F1-Score: {train_f1:.4f} | Training AUC: {train_auc:.4f}")
print(f"Validation Precision: {val_precision:.4f} | Validation Recall: {val_recall:.4f} | Validation F1-Score: {val_f1:.4f} | Validation AUC: {val_auc:.4f}")
print(f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f} | Test F1-Score: {test_f1:.4f} | Test AUC: {test_auc:.4f}")


Training time: 0.15 seconds
Training Precision: 1.0000 | Training Recall: 1.0000 | Training F1-Score: 1.0000 | Training AUC: 1.0000
Validation Precision: 0.7500 | Validation Recall: 0.5000 | Validation F1-Score: 0.6000 | Validation AUC: 0.7917
Test Precision: 1.0000 | Test Recall: 0.1667 | Test F1-Score: 0.2857 | Test AUC: 0.4653


## Implementing a K-Nearest Neighbor

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
import time

# Initialize KNN model
knn_model = KNeighborsClassifier(n_neighbors=5)

# Record the start time for training
start_time = time.time()

# Train the model
knn_model.fit(X_train_scaled, y_train)

# Record the end time for training
end_time = time.time()

# Calculate training time
training_time = end_time - start_time

# Make predictions on the test set
y_pred_knn = knn_model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred_knn)
report = classification_report(y_test, y_pred_knn)

# Store the metrics
print("KNN Model Test Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Training Time: {training_time:.4f} seconds")
print("Classification Report:\n", report)

# Optionally, return metrics if you plan to store them
metrics = {
    "model": "KNN",
    "accuracy": accuracy,
    "training_time": training_time,
    "classification_report": report
}


KNN Model Test Metrics:
Accuracy: 0.7222
Training Time: 0.0052 seconds
Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.92      0.81        12
           1       0.67      0.33      0.44         6

    accuracy                           0.72        18
   macro avg       0.70      0.62      0.63        18
weighted avg       0.71      0.72      0.69        18



Implementing Gradient Boost machine

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
import time

# Initialize Gradient Boosting Classifier
gbm_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# Record the start time for training
start_time = time.time()

# Train the model
gbm_model.fit(X_train_scaled, y_train)

# Record the end time for training
end_time = time.time()
training_time = end_time - start_time

# Predict on the validation and test sets
y_val_pred = gbm_model.predict(X_val_scaled)
y_test_pred = gbm_model.predict(X_test_scaled)

# Calculate metrics for evaluation
val_precision = precision_score(y_val, y_val_pred)
val_recall = recall_score(y_val, y_val_pred)
val_f1 = f1_score(y_val, y_val_pred)
val_auc = roc_auc_score(y_val, gbm_model.predict_proba(X_val_scaled)[:, 1])

test_precision = precision_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred)
test_auc = roc_auc_score(y_test, gbm_model.predict_proba(X_test_scaled)[:, 1])

# Print the evaluation metrics
print(f"Gradient Boosting Model Evaluation (Validation Set):")
print(f"Validation Precision: {val_precision:.4f} | Validation Recall: {val_recall:.4f}")
print(f"Validation F1-Score: {val_f1:.4f} | Validation AUC: {val_auc:.4f}")

print(f"\nGradient Boosting Model Evaluation (Test Set):")
print(f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")
print(f"Test F1-Score: {test_f1:.4f} | Test AUC: {test_auc:.4f}")

print(f"\nTraining Time: {training_time:.4f} seconds")


Gradient Boosting Model Evaluation (Validation Set):
Validation Precision: 0.5000 | Validation Recall: 0.5000
Validation F1-Score: 0.5000 | Validation AUC: 0.6389

Gradient Boosting Model Evaluation (Test Set):
Test Precision: 0.3333 | Test Recall: 0.3333
Test F1-Score: 0.3333 | Test AUC: 0.4444

Training Time: 0.2934 seconds


## Implementing XGBoost

In [None]:
!pip install xgboost




In [None]:
import xgboost as xgb
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
import time

# Initialize XGBoost Classifier
xgboost_model = xgb.XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# Record the start time for training
start_time = time.time()

# Train the model
xgboost_model.fit(X_train_scaled, y_train)

# Record the end time for training
end_time = time.time()
training_time = end_time - start_time

# Predict on the validation and test sets
y_val_pred = xgboost_model.predict(X_val_scaled)
y_test_pred = xgboost_model.predict(X_test_scaled)

# Calculate metrics for evaluation
val_precision = precision_score(y_val, y_val_pred)
val_recall = recall_score(y_val, y_val_pred)
val_f1 = f1_score(y_val, y_val_pred)
val_auc = roc_auc_score(y_val, xgboost_model.predict_proba(X_val_scaled)[:, 1])

test_precision = precision_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred)
test_auc = roc_auc_score(y_test, xgboost_model.predict_proba(X_test_scaled)[:, 1])

# Print the evaluation metrics
print(f"XGBoost Model Evaluation (Validation Set):")
print(f"Validation Precision: {val_precision:.4f} | Validation Recall: {val_recall:.4f}")
print(f"Validation F1-Score: {val_f1:.4f} | Validation AUC: {val_auc:.4f}")

print(f"\nXGBoost Model Evaluation (Test Set):")
print(f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")
print(f"Test F1-Score: {test_f1:.4f} | Test AUC: {test_auc:.4f}")

print(f"\nTraining Time: {training_time:.4f} seconds")


XGBoost Model Evaluation (Validation Set):
Validation Precision: 0.6000 | Validation Recall: 0.5000
Validation F1-Score: 0.5455 | Validation AUC: 0.5556

XGBoost Model Evaluation (Test Set):
Test Precision: 0.2000 | Test Recall: 0.1667
Test F1-Score: 0.1818 | Test AUC: 0.3750

Training Time: 0.0725 seconds


## Implementing LightGBM

In [None]:
!pip install lightgbm


In [None]:
import lightgbm as lgb
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
import time

# Initialize LightGBM Classifier
lgbm_model = lgb.LGBMClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# Record the start time for training
start_time = time.time()

# Train the model
lgbm_model.fit(X_train_scaled, y_train)

# Record the end time for training
end_time = time.time()
training_time = end_time - start_time

# Predict on the validation and test sets
y_val_pred = lgbm_model.predict(X_val_scaled)
y_test_pred = lgbm_model.predict(X_test_scaled)

# Calculate metrics for evaluation
val_precision = precision_score(y_val, y_val_pred)
val_recall = recall_score(y_val, y_val_pred)
val_f1 = f1_score(y_val, y_val_pred)
val_auc = roc_auc_score(y_val, lgbm_model.predict_proba(X_val_scaled)[:, 1])

test_precision = precision_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred)
test_auc = roc_auc_score(y_test, lgbm_model.predict_proba(X_test_scaled)[:, 1])

# Print the evaluation metrics
print(f"LightGBM Model Evaluation (Validation Set):")
print(f"Validation Precision: {val_precision:.4f} | Validation Recall: {val_recall:.4f}")
print(f"Validation F1-Score: {val_f1:.4f} | Validation AUC: {val_auc:.4f}")

print(f"\nLightGBM Model Evaluation (Test Set):")
print(f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")
print(f"Test F1-Score: {test_f1:.4f} | Test AUC: {test_auc:.4f}")

print(f"\nTraining Time: {training_time:.4f} seconds")


[LightGBM] [Info] Number of positive: 26, number of negative: 55
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001174 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 217
[LightGBM] [Info] Number of data points in the train set: 81, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.320988 -> initscore=-0.749237
[LightGBM] [Info] Start training from score -0.749237
LightGBM Model Evaluation (Validation Set):
Validation Precision: 0.4000 | Validation Recall: 0.3333
Validation F1-Score: 0.3636 | Validation AUC: 0.6667

LightGBM Model Evaluation (Test Set):
Test Precision: 0.2500 | Test Recall: 0.1667
Test F1-Score: 0.2000 | Test AUC: 0.4583

Training Time: 0.0694 seconds




## Implementing CATBoost

In [None]:
!pip install catboost


Collecting catboost
  Downloading catboost-1.2.7-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp311-cp311-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [None]:
from catboost import CatBoostClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
import time

# Initialize CatBoost Classifier
catboost_model = CatBoostClassifier(iterations=100, learning_rate=0.1, depth=3, random_seed=42, verbose=0)

# Record start time
start_time = time.time()

# Train the model
catboost_model.fit(X_train_scaled, y_train)

# Record end time
end_time = time.time()
training_time = end_time - start_time

# Predict on validation and test sets
y_val_pred = catboost_model.predict(X_val_scaled)
y_test_pred = catboost_model.predict(X_test_scaled)

# Calculate evaluation metrics
val_precision = precision_score(y_val, y_val_pred)
val_recall = recall_score(y_val, y_val_pred)
val_f1 = f1_score(y_val, y_val_pred)
val_auc = roc_auc_score(y_val, catboost_model.predict_proba(X_val_scaled)[:, 1])

test_precision = precision_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred)
test_auc = roc_auc_score(y_test, catboost_model.predict_proba(X_test_scaled)[:, 1])

# Print evaluation results
print(f"CatBoost Model Evaluation (Validation Set):")
print(f"Validation Precision: {val_precision:.4f} | Validation Recall: {val_recall:.4f}")
print(f"Validation F1-Score: {val_f1:.4f} | Validation AUC: {val_auc:.4f}")

print(f"\nCatBoost Model Evaluation (Test Set):")
print(f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")
print(f"Test F1-Score: {test_f1:.4f} | Test AUC: {test_auc:.4f}")

print(f"\nTraining Time: {training_time:.4f} seconds")


CatBoost Model Evaluation (Validation Set):
Validation Precision: 0.4286 | Validation Recall: 0.5000
Validation F1-Score: 0.4615 | Validation AUC: 0.5972

CatBoost Model Evaluation (Test Set):
Test Precision: 0.2500 | Test Recall: 0.1667
Test F1-Score: 0.2000 | Test AUC: 0.4167

Training Time: 0.4858 seconds


## Implementing A neural network

Defining

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import time
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# Define the Neural Network Model
class NeuralNetwork(nn.Module):
    def __init__(self, input_dim):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.output(x))
        return x

# Initialize the model
input_dim = X_train_scaled.shape[1]  # Number of features
model = NeuralNetwork(input_dim)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


NeuralNetwork(
  (fc1): Linear(in_features=59, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (output): Linear(in_features=32, out_features=1, bias=True)
  (relu): ReLU()
  (sigmoid): Sigmoid()
)

Implementation

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import time

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert data to PyTorch tensors (Fixed the .values issue)
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(device)

X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1).to(device)

X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1).to(device)

# Create DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define a simple Neural Network model
class NeuralNetwork(nn.Module):
    def __init__(self, input_dim):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return self.sigmoid(x)

# Initialize model
input_dim = X_train_scaled.shape[1]
model = NeuralNetwork(input_dim).to(device)

# Loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 50
start_time = time.time()

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

training_time = time.time() - start_time
print(f"Training Time: {training_time:.2f} seconds")

# Evaluation
model.eval()
with torch.no_grad():
    y_pred_test = model(X_test_tensor)
    y_pred_test = (y_pred_test > 0.5).float()
    accuracy = (y_pred_test == y_test_tensor).float().mean().item()
    print(f"Test Accuracy: {accuracy:.4f}")


Epoch [1/50], Loss: 0.6764
Epoch [2/50], Loss: 0.6549
Epoch [3/50], Loss: 0.6430
Epoch [4/50], Loss: 0.6208
Epoch [5/50], Loss: 0.6085
Epoch [6/50], Loss: 0.5953
Epoch [7/50], Loss: 0.5741
Epoch [8/50], Loss: 0.5606
Epoch [9/50], Loss: 0.5583
Epoch [10/50], Loss: 0.5377
Epoch [11/50], Loss: 0.5050
Epoch [12/50], Loss: 0.4834
Epoch [13/50], Loss: 0.4662
Epoch [14/50], Loss: 0.4747
Epoch [15/50], Loss: 0.4356
Epoch [16/50], Loss: 0.4222
Epoch [17/50], Loss: 0.3968
Epoch [18/50], Loss: 0.3676
Epoch [19/50], Loss: 0.3773
Epoch [20/50], Loss: 0.3336
Epoch [21/50], Loss: 0.3094
Epoch [22/50], Loss: 0.2991
Epoch [23/50], Loss: 0.2662
Epoch [24/50], Loss: 0.2503
Epoch [25/50], Loss: 0.2223
Epoch [26/50], Loss: 0.2084
Epoch [27/50], Loss: 0.1924
Epoch [28/50], Loss: 0.1633
Epoch [29/50], Loss: 0.1426
Epoch [30/50], Loss: 0.1276
Epoch [31/50], Loss: 0.1086
Epoch [32/50], Loss: 0.0938
Epoch [33/50], Loss: 0.0828
Epoch [34/50], Loss: 0.0752
Epoch [35/50], Loss: 0.0668
Epoch [36/50], Loss: 0.0534
E

Evaluation

In [None]:
# Switch to evaluation mode
model.eval()

# Get predictions for validation set
with torch.no_grad():
    y_val_probs = model(X_val_tensor).cpu().numpy()
    y_val_pred = (y_val_probs >= 0.5).astype(int)

    y_test_probs = model(X_test_tensor).cpu().numpy()
    y_test_pred = (y_test_probs >= 0.5).astype(int)

# Compute Metrics
val_precision = precision_score(y_val, y_val_pred)
val_recall = recall_score(y_val, y_val_pred)
val_f1 = f1_score(y_val, y_val_pred)
val_auc = roc_auc_score(y_val, y_val_probs)

test_precision = precision_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred)
test_auc = roc_auc_score(y_test, y_test_probs)

# Print Evaluation Results
print(f"Neural Network Evaluation (Validation Set):")
print(f"Validation Precision: {val_precision:.4f} | Validation Recall: {val_recall:.4f}")
print(f"Validation F1-Score: {val_f1:.4f} | Validation AUC: {val_auc:.4f}")

print(f"\nNeural Network Evaluation (Test Set):")
print(f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")
print(f"Test F1-Score: {test_f1:.4f} | Test AUC: {test_auc:.4f}")

print(f"\nTraining Time: {training_time:.4f} seconds")


Neural Network Evaluation (Validation Set):
Validation Precision: 1.0000 | Validation Recall: 0.5000
Validation F1-Score: 0.6667 | Validation AUC: 0.7222

Neural Network Evaluation (Test Set):
Test Precision: 0.2500 | Test Recall: 0.1667
Test F1-Score: 0.2000 | Test AUC: 0.4167

Training Time: 0.4476 seconds


## Implementing DNN

Defining

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import time
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

# Define the Deep Neural Network (DNN) Model
class DNNModel(nn.Module):
    def __init__(self, input_dim):
        super(DNNModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.sigmoid(self.output(x))
        return x

# Initialize the model
input_dim = X_train_scaled.shape[1]
model = DNNModel(input_dim)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


Training

In [None]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(device)

X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1).to(device)

# Training parameters
epochs = 100
batch_size = 32
train_losses = []
val_losses = []

# Training loop
start_time = time.time()

for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()

    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward pass
    loss.backward()
    optimizer.step()

    # Validate the model
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        val_loss = criterion(val_outputs, y_val_tensor)

    train_losses.append(loss.item())
    val_losses.append(val_loss.item())

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}")

training_time = time.time() - start_time
print(f"\nTraining completed in {training_time:.2f} seconds")


Epoch [10/100], Loss: 0.5956, Val Loss: 0.6140
Epoch [20/100], Loss: 0.4602, Val Loss: 0.5729
Epoch [30/100], Loss: 0.3076, Val Loss: 0.6299
Epoch [40/100], Loss: 0.1881, Val Loss: 0.8405
Epoch [50/100], Loss: 0.0992, Val Loss: 1.0207
Epoch [60/100], Loss: 0.0317, Val Loss: 1.0561
Epoch [70/100], Loss: 0.0081, Val Loss: 1.1030
Epoch [80/100], Loss: 0.0029, Val Loss: 1.1873
Epoch [90/100], Loss: 0.0015, Val Loss: 1.2556
Epoch [100/100], Loss: 0.0010, Val Loss: 1.3096

Training completed in 0.45 seconds


Evaluation

In [None]:
# Convert test data to PyTorch tensors
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1).to(device)

# Get predictions
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor).cpu().numpy()

# Convert probabilities to binary predictions using a threshold of 0.5
y_pred = (test_outputs >= 0.5).astype(int)

# Calculate evaluation metrics
test_precision = precision_score(y_test, y_pred)
test_recall = recall_score(y_test, y_pred)
test_f1 = f1_score(y_test, y_pred)
test_auc = roc_auc_score(y_test, test_outputs)

# Print the results
print("\nDNN Model Test Metrics:")
print(f"Test Precision: {test_precision:.4f} | Test Recall: {test_recall:.4f}")
print(f"Test F1-Score: {test_f1:.4f} | Test AUC: {test_auc:.4f}")
print(f"Training Time: {training_time:.2f} seconds | Learning Rate: 0.001")



DNN Model Test Metrics:
Test Precision: 0.2500 | Test Recall: 0.1667
Test F1-Score: 0.2000 | Test AUC: 0.4167
Training Time: 0.45 seconds | Learning Rate: 0.001


# Comparing The Models

In [None]:
import pandas as pd

# Store the results of all models
model_comparison = pd.DataFrame({
    "Model": [
        "Logistic Regression", "Random Forest", "Support Vector Machine",
        "XGBoost", "Gradient Boosting", "LightGBM", "Naive Bayes",
        "K-Nearest Neighbors", "Decision Tree", "Deep Neural Network"
    ],
    "Precision": [log_precision, rf_precision, svm_precision,
                  xgb_precision, gbm_precision, lgbm_precision,
                  nb_precision, knn_precision, dt_precision, dnn_precision],
    "Recall": [log_recall, rf_recall, svm_recall,
               xgb_recall, gbm_recall, lgbm_recall,
               nb_recall, knn_recall, dt_recall, dnn_recall],
    "F1-Score": [log_f1, rf_f1, svm_f1,
                 xgb_f1, gbm_f1, lgbm_f1,
                 nb_f1, knn_f1, dt_f1, dnn_f1],
    "AUC": [log_auc, rf_auc, svm_auc,
            xgb_auc, gbm_auc, lgbm_auc,
            nb_auc, knn_auc, dt_auc, dnn_auc],
    "Training Time (s)": [log_time, rf_time, svm_time,
                          xgb_time, gbm_time, lgbm_time,
                          nb_time, knn_time, dt_time, dnn_time],
    "Learning Rate": [0.001, "N/A", "N/A", 0.1, 0.1, 0.05, "N/A", "N/A", "N/A", 0.001]
})

# Sort models by highest AUC
model_comparison = model_comparison.sort_values(by="AUC", ascending=False)

# Display the table
print(model_comparison)


NameError: name 'log_precision' is not defined