In [None]:
# Mount Google Drive at /content/drive so files stored there can be opened by path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import sklearn as nn
import torch
from sklearn.metrics import roc_auc_score
import math
from numpy import vstack
from torch.utils.data import Dataset
from pandas import read_csv
from torch.utils.data import DataLoader
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Tanh
from torch.nn import Sigmoid
from torch.nn import Module
from torch.nn.init import kaiming_uniform_
from torch.optim import SGD
from torch.optim import Adam
from torch.nn import BCELoss
import torch.nn as nn
import time

In [None]:
class CSVDataset(Dataset):
    """Dataset backed by a CSV file whose final column is the target.

    The file is read with its first row as the header. Every column except
    the last is a feature; the last column is the label. Both are held as
    float32 NumPy arrays, and the labels are stored as an (n_rows, 1) column.
    """

    def __init__(self, path):
        """Load ``path`` and print the feature shape, then the label shape."""
        table = read_csv(path, header=0).values
        features = table[:, :-1]
        targets = table[:, -1]

        self.X = features.astype('float32')
        # Labels are kept as a column vector so each item's label has shape (1,).
        self.y = targets.astype('float32').reshape((len(targets), 1))

        for array in (self.X, self.y):
            print(array.shape)

    def __len__(self):
        """Return the number of rows loaded from the CSV."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return ``[features, label]`` for the row selected by ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return [sample, target]

In [None]:
def prepare_train_dataset(path, batch_size=1662, shuffle=False):
    """Build a DataLoader over the training CSV at ``path``.

    Args:
        path: Location of a CSV file that ``CSVDataset`` can read.
        batch_size: Rows per batch. Defaults to 1662, the value that used to
            be hard-coded here; the training CSVs loaded in this notebook
            report 1662 rows, so the default yields one batch per epoch.
        shuffle: Whether the loader reshuffles rows every epoch. Defaults to
            False, matching the previous behaviour.

    Returns:
        A ``DataLoader`` wrapping a ``CSVDataset`` built from ``path``.
    """
    train1 = CSVDataset(path)
    train_dl = DataLoader(train1, batch_size=batch_size, shuffle=shuffle)
    return train_dl

def prepare_test_dataset(path, batch_size=1024, shuffle=False):
    """Build a DataLoader over the test CSV at ``path``.

    Args:
        path: Location of a CSV file that ``CSVDataset`` can read.
        batch_size: Rows per batch. Defaults to 1024, the value that used to
            be hard-coded here.
        shuffle: Whether the loader reshuffles rows on each pass. Defaults to
            False so evaluation visits rows in file order, as before.

    Returns:
        A ``DataLoader`` wrapping a ``CSVDataset`` built from ``path``.
    """
    test1 = CSVDataset(path)
    test_dl = DataLoader(test1, batch_size=batch_size, shuffle=shuffle)
    return test_dl

In [None]:
# Build one train/test DataLoader pair per feature-set variant. The _42/_28/_14
# suffix corresponds to the number of feature columns each CSV reports on load;
# every call constructs a CSVDataset, which prints the feature and label shapes.
train_dl_42 = prepare_train_dataset('/content/drive/MyDrive/Colab Notebooks/AFD/merged_train_data.csv')
test_dl_42 = prepare_test_dataset('/content/drive/MyDrive/Colab Notebooks/AFD/merged_test_data.csv')

# 28-feature variant.
train_dl_28 = prepare_train_dataset('/content/drive/MyDrive/Colab Notebooks/AFD/merged_train_data_28.csv')
test_dl_28 = prepare_test_dataset('/content/drive/MyDrive/Colab Notebooks/AFD/merged_test_data_28.csv')

# 14-feature variant.
train_dl_14 = prepare_train_dataset('/content/drive/MyDrive/Colab Notebooks/AFD/merged_train_data_14.csv')
test_dl_14 = prepare_test_dataset('/content/drive/MyDrive/Colab Notebooks/AFD/merged_test_data_14.csv')

(1662, 42)
(1662, 1)
(125314, 42)
(125314, 1)
(1662, 28)
(1662, 1)
(125314, 28)
(125314, 1)
(1662, 14)
(1662, 1)
(125314, 14)
(125314, 1)


In [None]:
class Logistic_Regression(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    Maps an input of ``num_features`` values per row to a single sigmoid
    output per row.
    """

    def __init__(self, num_features):
        """Create the ``num_features -> 1`` linear layer and the sigmoid."""
        super().__init__()
        self.layer0 = nn.Linear(num_features, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(layer0(x))``."""
        return self.sigmoid(self.layer0(x))

In [None]:
def train_model(model, train_dl, num_epochs, lr=0.01, momentum=0.9, log_every=None):
    """Train ``model`` in place with binary cross-entropy and momentum SGD.

    Args:
        model: Module whose output is a probability compatible with
            ``torch.nn.BCELoss`` (same shape as the labels).
        train_dl: Iterable of ``(inputs, labels)`` batches that also exposes
            a sized ``.dataset`` attribute (e.g. a ``DataLoader``).
        num_epochs: Number of full passes over ``train_dl``.
        lr: SGD learning rate. Defaults to 0.01, the previously fixed value.
        momentum: SGD momentum. Defaults to 0.9, the previously fixed value.
        log_every: If a positive integer, print the mean per-sample loss every
            ``log_every`` epochs and after the final epoch. ``None`` (the
            default) prints nothing, as before.

    Returns:
        None. The model's parameters are updated in place.
    """
    # Define loss function and optimizer
    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_dl:
            optimizer.zero_grad()
            outputs = model(inputs.float())
            loss = criterion(outputs, labels.float())
            loss.backward()
            optimizer.step()
            # BCELoss averages over the batch; weight by batch size so the
            # epoch figure below is a per-sample mean even with a short last batch.
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(train_dl.dataset)

        # The epoch loss used to be computed and dropped; surface it on request.
        if log_every and ((epoch + 1) % log_every == 0 or epoch + 1 == num_epochs):
            print(f"Epoch {epoch + 1}/{num_epochs}: loss = {epoch_loss:.4f}")

In [None]:
def evaluate_model(model, test_dl):
    """Score ``model`` on ``test_dl``, print the ROC AUC and return it.

    Args:
        model: Module producing one score per input row.
        test_dl: Iterable of ``(inputs, labels)`` tensor batches.

    Returns:
        The ROC AUC computed by ``roc_auc_score`` over every batch in
        ``test_dl``. The same value is printed with four decimals.
    """
    model.eval()
    y_true = []
    y_pred = []
    # Gradients are not needed for scoring.
    with torch.no_grad():
        for inputs, labels in test_dl:
            outputs = model(inputs.float())
            # Flatten both sides so labels and scores are 1-D and line up
            # element for element.
            y_true.extend(labels.numpy().flatten())
            y_pred.extend(outputs.numpy().flatten())
    auc = roc_auc_score(y_true, y_pred)
    print(f"AUC: {auc:.4f}")
    # Returning the value lets callers collect AUCs without re-implementing
    # this loop.
    return auc

In [None]:
# Fresh logistic-regression model for the 42-feature data.
model_42 = Logistic_Regression(42)

In [None]:
# Fit model_42 on the 42-feature training loader for 150 epochs.
train_model(model_42, train_dl_42, num_epochs=150)


In [None]:
# Print the test-set AUC of model_42.
evaluate_model(model_42, test_dl_42)

In [None]:
# Repeat training on the 42-feature data ten times and summarise the test AUC.
auc_values = []

# Perform 10 training runs
for i in range(10):
    # Seed with the run index before the model is built: each run still starts
    # from different initial weights, but re-running the cell reproduces the
    # same ten AUCs.
    torch.manual_seed(i)
    model_42 = Logistic_Regression(42)
    train_model(model_42, train_dl_42, num_epochs=150)

    # Evaluate the model
    model_42.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for inputs, labels in test_dl_42:
            outputs = model_42(inputs.float())
            y_true.extend(labels.numpy())
            y_pred.extend(outputs.numpy().flatten())
    auc = roc_auc_score(y_true, y_pred)
    auc_values.append(auc)  # Append AUC to the list
    print(f"Run {i+1}: AUC = {auc:.4f}")

# Calculate the average AUC
average_auc = np.mean(auc_values)
print(f"\nAverage AUC: {average_auc:.4f}")

# Calculate the std AUC (np.std defaults to the population form, ddof=0)
auc_std_dev = np.std(auc_values)
print(f"Standard Deviation of AUC: {auc_std_dev:.4f}")


Run 1: AUC = 0.6143
Run 2: AUC = 0.6027
Run 3: AUC = 0.4825
Run 4: AUC = 0.6198
Run 5: AUC = 0.6274
Run 6: AUC = 0.6150
Run 7: AUC = 0.6136
Run 8: AUC = 0.4691
Run 9: AUC = 0.5214
Run 10: AUC = 0.5762

Average AUC: 0.5742
Standard Deviation of AUC: 0.0572


In [None]:
# Fresh logistic-regression model for the 28-feature data.
model_28 = Logistic_Regression(28)

In [None]:
# Fit model_28 on the 28-feature training loader for 150 epochs.
train_model(model_28, train_dl_28, num_epochs=150)

In [None]:
# Print the test-set AUC of model_28.
evaluate_model(model_28, test_dl_28)

In [None]:
# Repeat training on the 28-feature data ten times and summarise the test AUC.
auc_values_28 = []

# Perform 10 training runs
for i in range(10):
    # Seed with the run index before the model is built: each run still starts
    # from different initial weights, but re-running the cell reproduces the
    # same ten AUCs.
    torch.manual_seed(i)
    model_28 = Logistic_Regression(28)
    train_model(model_28, train_dl_28, num_epochs=150)

    # Evaluate the model
    model_28.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for inputs, labels in test_dl_28:
            outputs = model_28(inputs.float())
            y_true.extend(labels.numpy())
            y_pred.extend(outputs.numpy().flatten())
    auc_28 = roc_auc_score(y_true, y_pred)
    auc_values_28.append(auc_28)  # Append AUC to the list
    print(f"Run {i+1}: AUC = {auc_28:.4f}")

# Calculate the average AUC
average_auc_28 = np.mean(auc_values_28)
print(f"\nAverage AUC: {average_auc_28:.4f}")

# Calculate the std AUC (np.std defaults to the population form, ddof=0)
auc_std_dev_28 = np.std(auc_values_28)
print(f"Standard Deviation of AUC: {auc_std_dev_28:.4f}")

Run 1: AUC = 0.4169
Run 2: AUC = 0.5776
Run 3: AUC = 0.5786
Run 4: AUC = 0.4890
Run 5: AUC = 0.5900
Run 6: AUC = 0.5537
Run 7: AUC = 0.4395
Run 8: AUC = 0.4432
Run 9: AUC = 0.5311
Run 10: AUC = 0.6131

Average AUC: 0.5233
Standard Deviation of AUC: 0.0674


In [None]:
# Fresh logistic-regression model for the 14-feature data.
model_14 = Logistic_Regression(14)

In [None]:
# Fit model_14 on the 14-feature training loader for 150 epochs.
train_model(model_14, train_dl_14, num_epochs=150)

In [None]:
# Print the test-set AUC of model_14.
evaluate_model(model_14, test_dl_14)

In [None]:
# Repeat training on the 14-feature data ten times and summarise the test AUC.
auc_values_14 = []

# Perform 10 training runs
for i in range(10):
    # Seed with the run index before the model is built: each run still starts
    # from different initial weights, but re-running the cell reproduces the
    # same ten AUCs.
    torch.manual_seed(i)
    model_14 = Logistic_Regression(14)
    train_model(model_14, train_dl_14, num_epochs=150)

    # Evaluate the model
    model_14.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for inputs, labels in test_dl_14:
            outputs = model_14(inputs.float())
            y_true.extend(labels.numpy())
            y_pred.extend(outputs.numpy().flatten())
    auc_14 = roc_auc_score(y_true, y_pred)
    auc_values_14.append(auc_14)  # Append AUC to the list
    print(f"Run {i+1}: AUC = {auc_14:.4f}")

# Calculate the average AUC
average_auc_14 = np.mean(auc_values_14)
print(f"\nAverage AUC: {average_auc_14:.4f}")

# Calculate the std AUC (np.std defaults to the population form, ddof=0)
auc_std_dev_14 = np.std(auc_values_14)
print(f"Standard Deviation of AUC: {auc_std_dev_14:.4f}")

Run 1: AUC = 0.6064
Run 2: AUC = 0.6299
Run 3: AUC = 0.4837
Run 4: AUC = 0.4950
Run 5: AUC = 0.5637
Run 6: AUC = 0.6099
Run 7: AUC = 0.5932
Run 8: AUC = 0.5007
Run 9: AUC = 0.6128
Run 10: AUC = 0.6048

Average AUC: 0.5700
Standard Deviation of AUC: 0.0529
