In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from utils.utils import load_data, remove_zero_features, standardize
from utils.neuralnet_utils import datasetMF
from utils.neuralnet_train import train, test, eval
from utils.neuralnet_model import MLMLP

from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.metrics import average_precision_score, roc_auc_score, brier_score_loss, f1_score, hamming_loss

import torch
from torch import nn
from torch.utils.data import DataLoader

In [2]:
plot_path = "plots/"
weights_path = "weights/"

In [3]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using mps device


In [4]:
# Load data for classification set
subject_data, features, diagnoses = load_data('classification')

In [5]:
# Preprocessing
X, _ = remove_zero_features(features.iloc[:,1:])
X = standardize(X)
print(f"Number of samples: {X.shape[0]}")
print(f"Number of features: {X.shape[1]}")

Number of samples: 2815
Number of features: 922


In [6]:
Y = diagnoses.iloc[:,1:]
print(f"Number of labels: {Y.shape[1]}")

Number of labels: 13


In [7]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

---

In [8]:
training_data = datasetMF(X_train, Y_train) 
test_data = datasetMF(X_test, Y_test)
print(f"Size of training set: {len(training_data)}")
print(f"Size of test set: {len(test_data)}")

Size of training set: 2111
Size of test set: 704


In [9]:
train_dataloader = DataLoader(training_data, batch_size=128, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=128, shuffle=True)

In [10]:
for X, y in test_dataloader:
    print(f"Shape of X [batch_size, D]: {X.shape}")
    print(f"Shape of Y [batch_size]: {y.shape} {y.dtype}")
    break

Shape of X [batch_size, D]: torch.Size([128, 922])
Shape of Y [batch_size]: torch.Size([128, 13]) torch.float32


In [14]:
model = MLMLP().to(device)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-3)

In [15]:
epochs = 20
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, device, model, loss_fn, optimizer)
    test(test_dataloader, device, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.699943  [  128/ 2111]
Test Error: 
 Accuracy: 0.0%, Avg loss: 0.637952 

Epoch 2
-------------------------------
loss: 0.643524  [  128/ 2111]
Test Error: 
 Accuracy: 0.0%, Avg loss: 0.540665 

Epoch 3
-------------------------------
loss: 0.653499  [  128/ 2111]
Test Error: 
 Accuracy: 0.0%, Avg loss: 0.509820 

Epoch 4
-------------------------------
loss: 0.610869  [  128/ 2111]
Test Error: 
 Accuracy: 0.0%, Avg loss: 0.540673 

Epoch 5
-------------------------------
loss: 0.563083  [  128/ 2111]
Test Error: 
 Accuracy: 0.0%, Avg loss: 0.541527 

Epoch 6
-------------------------------
loss: 0.549559  [  128/ 2111]
Test Error: 
 Accuracy: 0.0%, Avg loss: 0.495634 

Epoch 7
-------------------------------
loss: 0.527740  [  128/ 2111]
Test Error: 
 Accuracy: 0.0%, Avg loss: 0.448319 

Epoch 8
-------------------------------
loss: 0.534007  [  128/ 2111]
Test Error: 
 Accuracy: 0.0%, Avg loss: 0.430424 

Epoch 9
------------------------

---

In [16]:
auprc = []
auroc = []
brier = []
hamm = []
f1 = []

eval_dataloader = DataLoader(test_data, batch_size=128, shuffle=False)

for i in range(100):
    X_test_resampled, y_test_resampled = resample(X_test, Y_test, replace=True, n_samples=len(Y_test), random_state=0+i)

    y_prob, y_pred  = eval(eval_dataloader, device, model, loss_fn)
    
    # Compute brier score
    brier_scores = np.zeros(y_prob.shape[1])
    for i in range(y_prob.shape[1]):
        brier_scores[i] = brier_score_loss(y_test_resampled.iloc[:,i], y_prob[:,i])
    brier.append(brier_scores.mean())
    
    # Other metrics
    auprc.append(average_precision_score(y_test_resampled, y_prob, average='macro'))
    auroc.append(roc_auc_score(y_test_resampled, y_prob, average='macro'))
    f1.append(f1_score(y_test_resampled, y_pred, average='micro'))
    hamm.append(hamming_loss(y_test_resampled, y_pred))

print(f"Mean scores for always zero baseline with MultiOutputClassifier with 95% confidence intervals:")
print("    AUPRC macro: {:.2f} [{:.2f}, {:.2f}]".format(np.mean(auprc), np.percentile(auprc, 2.5), np.percentile(auprc, 97.5)))
print("    AUROC macro: {:.2f} [{:.2f}, {:.2f}]".format(np.mean(auroc), np.percentile(auroc, 2.5), np.percentile(auroc, 97.5)))
print("    Brier score: {:.2f} [{:.2f}, {:.2f}]".format(np.mean(brier), np.percentile(brier, 2.5), np.percentile(brier, 97.5)))
print("    Hamming loss: {:.2f} [{:.2f}, {:.2f}]".format(np.mean(hamm), np.percentile(hamm, 2.5), np.percentile(hamm, 97.5)))
print("    Micro Avg F1 score: {:.2f} [{:.2f}, {:.2f}]".format(np.mean(f1), np.percentile(f1, 2.5), np.percentile(f1, 97.5)))

Mean scores for always zero baseline with MultiOutputClassifier with 95% confidence intervals:
    AUPRC macro: 0.17 [0.16, 0.19]
    AUROC macro: 0.50 [0.48, 0.52]
    Brier score: 0.12 [0.12, 0.13]
    Hamming loss: 0.15 [0.14, 0.16]
    Micro Avg F1 score: 0.38 [0.36, 0.40]
