In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from utils.utils import load_data, remove_zero_features, load_confounders, deconfound_linear, standardize
from utils.mlp_utils import datasetMF
from utils.mlp_train import train, test, eval, train_focal, test_focal
from utils.mlp_model import MLP

from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.metrics import average_precision_score, roc_auc_score, brier_score_loss, f1_score, hamming_loss

import torch
from torch import nn
from torch.utils.data import DataLoader

In [2]:
# Relative output directories (names suggest figures and model checkpoints;
# neither is read or written in the cells visible here).
plot_path, checkpoints_path = "plots/", "checkpoints/"

In [3]:
# Pick the compute backend in order of preference: CUDA GPU, then Apple MPS,
# and finally the CPU as a fallback.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using mps device


---

In [4]:
# Load data for the classification task.
# load_data returns three objects, unpacked here as subject_data, features and
# diagnoses; later cells drop the first column of `features` and `diagnoses`
# before using them as model inputs and labels.
subject_data, features, diagnoses = load_data('classification')

In [5]:
# Drop the first column of `features` (presumably the subject ID — TODO confirm),
# then remove zero features from the remaining columns.
feature_values = features.iloc[:, 1:]
F = remove_zero_features(feature_values)

In [6]:
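# Optional step (currently disabled): load the confounders for these subjects
# C = load_confounders(subject_data)

# Optional step (currently disabled): apply linear deconfounding to the features
# F = deconfound_linear(C, F)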

In [7]:
# Standardize the feature matrix and report its dimensions
X = standardize(F)
print("Number of samples: {}".format(X.shape[0]))
print("Number of features: {}".format(X.shape[1]))

Number of samples: 2815
Number of features: 922


In [8]:
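# Alternative feature set (currently disabled): confounders concatenated with the features.
# Requires C, which is only assigned in the (also commented-out) deconfounding cell above.
# X_plus = pd.concat((C, F.iloc[:,1:]), axis=1)
# X_plus = standardize(X_plus)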

In [9]:
# Label matrix: every column of `diagnoses` except the leading ID column
Y = diagnoses.iloc[:, 1:]
print("Number of labels: {}".format(Y.shape[1]))

Number of labels: 13


In [10]:
# Hold out 25% of the samples as a test set; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=0,
)
#X_train, X_test, Y_train, Y_test = train_test_split(X_plus, Y, test_size=0.25, random_state=0)
print("Number of samples in training set: {}".format(len(X_train)))
print("Number of samples in test set: {}".format(len(X_test)))

Number of samples in training set: 2111
Number of samples in test set: 704


---

In [11]:
# Wrap the train / holdout splits in datasetMF objects so they can be fed to
# torch DataLoaders.
training_data = datasetMF(X_train, Y_train)
test_data = datasetMF(X_test, Y_test)
print("Size of training set: {}".format(len(training_data)))
print("Size of test set: {}".format(len(test_data)))

Size of training set: 2111
Size of test set: 704


In [12]:
# Mini-batch size shared by the training, test and bootstrap-evaluation DataLoaders
batch_size = 128

In [13]:
# Training batches are reshuffled every epoch. The test loader keeps a fixed
# order so that per-epoch evaluation is deterministic; shuffling brings no
# benefit at evaluation time.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [14]:
# Peek at a single test batch to check tensor shapes and the label dtype.
# Dedicated loop names are used so that the standardized feature matrix `X`
# created earlier is not overwritten by a batch tensor (which would break a
# re-run of the train/test split cell).
for X_batch, y_batch in test_dataloader:
    print(f"Shape of X [batch_size, D]: {X_batch.shape}")
    print(f"Shape of Y [batch_size]: {y_batch.shape} {y_batch.dtype}")
    break

Shape of X [batch_size, D]: torch.Size([128, 922])
Shape of Y [batch_size]: torch.Size([128, 13]) torch.float32


---

BCE loss

In [15]:
# Fresh MLP and Adam optimizer for the BCE experiment.
# Seeding the global torch RNG first makes the run reproducible under
# "Restart & Run All": it fixes the DataLoader shuffling order and, assuming
# MLP relies on torch's default initialisers, the initial weights as well.
# NOTE: re-run this cell before the training cell. The training loop presumably
# updates `model` in place, so training without re-creating the model would
# continue from already-trained weights.
torch.manual_seed(0)
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [23]:
# Binary cross-entropy computed on raw logits (the sigmoid is applied inside
# the loss); passed to train/test in the BCE training loop.
loss_fn = nn.BCEWithLogitsLoss()

In [24]:
# Train with the BCE loss for a fixed number of epochs. After every epoch the
# model is also scored on the holdout loader for monitoring only; nothing in
# this cell selects a checkpoint based on that score.
epochs = 20
for epoch in range(1, epochs + 1):
    print(f"Epoch {epoch}\n-------------------------------")
    train(train_dataloader, device, model, loss_fn, optimizer)
    test(test_dataloader, device, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.604264  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.1%, Avg loss: 0.559689 

Epoch 2
-------------------------------
loss: 0.514664  [ 1071/ 2111]
Test Error: 
 Accuracy: 84.6%, Avg loss: 0.454788 

Epoch 3
-------------------------------
loss: 0.457410  [ 1071/ 2111]
Test Error: 
 Accuracy: 84.8%, Avg loss: 0.405467 

Epoch 4
-------------------------------
loss: 0.416961  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.6%, Avg loss: 0.393267 

Epoch 5
-------------------------------
loss: 0.438198  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.7%, Avg loss: 0.383266 

Epoch 6
-------------------------------
loss: 0.422225  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.8%, Avg loss: 0.386657 

Epoch 7
-------------------------------
loss: 0.398145  [ 1071/ 2111]
Test Error: 
 Accuracy: 84.0%, Avg loss: 0.382324 

Epoch 8
-------------------------------
loss: 0.384319  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.8%, Avg loss: 0.382415 

Epoch 9
----------------

In [25]:
# Bootstrap evaluation on the holdout set: resample the test set with
# replacement, score the current `model` on every resample, then report the
# mean and the 2.5th / 97.5th percentiles of each metric.
n_bootstrap = 100  # number of bootstrap resamples

auprc = []
auroc = []
brier = []
hamm = []
f1 = []

for seed in range(n_bootstrap):
    # Same size as the test set; the per-iteration seed makes every resample
    # reproducible.
    X_test_resampled, y_test_resampled = resample(
        X_test, Y_test, replace=True, n_samples=len(Y_test), random_state=seed
    )

    eval_data = datasetMF(X_test_resampled, y_test_resampled)
    # No shuffling: the outputs are compared row by row with y_test_resampled
    # (assumes eval returns rows in loader order — TODO confirm).
    eval_dataloader = DataLoader(eval_data, batch_size=batch_size, shuffle=False)
    y_prob, y_pred = eval(eval_dataloader, device, model)

    # Brier score is computed per label and then averaged over the labels.
    # A dedicated index name avoids clobbering the bootstrap loop variable.
    brier_scores = np.array([
        brier_score_loss(y_test_resampled.iloc[:, label_idx], y_prob[:, label_idx])
        for label_idx in range(y_prob.shape[1])
    ])
    brier.append(brier_scores.mean())

    # Other metrics
    auprc.append(average_precision_score(y_test_resampled, y_prob, average='macro'))
    auroc.append(roc_auc_score(y_test_resampled, y_prob, average='macro'))
    f1.append(f1_score(y_test_resampled, y_pred, average='micro'))
    hamm.append(hamming_loss(y_test_resampled, y_pred))

print("Mean scores for combined MLP with with 95% confidence intervals:")
for metric_name, scores in (
    ("AUPRC macro", auprc),
    ("AUROC macro", auroc),
    ("Brier score", brier),
    ("Hamming loss", hamm),
    ("Micro Avg F1 score", f1),
):
    lower, upper = np.percentile(scores, [2.5, 97.5])
    print("    {}: {:.2f} [{:.2f}, {:.2f}]".format(metric_name, np.mean(scores), lower, upper))

Mean scores for combined MLP with with 95% confidence intervals:
    AUPRC macro: 0.22 [0.20, 0.23]
    AUROC macro: 0.55 [0.53, 0.58]
    Brier score: 0.12 [0.11, 0.12]
    Hamming loss: 0.15 [0.14, 0.16]
    Micro Avg F1 score: 0.38 [0.36, 0.40]


---

Focal loss

In [21]:
# Fresh MLP and Adam optimizer for the focal-loss experiment.
# Seeding the global torch RNG first makes the run reproducible under
# "Restart & Run All": it fixes the DataLoader shuffling order and, assuming
# MLP relies on torch's default initialisers, the initial weights as well.
# NOTE: this rebinds `model` and `optimizer`; every later cell that uses them
# (training, evaluation) then operates on this model.
torch.manual_seed(0)
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# gamma is forwarded to train_focal / test_focal (presumably the focal-loss
# focusing parameter — confirm in utils.mlp_train).
gamma = 2.0

In [22]:
# Train with the focal-loss routines for a fixed number of epochs. After every
# epoch the model is also scored on the holdout loader for monitoring only.
epochs = 20
for epoch in range(1, epochs + 1):
    print(f"Epoch {epoch}\n-------------------------------")
    train_focal(train_dataloader, device, model, optimizer, gamma)
    test_focal(test_dataloader, device, model, gamma)
print("Done!")

Epoch 1
-------------------------------
loss: 0.268102  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.237210 

Epoch 2
-------------------------------
loss: 0.235687  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.213526 

Epoch 3
-------------------------------
loss: 0.217806  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.202438 

Epoch 4
-------------------------------
loss: 0.218142  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.201759 

Epoch 5
-------------------------------
loss: 0.219017  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.199066 

Epoch 6
-------------------------------
loss: 0.199332  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.198957 

Epoch 7
-------------------------------
loss: 0.211503  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.195480 

Epoch 8
-------------------------------
loss: 0.211948  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.196172 

Epoch 9
----------------

In [23]:
# Bootstrap evaluation on the holdout set: resample the test set with
# replacement, score the current `model` on every resample, then report the
# mean and the 2.5th / 97.5th percentiles of each metric.
n_bootstrap = 100  # number of bootstrap resamples

auprc = []
auroc = []
brier = []
hamm = []
f1 = []

for seed in range(n_bootstrap):
    # Same size as the test set; the per-iteration seed makes every resample
    # reproducible.
    X_test_resampled, y_test_resampled = resample(
        X_test, Y_test, replace=True, n_samples=len(Y_test), random_state=seed
    )

    eval_data = datasetMF(X_test_resampled, y_test_resampled)
    # No shuffling: the outputs are compared row by row with y_test_resampled
    # (assumes eval returns rows in loader order — TODO confirm).
    eval_dataloader = DataLoader(eval_data, batch_size=batch_size, shuffle=False)
    y_prob, y_pred = eval(eval_dataloader, device, model)

    # Brier score is computed per label and then averaged over the labels.
    # A dedicated index name avoids clobbering the bootstrap loop variable.
    brier_scores = np.array([
        brier_score_loss(y_test_resampled.iloc[:, label_idx], y_prob[:, label_idx])
        for label_idx in range(y_prob.shape[1])
    ])
    brier.append(brier_scores.mean())

    # Other metrics
    auprc.append(average_precision_score(y_test_resampled, y_prob, average='macro'))
    auroc.append(roc_auc_score(y_test_resampled, y_prob, average='macro'))
    f1.append(f1_score(y_test_resampled, y_pred, average='micro'))
    hamm.append(hamming_loss(y_test_resampled, y_pred))

print("Mean scores for combined MLP with with 95% confidence intervals:")
for metric_name, scores in (
    ("AUPRC macro", auprc),
    ("AUROC macro", auroc),
    ("Brier score", brier),
    ("Hamming loss", hamm),
    ("Micro Avg F1 score", f1),
):
    lower, upper = np.percentile(scores, [2.5, 97.5])
    print("    {}: {:.2f} [{:.2f}, {:.2f}]".format(metric_name, np.mean(scores), lower, upper))

Mean scores for combined MLP with with 95% confidence intervals:
    AUPRC macro: 0.21 [0.19, 0.23]
    AUROC macro: 0.55 [0.52, 0.57]
    Brier score: 0.13 [0.13, 0.14]
    Hamming loss: 0.15 [0.14, 0.16]
    Micro Avg F1 score: 0.38 [0.36, 0.40]
