In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from utils.utils import load_data, remove_zero_features, standardize
from utils.utils import generate_oversampled_set, generate_undersampled_set, generate_label_stats

from utils.mlp_utils import DatasetBrainMeasures
from utils.mlp_train import train, test, train_focal, test_focal, compute_scores
from utils.mlp_model import MLP

from sklearn.model_selection import train_test_split

import torch
from torch import nn
from torch.utils.data import DataLoader

In [33]:
plot_path = "plots/"
checkpoints_path = "checkpoints/"

In [34]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using mps device


---

In [35]:
# Load data for classification task
subject_data, features, diagnoses = load_data('classification')

In [36]:
# Remove zero features
F = remove_zero_features(features.iloc[:,1:])

In [37]:
# Standardize
X = standardize(F)
print(f"Number of samples: {X.shape[0]}")
print(f"Number of features: {X.shape[1]}")

Number of samples: 2815
Number of features: 922


In [38]:
# Remove ID column
Y = diagnoses.iloc[:,1:]
print(f"Number of labels: {Y.shape[1]}")

Number of labels: 13


In [44]:
boot_iter = 100

In [45]:
batch_size = 128

---

# 1. Use dataset with original label distribution (no resampling)

In [9]:
# Split dataset into train and test (holdout) set
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)
print(f"Number of samples in training set: {len(X_train)}")
print(f"Number of samples in test set: {len(X_test)}")

Number of samples in training set: 2111
Number of samples in test set: 704


In [10]:
training_data = DatasetBrainMeasures(X_train, Y_train) 
test_data = DatasetBrainMeasures(X_test, Y_test)
print(f"Size of training set: {len(training_data)}")
print(f"Size of test set: {len(test_data)}")

Size of training set: 2111
Size of test set: 704


In [12]:
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [41]:
for X_, y_ in test_dataloader:
    print(f"Shape of X [batch_size, D]: {X_.shape}")
    print(f"Shape of Y [batch_size]: {y_.shape} {y_.dtype}")
    break

Shape of X [batch_size, D]: torch.Size([128, 922])
Shape of Y [batch_size]: torch.Size([128, 13]) torch.float32


---

## 1.1. BCE loss

In [20]:
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [50]:
loss_fn = nn.BCEWithLogitsLoss()

In [22]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, device, model, loss_fn, optimizer)
    test(test_dataloader, device, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.542596  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.452492 

Epoch 2
-------------------------------
loss: 0.482496  [ 1071/ 2111]
Test Error: 
 Accuracy: 84.2%, Avg loss: 0.403383 

Epoch 3
-------------------------------
loss: 0.438573  [ 1071/ 2111]
Test Error: 
 Accuracy: 84.3%, Avg loss: 0.393528 

Epoch 4
-------------------------------
loss: 0.393759  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.9%, Avg loss: 0.388978 

Epoch 5
-------------------------------
loss: 0.380422  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.8%, Avg loss: 0.385814 

Epoch 6
-------------------------------
loss: 0.388738  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.9%, Avg loss: 0.384843 

Epoch 7
-------------------------------
loss: 0.428315  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.7%, Avg loss: 0.386596 

Epoch 8
-------------------------------
loss: 0.360277  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.6%, Avg loss: 0.381355 

Epoch 9
----------------

In [23]:
compute_scores(X_test, Y_test, device, model, batch_size, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.20 (0.01) [0.19, 0.21]
auprc_weighted:               0.34 (0.01) [0.33, 0.36]
auroc_macro:                  0.55 (0.01) [0.52, 0.56]
auroc_weighted:               0.54 (0.01) [0.52, 0.56]
brier_macro:                  0.12 (0.00) [0.11, 0.12]
brier_weighted:               0.03 (0.00) [0.03, 0.03]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.38 (0.01) [0.36, 0.40]
hamming:                      0.15 (0.00) [0.14, 0.16]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


---

## 1.2. Focal loss

In [28]:
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
gamma = 2.0

In [29]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_focal(train_dataloader, device, model, optimizer, gamma)
    test_focal(test_dataloader, device, model, gamma)
print("Done!")

Epoch 1
-------------------------------
loss: 0.150447  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.134527 

Epoch 2
-------------------------------
loss: 0.125204  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.113453 

Epoch 3
-------------------------------
loss: 0.109538  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.109520 

Epoch 4
-------------------------------
loss: 0.109046  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.103447 

Epoch 5
-------------------------------
loss: 0.113481  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.105218 

Epoch 6
-------------------------------
loss: 0.102034  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.105205 

Epoch 7
-------------------------------
loss: 0.106109  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.104344 

Epoch 8
-------------------------------
loss: 0.108610  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.103143 

Epoch 9
----------------

In [30]:
compute_scores(X_test, Y_test, device, model, batch_size, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.19 (0.01) [0.18, 0.20]
auprc_weighted:               0.33 (0.01) [0.31, 0.35]
auroc_macro:                  0.53 (0.01) [0.50, 0.55]
auroc_weighted:               0.52 (0.01) [0.51, 0.55]
brier_macro:                  0.16 (0.00) [0.16, 0.17]
brier_weighted:               0.03 (0.00) [0.03, 0.03]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.38 (0.01) [0.36, 0.40]
hamming:                      0.15 (0.00) [0.14, 0.16]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


# 2. Use undersampled dataset

In [40]:
X_under, Y_under = generate_undersampled_set(X, Y)
label_stats, mean_ir = generate_label_stats(Y_under, True)
print(f"Mean imbalance ratio: {mean_ir}")
label_stats

Mean imbalance ratio: 2.7700915195670985


Unnamed: 0,Absolute frequency,Relative frequency,Imbalance ratio
Trauma_And_Stress_RelatedDisorders,69,0.069277,3.26087
DepressiveDisorders,103,0.103414,2.184466
Attention_Deficit_HyperactivityDisorder,225,0.225904,1.0
MotorDisorder,68,0.068273,3.308824
AutismSpectrumDisorder,117,0.11747,1.923077
CommunicationDisorder,105,0.105422,2.142857
OtherDisorders,55,0.055221,4.090909
SpecificLearningDisorder,177,0.177711,1.271186
Obsessive_Compulsive_And_RelatedDisorders,52,0.052209,4.326923
Disruptive,103,0.103414,2.184466


In [46]:
# Split dataset into train and test (holdout) set
X_train, X_test, Y_train, Y_test = train_test_split(X_under, Y_under, test_size=0.25, random_state=0)

In [47]:
training_data = DatasetBrainMeasures(X_train, Y_train) 
test_data = DatasetBrainMeasures(X_test, Y_test)
print(f"Size of training set: {len(training_data)}")
print(f"Size of test set: {len(test_data)}")

Size of training set: 747
Size of test set: 249


In [48]:
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [49]:
for X_, y_ in test_dataloader:
    print(f"Shape of X [batch_size, D]: {X_.shape}")
    print(f"Shape of Y [batch_size]: {y_.shape} {y_.dtype}")
    break

Shape of X [batch_size, D]: torch.Size([128, 922])
Shape of Y [batch_size]: torch.Size([128, 13]) torch.float32


---

In [51]:
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [52]:
loss_fn = nn.BCEWithLogitsLoss()

In [53]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, device, model, loss_fn, optimizer)
    test(test_dataloader, device, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.689463  [  642/  747]
Test Error: 
 Accuracy: 89.9%, Avg loss: 0.673110 

Epoch 2
-------------------------------
loss: 0.576843  [  642/  747]
Test Error: 
 Accuracy: 89.2%, Avg loss: 0.480573 

Epoch 3
-------------------------------
loss: 0.542820  [  642/  747]
Test Error: 
 Accuracy: 89.9%, Avg loss: 0.401111 

Epoch 4
-------------------------------
loss: 0.473360  [  642/  747]
Test Error: 
 Accuracy: 89.9%, Avg loss: 0.403863 

Epoch 5
-------------------------------
loss: 0.485260  [  642/  747]
Test Error: 
 Accuracy: 89.9%, Avg loss: 0.355795 

Epoch 6
-------------------------------
loss: 0.432499  [  642/  747]
Test Error: 
 Accuracy: 89.9%, Avg loss: 0.347368 

Epoch 7
-------------------------------
loss: 0.450551  [  642/  747]
Test Error: 
 Accuracy: 89.9%, Avg loss: 0.338777 

Epoch 8
-------------------------------
loss: 0.401458  [  642/  747]
Test Error: 
 Accuracy: 89.9%, Avg loss: 0.339597 

Epoch 9
----------------

In [54]:
compute_scores(X_test, Y_test, device, model, batch_size, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.14 (0.01) [0.11, 0.17]
auprc_weighted:               0.17 (0.01) [0.14, 0.20]
auroc_macro:                  0.54 (0.02) [0.50, 0.58]
auroc_weighted:               0.54 (0.02) [0.50, 0.58]
brier_macro:                  0.09 (0.00) [0.08, 0.10]
brier_weighted:               0.01 (0.00) [0.01, 0.01]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.05 (0.00) [0.05, 0.05]
f1_micro:                     0.00 (0.00) [0.00, 0.00]
hamming:                      0.10 (0.01) [0.09, 0.11]
subset_accuracy:              0.34 (0.03) [0.28, 0.41]


# 3. Use oversampled dataset

In [55]:
# Resample data (undersampling)
X_over, Y_over = generate_oversampled_set(X, Y)
label_stats, mean_ir = generate_label_stats(Y_over, True)
print(f"Mean imbalance ratio: {mean_ir}")
label_stats

Mean imbalance ratio: 1.6092872677464145


Unnamed: 0,Absolute frequency,Relative frequency,Imbalance ratio
Trauma_And_Stress_RelatedDisorders,2580,0.116174,1.923256
DepressiveDisorders,3170,0.142741,1.5653
Attention_Deficit_HyperactivityDisorder,4582,0.206322,1.082933
MotorDisorder,3134,0.14112,1.58328
AutismSpectrumDisorder,3689,0.166111,1.34508
CommunicationDisorder,4431,0.199523,1.119838
OtherDisorders,2320,0.104467,2.138793
SpecificLearningDisorder,4962,0.223433,1.0
Obsessive_Compulsive_And_RelatedDisorders,2668,0.120137,1.85982
Disruptive,2801,0.126126,1.77151


In [56]:
# Split dataset into train and test (holdout) set
X_train, X_test, Y_train, Y_test = train_test_split(X_over, Y_over, test_size=0.25, random_state=0)

In [57]:
training_data = DatasetBrainMeasures(X_train, Y_train) 
test_data = DatasetBrainMeasures(X_test, Y_test)
print(f"Size of training set: {len(training_data)}")
print(f"Size of test set: {len(test_data)}")

Size of training set: 16656
Size of test set: 5552


In [58]:
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [59]:
for X_, y_ in test_dataloader:
    print(f"Shape of X [batch_size, D]: {X_.shape}")
    print(f"Shape of Y [batch_size]: {y_.shape} {y_.dtype}")
    break

Shape of X [batch_size, D]: torch.Size([128, 922])
Shape of Y [batch_size]: torch.Size([128, 13]) torch.float32


---

In [63]:
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [64]:
loss_fn = nn.BCEWithLogitsLoss()

In [None]:
epochs = 20
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, device, model, loss_fn, optimizer)
    test(test_dataloader, device, model, loss_fn)
print("Done!")

In [66]:
compute_scores(X_test, Y_test, device, model, batch_size, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.88 (0.00) [0.88, 0.89]
auprc_weighted:               0.86 (0.00) [0.86, 0.87]
auroc_macro:                  0.97 (0.00) [0.97, 0.97]
auroc_weighted:               0.96 (0.00) [0.96, 0.96]
brier_macro:                  0.04 (0.00) [0.04, 0.05]
brier_weighted:               0.01 (0.00) [0.01, 0.01]
balanced_accuracy_macro:      0.85 (0.00) [0.85, 0.86]
balanced_accuracy_weighted:   0.12 (0.00) [0.12, 0.13]
f1_micro:                     0.77 (0.00) [0.76, 0.78]
hamming:                      0.06 (0.00) [0.06, 0.06]
subset_accuracy:              0.56 (0.01) [0.54, 0.57]
