In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from utils.utils import load_data, remove_zero_features, standardize
from utils.utils import generate_oversampled_set, generate_undersampled_set, generate_label_stats

from utils.mlp_utils import DatasetBrainMeasures
from utils.mlp_train import train, test, train_focal, test_focal, compute_scores
from utils.mlp_model import MLP

from sklearn.model_selection import train_test_split

import torch
from torch import nn
from torch.utils.data import DataLoader

In [2]:
plot_path = "plots/"
checkpoints_path = "checkpoints/"

In [3]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using mps device


---

In [4]:
# Load data for classification task
subject_data, features, diagnoses = load_data('classification')

In [5]:
subject_data

Unnamed: 0,ID,release_number,Bold.Sequence,Sex,Age,EHQ_Total,Commercial_Use,Full_Pheno,Site,Field_Strength,Cohort
0,NDARDY714NV9,R3,abcd,Male,13.773214,74.57,Yes,Yes,CBIC,3.0,5
1,NDAREG930XPP,R3,cmrr,Male,7.995779,73.37,Yes,Yes,CBIC,3.0,2
2,NDAREK255DEE,R3,cmrr,Female,6.508898,26.68,Yes,Yes,CBIC,3.0,2
3,NDARFB757VY3,R3,abcd,Male,5.851813,-6.67,Yes,Yes,CBIC,3.0,2
4,NDARFJ803JF7,R3,abcd,Female,16.335843,61.16,Yes,Yes,CBIC,3.0,6
...,...,...,...,...,...,...,...,...,...,...,...
2810,NDARZP564MHU,R1,,Male,20.910107,100.05,No,Yes,SI,1.5,6
2811,NDARZR567HWG,R1,,Female,13.260552,77.84,No,Yes,SI,1.5,5
2812,NDARZT772PU4,R1,,Female,17.707278,16.68,No,Yes,SI,1.5,6
2813,NDARZV766YXP,R1,,Male,10.788272,84.51,No,Yes,SI,1.5,4


In [6]:
# Remove zero features
F = remove_zero_features(features.iloc[:,1:])

In [7]:
# Standardize
X = standardize(F)
print(f"Number of samples: {X.shape[0]}")
print(f"Number of features: {X.shape[1]}")

Number of samples: 2815
Number of features: 922


In [8]:
# Remove ID column
Y = diagnoses.iloc[:,1:]
print(f"Number of labels: {Y.shape[1]}")

Number of labels: 13


In [9]:
boot_iter = 100

In [10]:
batch_size = 128

---

# 1. Use dataset with original label distribution (no resampling)

In [13]:
# Split dataset into train and test (holdout) set
X_train, X_holdout, Y_train, Y_holdout = train_test_split(X, Y, test_size=0.25, random_state=0)
X_train, X_test, Y_train, Y_test = train_test_split(X_train, Y_train, test_size=0.2, random_state=1)
print(f"Number of samples in training set: {len(X_train)}")
print(f"Number of samples in test set: {len(X_test)}")
print(f"Number of samples in holdout set: {len(X_holdout)}")

Number of samples in training set: 1688
Number of samples in test set: 423
Number of samples in holdout set: 704


In [14]:
training_data = DatasetBrainMeasures(X_train, Y_train) 
test_data = DatasetBrainMeasures(X_test, Y_test)
print(f"Size of training set: {len(training_data)}")
print(f"Size of test set: {len(test_data)}")

Size of training set: 1688
Size of test set: 423


In [15]:
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [16]:
for X_, y_ in test_dataloader:
    print(f"Shape of X [batch_size, D]: {X_.shape}")
    print(f"Shape of Y [batch_size]: {y_.shape} {y_.dtype}")
    break

Shape of X [batch_size, D]: torch.Size([128, 922])
Shape of Y [batch_size]: torch.Size([128, 13]) torch.float32


---

## 1.1. BCE loss

In [17]:
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [18]:
loss_fn = nn.BCEWithLogitsLoss()

In [19]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, device, model, loss_fn, optimizer)
    test(test_dataloader, device, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.612316  [  336/ 1688]
Test Error: 
 Accuracy: 84.3%, Avg loss: 0.462676 

Epoch 2
-------------------------------
loss: 0.488067  [  336/ 1688]
Test Error: 
 Accuracy: 84.4%, Avg loss: 0.410873 

Epoch 3
-------------------------------
loss: 0.425903  [  336/ 1688]
Test Error: 
 Accuracy: 84.4%, Avg loss: 0.394935 

Epoch 4
-------------------------------
loss: 0.381059  [  336/ 1688]
Test Error: 
 Accuracy: 84.7%, Avg loss: 0.380453 

Epoch 5
-------------------------------
loss: 0.438287  [  336/ 1688]
Test Error: 
 Accuracy: 84.2%, Avg loss: 0.378000 

Epoch 6
-------------------------------
loss: 0.385393  [  336/ 1688]
Test Error: 
 Accuracy: 84.3%, Avg loss: 0.375118 

Epoch 7
-------------------------------
loss: 0.412439  [  336/ 1688]
Test Error: 
 Accuracy: 84.3%, Avg loss: 0.377384 

Epoch 8
-------------------------------
loss: 0.389165  [  336/ 1688]
Test Error: 
 Accuracy: 84.4%, Avg loss: 0.369965 

Epoch 9
----------------

In [20]:
compute_scores(X_holdout, Y_holdout, device, model, batch_size, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.21 (0.01) [0.19, 0.22]
auprc_weighted:               0.35 (0.01) [0.33, 0.37]
auroc_macro:                  0.55 (0.01) [0.53, 0.57]
auroc_weighted:               0.55 (0.01) [0.53, 0.57]
brier_macro:                  0.12 (0.00) [0.11, 0.12]
brier_weighted:               0.03 (0.00) [0.03, 0.03]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.38 (0.01) [0.36, 0.40]
hamming:                      0.15 (0.00) [0.14, 0.16]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


---

## 1.2. Focal loss

In [21]:
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
gamma = 2.0

In [22]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_focal(train_dataloader, device, model, optimizer, gamma)
    test_focal(test_dataloader, device, model, gamma)
print("Done!")

Epoch 1
-------------------------------
loss: 0.146619  [  336/ 1688]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.131531 

Epoch 2
-------------------------------
loss: 0.121475  [  336/ 1688]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.106481 

Epoch 3
-------------------------------
loss: 0.117853  [  336/ 1688]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.102016 

Epoch 4
-------------------------------
loss: 0.124176  [  336/ 1688]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.103129 

Epoch 5
-------------------------------
loss: 0.117234  [  336/ 1688]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.104420 

Epoch 6
-------------------------------
loss: 0.107006  [  336/ 1688]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.099164 

Epoch 7
-------------------------------
loss: 0.115706  [  336/ 1688]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.101116 

Epoch 8
-------------------------------
loss: 0.106602  [  336/ 1688]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.101658 

Epoch 9
----------------

In [23]:
compute_scores(X_holdout, Y_holdout, device, model, batch_size, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.20 (0.01) [0.19, 0.22]
auprc_weighted:               0.34 (0.01) [0.32, 0.37]
auroc_macro:                  0.56 (0.01) [0.54, 0.58]
auroc_weighted:               0.55 (0.01) [0.53, 0.57]
brier_macro:                  0.17 (0.00) [0.17, 0.17]
brier_weighted:               0.03 (0.00) [0.03, 0.03]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.38 (0.01) [0.36, 0.40]
hamming:                      0.15 (0.00) [0.14, 0.16]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


# 2. Use undersampled dataset

In [24]:
X_under, Y_under = generate_undersampled_set(X, Y)
label_stats, mean_ir = generate_label_stats(Y_under, True)
print(f"Mean imbalance ratio: {mean_ir}")
label_stats

Mean imbalance ratio: 2.7700915195670985


Unnamed: 0,Absolute frequency,Relative frequency,Imbalance ratio
Trauma_And_Stress_RelatedDisorders,69,0.069277,3.26087
DepressiveDisorders,103,0.103414,2.184466
Attention_Deficit_HyperactivityDisorder,225,0.225904,1.0
MotorDisorder,68,0.068273,3.308824
AutismSpectrumDisorder,117,0.11747,1.923077
CommunicationDisorder,105,0.105422,2.142857
OtherDisorders,55,0.055221,4.090909
SpecificLearningDisorder,177,0.177711,1.271186
Obsessive_Compulsive_And_RelatedDisorders,52,0.052209,4.326923
Disruptive,103,0.103414,2.184466


In [25]:
# Split dataset into train and test (holdout) set
X_train, X_holdout, Y_train, Y_holdout = train_test_split(X_under, Y_under, test_size=0.25, random_state=0)
X_train, X_test, Y_train, Y_test = train_test_split(X_train, Y_train, test_size=0.2, random_state=1)
print(f"Number of samples in training set: {len(X_train)}")
print(f"Number of samples in test set: {len(X_test)}")
print(f"Number of samples in holdout set: {len(X_holdout)}")

Number of samples in training set: 597
Number of samples in test set: 150
Number of samples in holdout set: 249


In [26]:
training_data = DatasetBrainMeasures(X_train, Y_train) 
test_data = DatasetBrainMeasures(X_test, Y_test)
print(f"Size of training set: {len(training_data)}")
print(f"Size of test set: {len(test_data)}")

Size of training set: 597
Size of test set: 150


In [27]:
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [28]:
for X_, y_ in test_dataloader:
    print(f"Shape of X [batch_size, D]: {X_.shape}")
    print(f"Shape of Y [batch_size]: {y_.shape} {y_.dtype}")
    break

Shape of X [batch_size, D]: torch.Size([128, 922])
Shape of Y [batch_size]: torch.Size([128, 13]) torch.float32


---

In [29]:
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [30]:
loss_fn = nn.BCEWithLogitsLoss()

In [31]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, device, model, loss_fn, optimizer)
    test(test_dataloader, device, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.681541  [  425/  597]
Test Error: 
 Accuracy: 88.7%, Avg loss: 0.666303 

Epoch 2
-------------------------------
loss: 0.611891  [  425/  597]
Test Error: 
 Accuracy: 88.7%, Avg loss: 0.523864 

Epoch 3
-------------------------------
loss: 0.549339  [  425/  597]
Test Error: 
 Accuracy: 88.7%, Avg loss: 0.443928 

Epoch 4
-------------------------------
loss: 0.503259  [  425/  597]
Test Error: 
 Accuracy: 88.7%, Avg loss: 0.395660 

Epoch 5
-------------------------------
loss: 0.482092  [  425/  597]
Test Error: 
 Accuracy: 88.7%, Avg loss: 0.373292 

Epoch 6
-------------------------------
loss: 0.418296  [  425/  597]
Test Error: 
 Accuracy: 88.7%, Avg loss: 0.358413 

Epoch 7
-------------------------------
loss: 0.430667  [  425/  597]
Test Error: 
 Accuracy: 88.7%, Avg loss: 0.371025 

Epoch 8
-------------------------------
loss: 0.407411  [  425/  597]
Test Error: 
 Accuracy: 88.7%, Avg loss: 0.370705 

Epoch 9
----------------

In [32]:
compute_scores(X_holdout, Y_holdout, device, model, batch_size, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.13 (0.01) [0.11, 0.15]
auprc_weighted:               0.16 (0.01) [0.14, 0.19]
auroc_macro:                  0.53 (0.02) [0.49, 0.58]
auroc_weighted:               0.53 (0.02) [0.49, 0.56]
brier_macro:                  0.09 (0.00) [0.09, 0.10]
brier_weighted:               0.01 (0.00) [0.01, 0.01]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.05 (0.00) [0.05, 0.05]
f1_micro:                     0.00 (0.00) [0.00, 0.00]
hamming:                      0.10 (0.01) [0.09, 0.11]
subset_accuracy:              0.34 (0.03) [0.28, 0.41]


# 3. Use oversampled dataset

In [33]:
# Resample data (undersampling)
X_over, Y_over = generate_oversampled_set(X, Y)
label_stats, mean_ir = generate_label_stats(Y_over, True)
print(f"Mean imbalance ratio: {mean_ir}")
label_stats

Mean imbalance ratio: 1.6092872677464145


Unnamed: 0,Absolute frequency,Relative frequency,Imbalance ratio
Trauma_And_Stress_RelatedDisorders,2580,0.116174,1.923256
DepressiveDisorders,3170,0.142741,1.5653
Attention_Deficit_HyperactivityDisorder,4582,0.206322,1.082933
MotorDisorder,3134,0.14112,1.58328
AutismSpectrumDisorder,3689,0.166111,1.34508
CommunicationDisorder,4431,0.199523,1.119838
OtherDisorders,2320,0.104467,2.138793
SpecificLearningDisorder,4962,0.223433,1.0
Obsessive_Compulsive_And_RelatedDisorders,2668,0.120137,1.85982
Disruptive,2801,0.126126,1.77151


In [34]:
# Split dataset into train and test (holdout) set
X_train, X_holdout, Y_train, Y_holdout = train_test_split(X_over, Y_over, test_size=0.25, random_state=0)
X_train, X_test, Y_train, Y_test = train_test_split(X_train, Y_train, test_size=0.2, random_state=1)
print(f"Number of samples in training set: {len(X_train)}")
print(f"Number of samples in test set: {len(X_test)}")
print(f"Number of samples in holdout set: {len(X_holdout)}")

Number of samples in training set: 13324
Number of samples in test set: 3332
Number of samples in holdout set: 5552


In [35]:
training_data = DatasetBrainMeasures(X_train, Y_train) 
test_data = DatasetBrainMeasures(X_test, Y_test)
print(f"Size of training set: {len(training_data)}")
print(f"Size of test set: {len(test_data)}")

Size of training set: 13324
Size of test set: 3332


In [36]:
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

In [37]:
for X_, y_ in test_dataloader:
    print(f"Shape of X [batch_size, D]: {X_.shape}")
    print(f"Shape of Y [batch_size]: {y_.shape} {y_.dtype}")
    break

Shape of X [batch_size, D]: torch.Size([128, 922])
Shape of Y [batch_size]: torch.Size([128, 13]) torch.float32


---

In [38]:
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [39]:
loss_fn = nn.BCEWithLogitsLoss()

In [40]:
epochs = 20
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, device, model, loss_fn, optimizer)
    test(test_dataloader, device, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.447978  [ 1260/13324]
Test Error: 
 Accuracy: 85.3%, Avg loss: 0.397421 

Epoch 2
-------------------------------
loss: 0.369598  [ 1260/13324]
Test Error: 
 Accuracy: 85.4%, Avg loss: 0.353186 

Epoch 3
-------------------------------
loss: 0.312855  [ 1260/13324]
Test Error: 
 Accuracy: 86.6%, Avg loss: 0.311824 

Epoch 4
-------------------------------
loss: 0.298815  [ 1260/13324]
Test Error: 
 Accuracy: 87.2%, Avg loss: 0.286409 

Epoch 5
-------------------------------
loss: 0.319445  [ 1260/13324]
Test Error: 
 Accuracy: 88.5%, Avg loss: 0.264687 

Epoch 6
-------------------------------
loss: 0.224274  [ 1260/13324]
Test Error: 
 Accuracy: 88.9%, Avg loss: 0.243310 

Epoch 7
-------------------------------
loss: 0.403181  [ 1260/13324]
Test Error: 
 Accuracy: 89.1%, Avg loss: 0.231602 

Epoch 8
-------------------------------
loss: 0.422065  [ 1260/13324]
Test Error: 
 Accuracy: 89.4%, Avg loss: 0.222363 

Epoch 9
----------------

In [41]:
compute_scores(X_holdout, Y_holdout, device, model, batch_size, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.86 (0.00) [0.85, 0.86]
auprc_weighted:               0.83 (0.00) [0.82, 0.84]
auroc_macro:                  0.96 (0.00) [0.96, 0.96]
auroc_weighted:               0.95 (0.00) [0.95, 0.95]
brier_macro:                  0.05 (0.00) [0.05, 0.05]
brier_weighted:               0.01 (0.00) [0.01, 0.01]
balanced_accuracy_macro:      0.81 (0.00) [0.81, 0.82]
balanced_accuracy_weighted:   0.12 (0.00) [0.12, 0.12]
f1_micro:                     0.71 (0.00) [0.70, 0.72]
hamming:                      0.07 (0.00) [0.07, 0.07]
subset_accuracy:              0.48 (0.01) [0.47, 0.50]
