In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from utils.utils import load_data, remove_zero_features, standardize
from utils.utils import generate_oversampled_set, generate_undersampled_set, generate_label_stats

from utils.mlp_utils import DatasetBrainMeasures
from utils.mlp_train import train, test, train_focal, test_focal, compute_scores
from utils.mlp_model import MLP

from sklearn.model_selection import train_test_split

import torch
from torch import nn
from torch.utils.data import DataLoader

In [2]:
# Relative output directories (names suggest figures and model checkpoints;
# neither is used in the cells visible here).
plot_path, checkpoints_path = "plots/", "checkpoints/"

In [3]:
# Pick the compute backend in order of preference: CUDA, then Apple MPS,
# falling back to the CPU when neither is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using mps device


---

In [4]:
# Load data for classification task
# load_data returns three objects, unpacked here as subject metadata, features
# and diagnoses; the first column of `features` and `diagnoses` is dropped in
# later cells before they are used.
subject_data, features, diagnoses = load_data('classification')

In [5]:
# Display the subject metadata table as the cell output (the rendered output
# elides the middle rows).
subject_data

Unnamed: 0,ID,release_number,Bold.Sequence,Sex,Age,EHQ_Total,Commercial_Use,Full_Pheno,Site,Field_Strength,Cohort
0,NDARDY714NV9,R3,abcd,Male,13.773214,74.57,Yes,Yes,CBIC,3.0,5
1,NDAREG930XPP,R3,cmrr,Male,7.995779,73.37,Yes,Yes,CBIC,3.0,2
2,NDAREK255DEE,R3,cmrr,Female,6.508898,26.68,Yes,Yes,CBIC,3.0,2
3,NDARFB757VY3,R3,abcd,Male,5.851813,-6.67,Yes,Yes,CBIC,3.0,2
4,NDARFJ803JF7,R3,abcd,Female,16.335843,61.16,Yes,Yes,CBIC,3.0,6
...,...,...,...,...,...,...,...,...,...,...,...
2810,NDARZP564MHU,R1,,Male,20.910107,100.05,No,Yes,SI,1.5,6
2811,NDARZR567HWG,R1,,Female,13.260552,77.84,No,Yes,SI,1.5,5
2812,NDARZT772PU4,R1,,Female,17.707278,16.68,No,Yes,SI,1.5,6
2813,NDARZV766YXP,R1,,Male,10.788272,84.51,No,Yes,SI,1.5,4


In [6]:
# Remove zero features
# The first column of `features` is skipped before filtering
# (presumably a subject-ID column — TODO confirm).
F = remove_zero_features(features.iloc[:,1:])

In [7]:
# Standardize
# NOTE(review): standardization is applied to the full feature matrix before
# the train/test split. If `standardize` uses dataset-wide statistics, this
# leaks test-set information into training — confirm and, if so, fit on the
# training split only.
X = standardize(F)
print(f"Number of samples: {X.shape[0]}")
print(f"Number of features: {X.shape[1]}")

Number of samples: 2815
Number of features: 922


In [8]:
# Remove ID column
# Everything after the first column of `diagnoses` is kept as the label
# matrix; one column per label.
Y = diagnoses.iloc[:,1:]
print(f"Number of labels: {Y.shape[1]}")

Number of labels: 13


In [9]:
# Forwarded to compute_scores in the evaluation cells
# (presumably the number of bootstrap iterations — TODO confirm).
boot_iter = 100

In [10]:
# Mini-batch size shared by the DataLoaders and by compute_scores.
batch_size = 128

---

# 1. Use dataset with original label distribution (no resampling)

In [11]:
# Split dataset into train and test (holdout) set
# 25% of the samples are held out; random_state=0 fixes the partition so it
# can be reproduced.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)
print(f"Number of samples in training set: {len(X_train)}")
print(f"Number of samples in test set: {len(X_test)}")

Number of samples in training set: 2111
Number of samples in test set: 704


In [12]:
# Wrap both splits in the project's Dataset class and report their sizes.
training_data, test_data = (
    DatasetBrainMeasures(X_train, Y_train),
    DatasetBrainMeasures(X_test, Y_test),
)
print(f"Size of training set: {len(training_data)}")
print(f"Size of test set: {len(test_data)}")

Size of training set: 2111
Size of test set: 704


In [13]:
# Batch the two splits. Only the training loader is shuffled (new order each
# epoch). The test loader keeps a fixed order so that the evaluation batches,
# and the batch inspected in the next cell, are the same on every run.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [14]:
# Peek at a single test batch to confirm tensor shapes and dtype, then stop.
# NOTE: the second label reads "[batch_size]", but the targets are 2-D
# (batch, labels), as the printed shape shows.
for X_, y_ in test_dataloader:
    print(f"Shape of X [batch_size, D]: {X_.shape}")
    print(f"Shape of Y [batch_size]: {y_.shape} {y_.dtype}")
    break

Shape of X [batch_size, D]: torch.Size([128, 922])
Shape of Y [batch_size]: torch.Size([128, 13]) torch.float32


---

## 1.1. BCE loss

In [15]:
# Fresh model and optimizer for the BCE-loss experiment on the original data.
# Seeding PyTorch's global RNG right before construction makes the run
# repeatable and independent of which cells were executed earlier (assuming
# MLP relies on PyTorch's default initialisers). The seed matches the
# random_state used for the train/test split.
torch.manual_seed(0)
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [16]:
# Binary cross-entropy on raw logits: BCEWithLogitsLoss applies the sigmoid
# itself, so the model output must not already have been passed through one.
loss_fn = nn.BCEWithLogitsLoss()

In [17]:
# Train for a fixed number of epochs, evaluating on the test loader after
# every epoch.
epochs = 10
for epoch in range(1, epochs + 1):
    print(f"Epoch {epoch}\n-------------------------------")
    train(train_dataloader, device, model, loss_fn, optimizer)
    test(test_dataloader, device, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------


loss: 0.492065  [ 1071/ 2111]
Test Error: 
 Accuracy: 84.6%, Avg loss: 0.435458 

Epoch 2
-------------------------------
loss: 0.467306  [ 1071/ 2111]
Test Error: 
 Accuracy: 84.8%, Avg loss: 0.403875 

Epoch 3
-------------------------------
loss: 0.439433  [ 1071/ 2111]
Test Error: 
 Accuracy: 84.8%, Avg loss: 0.387058 

Epoch 4
-------------------------------
loss: 0.410764  [ 1071/ 2111]
Test Error: 
 Accuracy: 84.6%, Avg loss: 0.386344 

Epoch 5
-------------------------------
loss: 0.410884  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.6%, Avg loss: 0.390633 

Epoch 6
-------------------------------
loss: 0.418411  [ 1071/ 2111]
Test Error: 
 Accuracy: 84.0%, Avg loss: 0.380682 

Epoch 7
-------------------------------
loss: 0.376663  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.5%, Avg loss: 0.389160 

Epoch 8
-------------------------------
loss: 0.366217  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.8%, Avg loss: 0.387119 

Epoch 9
-------------------------------
loss: 0.382777  [ 1071/ 

In [18]:
# Score the trained model on the held-out test set. `boot_iter` is forwarded
# to compute_scores (presumably the number of bootstrap resamples behind the
# reported SE / confidence intervals — TODO confirm).
compute_scores(X_test, Y_test, device, model, batch_size, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.20 (0.01) [0.19, 0.21]
auprc_weighted:               0.34 (0.01) [0.32, 0.36]
auroc_macro:                  0.56 (0.01) [0.53, 0.58]
auroc_weighted:               0.55 (0.01) [0.53, 0.56]
brier_macro:                  0.12 (0.00) [0.11, 0.12]
brier_weighted:               0.03 (0.00) [0.03, 0.03]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.38 (0.01) [0.36, 0.40]
hamming:                      0.15 (0.00) [0.14, 0.16]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


---

## 1.2. Focal loss

In [19]:
# Fresh model and optimizer for the focal-loss experiment.
# Seeding PyTorch's global RNG right before construction makes the run
# repeatable and gives this experiment the same starting RNG state regardless
# of which cells ran before (assuming MLP relies on PyTorch's default
# initialisers).
torch.manual_seed(0)
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Focal-loss parameter handed to train_focal / test_focal.
gamma = 2.0

In [20]:
# Train with the focal-loss routines for a fixed number of epochs, evaluating
# on the test loader after every epoch.
epochs = 10
for epoch in range(1, epochs + 1):
    print(f"Epoch {epoch}\n-------------------------------")
    train_focal(train_dataloader, device, model, optimizer, gamma)
    test_focal(test_dataloader, device, model, gamma)
print("Done!")

Epoch 1
-------------------------------


loss: 0.145564  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.124443 

Epoch 2
-------------------------------
loss: 0.119060  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.109184 

Epoch 3
-------------------------------
loss: 0.109142  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.103832 

Epoch 4
-------------------------------
loss: 0.110452  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.107888 

Epoch 5
-------------------------------
loss: 0.109459  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.104338 

Epoch 6
-------------------------------
loss: 0.108868  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.104638 

Epoch 7
-------------------------------
loss: 0.103949  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.103892 

Epoch 8
-------------------------------
loss: 0.098755  [ 1071/ 2111]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.103468 

Epoch 9
-------------------------------
loss: 0.089774  [ 1071/ 

In [21]:
# Score the focal-loss model on the held-out test set. `boot_iter` is
# forwarded to compute_scores (presumably the number of bootstrap resamples
# behind the reported SE / confidence intervals — TODO confirm).
compute_scores(X_test, Y_test, device, model, batch_size, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.19 (0.01) [0.18, 0.21]
auprc_weighted:               0.34 (0.01) [0.32, 0.36]
auroc_macro:                  0.55 (0.01) [0.53, 0.57]
auroc_weighted:               0.54 (0.01) [0.52, 0.56]
brier_macro:                  0.16 (0.00) [0.16, 0.16]
brier_weighted:               0.03 (0.00) [0.03, 0.03]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.38 (0.01) [0.36, 0.40]
hamming:                      0.15 (0.00) [0.14, 0.16]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


# 2. Use undersampled dataset

In [32]:
# Split dataset into train and test (holdout) set
# Same arguments (including random_state) as the split in section 1, so this
# section starts from an identical partition.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

In [33]:
# Resample data (undersampling). Only the training split is resampled; the
# held-out test set is left untouched.
X_under, Y_under = generate_undersampled_set(X_train, Y_train)
# Per-label statistics of the resampled labels plus the mean imbalance ratio
# (the meaning of the second argument `True` is not visible here — TODO confirm).
label_stats, mean_ir = generate_label_stats(Y_under, True)
print(f"Mean imbalance ratio: {mean_ir}")
# Last expression: rendered as the cell's output table.
label_stats

Mean imbalance ratio: 2.956543202759316


Unnamed: 0,Absolute frequency,Relative frequency,Imbalance ratio
Trauma_And_Stress_RelatedDisorders,38,0.051701,4.473684
DepressiveDisorders,80,0.108844,2.125
Attention_Deficit_HyperactivityDisorder,170,0.231293,1.0
MotorDisorder,52,0.070748,3.269231
AutismSpectrumDisorder,101,0.137415,1.683168
CommunicationDisorder,75,0.102041,2.266667
OtherDisorders,35,0.047619,4.857143
SpecificLearningDisorder,128,0.17415,1.328125
Obsessive_Compulsive_And_RelatedDisorders,32,0.043537,5.3125
Disruptive,74,0.10068,2.297297


In [34]:
# From here on the undersampled data is the training set. This rebinds
# X_train / Y_train, so the original training split is no longer reachable
# under these names.
X_train, Y_train = X_under, Y_under
print(f"Number of samples in training set: {len(X_train)}")
print(f"Number of samples in test set: {len(X_test)}")

Number of samples in training set: 735
Number of samples in test set: 704


In [35]:
# Wrap the undersampled training data and the untouched test split in the
# project's Dataset class, then report their sizes.
training_data, test_data = (
    DatasetBrainMeasures(X_train, Y_train),
    DatasetBrainMeasures(X_test, Y_test),
)
print(f"Size of training set: {len(training_data)}")
print(f"Size of test set: {len(test_data)}")

Size of training set: 735
Size of test set: 704


In [26]:
# Batch the two splits. Only the training loader is shuffled (new order each
# epoch). The test loader keeps a fixed order so that the evaluation batches,
# and the batch inspected in the next cell, are the same on every run.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [27]:
# Peek at a single test batch to confirm tensor shapes and dtype, then stop.
# NOTE: the second label reads "[batch_size]", but the targets are 2-D
# (batch, labels), as the printed shape shows.
for X_, y_ in test_dataloader:
    print(f"Shape of X [batch_size, D]: {X_.shape}")
    print(f"Shape of Y [batch_size]: {y_.shape} {y_.dtype}")
    break

Shape of X [batch_size, D]: torch.Size([128, 922])
Shape of Y [batch_size]: torch.Size([128, 13]) torch.float32


---

In [28]:
# Fresh model and optimizer for the undersampled-data experiment.
# Seeding PyTorch's global RNG right before construction makes the run
# repeatable and gives this experiment the same starting RNG state regardless
# of which cells ran before (assuming MLP relies on PyTorch's default
# initialisers).
torch.manual_seed(0)
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [29]:
# Binary cross-entropy on raw logits: BCEWithLogitsLoss applies the sigmoid
# itself, so the model output must not already have been passed through one.
loss_fn = nn.BCEWithLogitsLoss()

In [30]:
# Train on the undersampled data for a fixed number of epochs, evaluating on
# the test loader after every epoch.
epochs = 10
for epoch in range(1, epochs + 1):
    print(f"Epoch {epoch}\n-------------------------------")
    train(train_dataloader, device, model, loss_fn, optimizer)
    test(test_dataloader, device, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------


loss: 0.647824  [  570/  735]
Test Error: 
 Accuracy: 84.4%, Avg loss: 0.609645 

Epoch 2
-------------------------------
loss: 0.598936  [  570/  735]
Test Error: 
 Accuracy: 84.8%, Avg loss: 0.503692 

Epoch 3
-------------------------------
loss: 0.523614  [  570/  735]
Test Error: 
 Accuracy: 84.3%, Avg loss: 0.470026 

Epoch 4
-------------------------------
loss: 0.478344  [  570/  735]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.432449 

Epoch 5
-------------------------------
loss: 0.441635  [  570/  735]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.418663 

Epoch 6
-------------------------------
loss: 0.418031  [  570/  735]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.415452 

Epoch 7
-------------------------------
loss: 0.400661  [  570/  735]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.409977 

Epoch 8
-------------------------------
loss: 0.409955  [  570/  735]
Test Error: 
 Accuracy: 83.3%, Avg loss: 0.408881 

Epoch 9
-------------------------------
loss: 0.412401  [  570/ 

In [31]:
# Score the model trained on undersampled data against the held-out test set.
# `boot_iter` is forwarded to compute_scores (presumably the number of
# bootstrap resamples behind the reported SE / confidence intervals — TODO confirm).
compute_scores(X_test, Y_test, device, model, batch_size, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.21 (0.01) [0.20, 0.22]
auprc_weighted:               0.36 (0.01) [0.34, 0.38]
auroc_macro:                  0.57 (0.01) [0.55, 0.59]
auroc_weighted:               0.57 (0.01) [0.54, 0.59]
brier_macro:                  0.13 (0.00) [0.12, 0.13]
brier_weighted:               0.03 (0.00) [0.03, 0.03]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.00 (0.00) [0.00, 0.00]
hamming:                      0.17 (0.00) [0.16, 0.17]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


# 3. Use oversampled dataset

In [36]:
# Split dataset into train and test (holdout) set
# Same arguments as the earlier splits. Re-running it restores the original
# training split, since X_train / Y_train were rebound to the undersampled
# data in section 2.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

In [37]:
# Resample data (oversampling). Only the training split is resampled; the
# held-out test set is left untouched.
X_over, Y_over = generate_oversampled_set(X_train, Y_train)
# Per-label statistics of the resampled labels plus the mean imbalance ratio
# (the meaning of the second argument `True` is not visible here — TODO confirm).
label_stats, mean_ir = generate_label_stats(Y_over, True)
print(f"Mean imbalance ratio: {mean_ir}")
# Last expression: rendered as the cell's output table.
label_stats

Mean imbalance ratio: 1.6752910161710401


Unnamed: 0,Absolute frequency,Relative frequency,Imbalance ratio
Trauma_And_Stress_RelatedDisorders,1895,0.112296,2.105541
DepressiveDisorders,2577,0.152711,1.548312
Attention_Deficit_HyperactivityDisorder,3496,0.20717,1.141304
MotorDisorder,2386,0.141393,1.672255
AutismSpectrumDisorder,2820,0.167111,1.414894
CommunicationDisorder,3459,0.204978,1.153513
OtherDisorders,1769,0.10483,2.255512
SpecificLearningDisorder,3990,0.236444,1.0
Obsessive_Compulsive_And_RelatedDisorders,2120,0.12563,1.882075
Disruptive,2080,0.123259,1.918269


In [38]:
# From here on the oversampled data is the training set. This rebinds
# X_train / Y_train, so the original training split is no longer reachable
# under these names.
X_train, Y_train = X_over, Y_over
print(f"Number of samples in training set: {len(X_train)}")
print(f"Number of samples in test set: {len(X_test)}")

Number of samples in training set: 16875
Number of samples in test set: 704


In [39]:
# Wrap the oversampled training data and the untouched test split in the
# project's Dataset class, then report their sizes.
training_data, test_data = (
    DatasetBrainMeasures(X_train, Y_train),
    DatasetBrainMeasures(X_test, Y_test),
)
print(f"Size of training set: {len(training_data)}")
print(f"Size of test set: {len(test_data)}")

Size of training set: 16875
Size of test set: 704


In [40]:
# Batch the two splits. Only the training loader is shuffled (new order each
# epoch). The test loader keeps a fixed order so that the evaluation batches,
# and the batch inspected in the next cell, are the same on every run.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [41]:
# Peek at a single test batch to confirm tensor shapes and dtype, then stop.
# NOTE: the second label reads "[batch_size]", but the targets are 2-D
# (batch, labels), as the printed shape shows.
for X_, y_ in test_dataloader:
    print(f"Shape of X [batch_size, D]: {X_.shape}")
    print(f"Shape of Y [batch_size]: {y_.shape} {y_.dtype}")
    break

Shape of X [batch_size, D]: torch.Size([128, 922])
Shape of Y [batch_size]: torch.Size([128, 13]) torch.float32


---

In [42]:
# Fresh model and optimizer for the oversampled-data experiment.
# Seeding PyTorch's global RNG right before construction makes the run
# repeatable and gives this experiment the same starting RNG state regardless
# of which cells ran before (assuming MLP relies on PyTorch's default
# initialisers).
torch.manual_seed(0)
model = MLP(input_dim=X_train.shape[1], output_dim=Y_train.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [43]:
# Binary cross-entropy on raw logits: BCEWithLogitsLoss applies the sigmoid
# itself, so the model output must not already have been passed through one.
loss_fn = nn.BCEWithLogitsLoss()

In [45]:
# Train on the oversampled data for a fixed number of epochs, evaluating on
# the test loader after every epoch.
epochs = 10
for epoch in range(1, epochs + 1):
    print(f"Epoch {epoch}\n-------------------------------")
    train(train_dataloader, device, model, loss_fn, optimizer)
    test(test_dataloader, device, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------


loss: 0.147634  [14124/16875]
Test Error: 
 Accuracy: 84.3%, Avg loss: 0.438117 

Epoch 2
-------------------------------
loss: 0.218950  [14124/16875]
Test Error: 
 Accuracy: 84.3%, Avg loss: 0.430178 

Epoch 3
-------------------------------
loss: 0.187419  [14124/16875]
Test Error: 
 Accuracy: 84.5%, Avg loss: 0.427287 

Epoch 4
-------------------------------
loss: 0.204801  [14124/16875]
Test Error: 
 Accuracy: 84.5%, Avg loss: 0.431450 

Epoch 5
-------------------------------
loss: 0.162329  [14124/16875]
Test Error: 
 Accuracy: 84.5%, Avg loss: 0.428462 

Epoch 6
-------------------------------
loss: 0.169589  [14124/16875]
Test Error: 
 Accuracy: 84.5%, Avg loss: 0.437055 

Epoch 7
-------------------------------
loss: 0.151113  [14124/16875]
Test Error: 
 Accuracy: 84.5%, Avg loss: 0.432552 

Epoch 8
-------------------------------
loss: 0.181620  [14124/16875]
Test Error: 
 Accuracy: 84.6%, Avg loss: 0.437984 

Epoch 9
-------------------------------
loss: 0.183761  [14124/1

In [46]:
# Score the model trained on oversampled data against the held-out test set.
# `boot_iter` is forwarded to compute_scores (presumably the number of
# bootstrap resamples behind the reported SE / confidence intervals — TODO confirm).
compute_scores(X_test, Y_test, device, model, batch_size, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.21 (0.01) [0.19, 0.22]
auprc_weighted:               0.35 (0.01) [0.33, 0.37]
auroc_macro:                  0.55 (0.01) [0.53, 0.57]
auroc_weighted:               0.54 (0.01) [0.52, 0.56]
brier_macro:                  0.12 (0.00) [0.12, 0.13]
brier_weighted:               0.03 (0.00) [0.03, 0.03]
balanced_accuracy_macro:      0.51 (0.00) [0.50, 0.51]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.09]
f1_micro:                     0.42 (0.01) [0.40, 0.44]
hamming:                      0.17 (0.00) [0.17, 0.18]
subset_accuracy:              0.05 (0.01) [0.03, 0.06]
