In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

from utils.utils import load_data, remove_zero_features, standardize
from utils.convnet_utils import datasetT1
from utils.convnet_train import train, test, eval
from utils.convnet_model import SFCN

from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.metrics import average_precision_score, roc_auc_score, brier_score_loss, f1_score, hamming_loss

import torch
from torch import nn
from torch.utils.data import DataLoader

In [2]:
# Folder holding the T1 images, located directly under the current user's home directory.
user_dir = os.path.expanduser("~")
base_dir = f"{user_dir}/t1images/"

# Relative output folders (not referenced by the cells visible in this part of the notebook).
plot_path, weights_path = "plots/", "weights/"

In [3]:
# Pick the compute backend in priority order: CUDA, then Apple MPS, otherwise CPU.
# MPS availability is only queried when CUDA is unavailable.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using mps device


In [16]:
# Load data for classification set
X, _, Y = load_data('classification_t1')
print(f"Size of T1 set: {X.shape[0]}")

Size of T1 set: 2491


In [17]:
# Hold out 25% of the rows as a test set; the fixed random_state makes the split reproducible.
# Inputs are the first column of X, targets are every column of Y except the first.
X_train, X_test, Y_train, Y_test = train_test_split(
    X.iloc[:, 0],
    Y.iloc[:, 1:],
    test_size=0.25,
    random_state=0,
)
print(f"Size training set: {X_train.shape[0]}")
print(f"Size test set: {X_test.shape[0]}")

Size training set: 1868
Size test set: 623


---

In [21]:
# Wrap each split in the project's datasetT1, reading the 'T1w' modality from base_dir.
training_data = datasetT1(
    X_train,
    Y_train,
    modality='T1w',
    base_dir=base_dir,
)
test_data = datasetT1(
    X_test,
    Y_test,
    modality='T1w',
    base_dir=base_dir,
)
print(f"Size of training set: {len(training_data)}")
print(f"Size of test set: {len(test_data)}")

Size of training set: 1868
Size of test set: 623


In [None]:
# Mini-batch size passed to every DataLoader constructed in this notebook.
batch_size = 2

In [22]:
# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
# The test loader keeps dataset order: shuffling adds nothing to evaluation and makes
# the per-epoch test pass non-deterministic (e.g. which sample ends up in a short last batch).
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [None]:
# Instantiate the project's SFCN network with 13 outputs and move it to the selected device.
# NOTE(review): 13 presumably equals the number of label columns in Y_train — confirm,
# since the loss used for training needs outputs and targets of the same shape.
model = SFCN(output_dim=13)
model.to(device)

In [None]:
# BCEWithLogitsLoss applies the sigmoid internally.
# NOTE(review): this assumes SFCN returns raw logits rather than probabilities — confirm in utils.convnet_model.
loss_fn = nn.BCEWithLogitsLoss()
# Adam over all model parameters with learning rate 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Number of full passes over the training loader.
epochs = 1
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    # One pass of the project's train() over the training loader ...
    train(train_dataloader, device, model, loss_fn, optimizer)
    # ... followed by the project's test() on the test loader.
    # NOTE(review): this is the same test split used for the final metrics; make sure
    # no model-selection decision is taken from this output.
    test(test_dataloader, device, model, loss_fn)
print("Done!")

---

In [None]:
# Bootstrap evaluation of the trained model on the held-out test set.
#
# Inference is run ONCE on the un-shuffled test loader. Each bootstrap replicate then
# draws row positions with replacement and applies the SAME positions to both the labels
# and the predictions, so every metric compares a label row with the prediction made for
# that very sample. (Resampling only the labels, as done previously, scores predictions
# against labels of unrelated samples.)
n_bootstrap = 100

auprc = []
auroc = []
brier = []
hamm = []
f1 = []

eval_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# The model is fixed, so its predictions are loop-invariant: compute them a single time.
y_prob, y_pred = eval(eval_dataloader, device, model, loss_fn)
y_prob = np.asarray(y_prob)
y_pred = np.asarray(y_pred)

# NOTE(review): assumes eval() returns one row per test sample, in loader order, and that
# datasetT1 keeps the row order of Y_test — confirm in utils.convnet_train / convnet_utils.
if y_prob.shape[0] != len(Y_test) or y_pred.shape[0] != len(Y_test):
    raise ValueError(
        f"eval() returned {y_prob.shape[0]} probability rows and {y_pred.shape[0]} "
        f"prediction rows for {len(Y_test)} test samples"
    )

row_positions = np.arange(len(Y_test))

for seed in range(n_bootstrap):
    # Positional indices drawn with replacement; the seed makes each replicate reproducible.
    sampled = resample(row_positions, replace=True, n_samples=len(row_positions), random_state=seed)
    y_true_bs = Y_test.iloc[sampled]
    y_prob_bs = y_prob[sampled]
    y_pred_bs = y_pred[sampled]

    # Brier score is defined per binary label, so average it over the label columns.
    brier.append(np.mean([
        brier_score_loss(y_true_bs.iloc[:, col], y_prob_bs[:, col])
        for col in range(y_prob_bs.shape[1])
    ]))

    # Other metrics
    auprc.append(average_precision_score(y_true_bs, y_prob_bs, average='macro'))
    auroc.append(roc_auc_score(y_true_bs, y_prob_bs, average='macro'))
    f1.append(f1_score(y_true_bs, y_pred_bs, average='micro'))
    hamm.append(hamming_loss(y_true_bs, y_pred_bs))

print("Mean scores for SFCN on the T1 test set with 95% confidence intervals:")
print("    AUPRC macro: {:.2f} [{:.2f}, {:.2f}]".format(np.mean(auprc), np.percentile(auprc, 2.5), np.percentile(auprc, 97.5)))
print("    AUROC macro: {:.2f} [{:.2f}, {:.2f}]".format(np.mean(auroc), np.percentile(auroc, 2.5), np.percentile(auroc, 97.5)))
print("    Brier score: {:.2f} [{:.2f}, {:.2f}]".format(np.mean(brier), np.percentile(brier, 2.5), np.percentile(brier, 97.5)))
print("    Hamming loss: {:.2f} [{:.2f}, {:.2f}]".format(np.mean(hamm), np.percentile(hamm, 2.5), np.percentile(hamm, 97.5)))
print("    Micro Avg F1 score: {:.2f} [{:.2f}, {:.2f}]".format(np.mean(f1), np.percentile(f1, 2.5), np.percentile(f1, 97.5)))