In [28]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt

from utils.utils import load_data, remove_zero_features, load_confounders, standardize, label_freq_sorted, pca_transform
from utils.utils import generate_oversampled_set, generate_undersampled_set, generate_label_stats
from utils.utils import compute_scores

from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.multioutput import MultiOutputClassifier, ClassifierChain

from sklearn.model_selection import train_test_split

In [29]:
N_CORES = joblib.cpu_count(only_physical_cores=True)
print(f"Number of physical cores: {N_CORES}")

Number of physical cores: 8


---

In [30]:
# Load data for classification task
subject_data, features, diagnoses = load_data('classification')

In [31]:
# Remove zero features
F = remove_zero_features(features.iloc[:,1:])

In [32]:
# Load confounders
C = load_confounders(subject_data)

In [33]:
# Standardize
X = standardize(F)
print(f"Number of samples: {X.shape[0]}")
print(f"Number of features: {X.shape[1]}")

Number of samples: 2815
Number of features: 922


In [34]:
# Remove ID column
Y = diagnoses.iloc[:,1:]
print(f"Number of labels: {Y.shape[1]}")

Number of labels: 13


In [35]:
boot_iter = 100

# 2. Use undersampled dataset

In [39]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

In [40]:
X_under, Y_under = generate_undersampled_set(X_train, Y_train)
label_stats, mean_ir = generate_label_stats(Y_under, True)
print(f"Mean imbalance ratio: {mean_ir}")
label_stats

Mean imbalance ratio: 2.956543202759316


Unnamed: 0,Absolute frequency,Relative frequency,Imbalance ratio
Trauma_And_Stress_RelatedDisorders,38,0.051701,4.473684
DepressiveDisorders,80,0.108844,2.125
Attention_Deficit_HyperactivityDisorder,170,0.231293,1.0
MotorDisorder,52,0.070748,3.269231
AutismSpectrumDisorder,101,0.137415,1.683168
CommunicationDisorder,75,0.102041,2.266667
OtherDisorders,35,0.047619,4.857143
SpecificLearningDisorder,128,0.17415,1.328125
Obsessive_Compulsive_And_RelatedDisorders,32,0.043537,5.3125
Disruptive,74,0.10068,2.297297


In [41]:
X_train, Y_train = X_under, Y_under

In [42]:
# Confounder 
C_train, C_test = C.loc[X_train.index], C.loc[X_test.index]

# PCA features
X_pca = pca_transform(F)
X_pca_train, X_pca_test = X_pca.loc[X_train.index], X_pca.loc[X_test.index]

print(f"Number of samples in training set: {len(X_train)}")
print(f"Number of samples in test set: {len(X_test)}")

Number of samples in training set: 735
Number of samples in test set: 704


## 2.1. MultiOutputClassifier
Evaluate classification models wrapped in meta estimator MultiOutputClassifier with respect to multi-label performance metrics

### 2.1.1. Confounders as features

#### 2.1.1.1. Logistic regression

In [43]:
clf = LogisticRegression(max_iter=10000, random_state=0)
meta_clf = MultiOutputClassifier(clf).fit(C_train, Y_train)
compute_scores(meta_clf, C_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.18 (0.01) [0.16, 0.19]
auprc_weighted:               0.31 (0.01) [0.29, 0.32]
auroc_macro:                  0.49 (0.01) [0.46, 0.51]
auroc_weighted:               0.49 (0.01) [0.47, 0.50]
brier_macro:                  0.13 (0.00) [0.12, 0.14]
brier_weighted:               0.04 (0.00) [0.03, 0.04]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.00 (0.00) [0.00, 0.00]
hamming:                      0.17 (0.00) [0.16, 0.17]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


#### 2.1.1.1. Histogram-based Gradient Boosting

In [44]:
clf = HistGradientBoostingClassifier(random_state=0)
meta_clf = MultiOutputClassifier(clf, n_jobs=-1).fit(C_train,Y_train)
compute_scores(meta_clf, C_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.18 (0.01) [0.16, 0.19]
auprc_weighted:               0.31 (0.01) [0.30, 0.34]
auroc_macro:                  0.48 (0.01) [0.46, 0.50]
auroc_weighted:               0.49 (0.01) [0.47, 0.51]
brier_macro:                  0.15 (0.00) [0.14, 0.16]
brier_weighted:               0.04 (0.00) [0.04, 0.04]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.51]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.09]
f1_micro:                     0.09 (0.01) [0.07, 0.10]
hamming:                      0.18 (0.00) [0.17, 0.19]
subset_accuracy:              0.08 (0.01) [0.06, 0.10]


### 2.1.2. PCA-projected data (top-10 components) as features 

#### 2.1.2.1. Logistic regression

In [45]:
clf = LogisticRegression(max_iter=10000, random_state=0)
meta_clf = MultiOutputClassifier(clf).fit(X_pca_train, Y_train)
compute_scores(meta_clf, X_pca_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.22 (0.01) [0.20, 0.24]
auprc_weighted:               0.36 (0.01) [0.34, 0.38]
auroc_macro:                  0.59 (0.01) [0.57, 0.60]
auroc_weighted:               0.56 (0.01) [0.54, 0.58]
brier_macro:                  0.13 (0.00) [0.12, 0.13]
brier_weighted:               0.04 (0.00) [0.03, 0.04]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.01 (0.01) [0.00, 0.02]
hamming:                      0.17 (0.00) [0.16, 0.17]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


#### 2.1.2.2. Histogram-based Gradient Boosting

In [16]:
clf = HistGradientBoostingClassifier(random_state=0)
meta_clf = MultiOutputClassifier(clf, n_jobs=-1).fit(X_pca_train,Y_train)
compute_scores(meta_clf, X_pca_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.14 (0.01) [0.12, 0.17]
auprc_weighted:               0.17 (0.02) [0.14, 0.20]
auroc_macro:                  0.54 (0.02) [0.49, 0.59]
auroc_weighted:               0.54 (0.02) [0.50, 0.58]
brier_macro:                  0.10 (0.01) [0.09, 0.11]
brier_weighted:               0.01 (0.00) [0.01, 0.01]
balanced_accuracy_macro:      0.50 (0.00) [0.49, 0.51]
balanced_accuracy_weighted:   0.05 (0.00) [0.05, 0.05]
f1_micro:                     0.08 (0.02) [0.05, 0.11]
hamming:                      0.12 (0.01) [0.11, 0.13]
subset_accuracy:              0.25 (0.03) [0.20, 0.30]


### 2.1.3. Original features (standardized)

#### 2.1.3.1. Logistic regression

In [46]:
clf = LogisticRegression(max_iter=10000, random_state=0)
meta_clf = MultiOutputClassifier(clf).fit(X_train, Y_train)
compute_scores(meta_clf, X_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.20 (0.01) [0.19, 0.23]
auprc_weighted:               0.34 (0.01) [0.32, 0.37]
auroc_macro:                  0.55 (0.01) [0.53, 0.57]
auroc_weighted:               0.53 (0.01) [0.51, 0.55]
brier_macro:                  0.18 (0.00) [0.18, 0.19]
brier_weighted:               0.05 (0.00) [0.05, 0.05]
balanced_accuracy_macro:      0.51 (0.01) [0.50, 0.52]
balanced_accuracy_weighted:   0.09 (0.00) [0.08, 0.09]
f1_micro:                     0.22 (0.01) [0.20, 0.24]
hamming:                      0.21 (0.00) [0.20, 0.22]
subset_accuracy:              0.06 (0.01) [0.04, 0.07]


#### 2.1.3.2. SVM

In [47]:
clf = SVC(kernel='rbf', gamma='scale', probability=True, random_state=0)
meta_clf = MultiOutputClassifier(clf, n_jobs=-1).fit(X_train, Y_train)
compute_scores(meta_clf, X_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.21 (0.01) [0.19, 0.23]
auprc_weighted:               0.34 (0.01) [0.32, 0.37]
auroc_macro:                  0.55 (0.01) [0.53, 0.57]
auroc_weighted:               0.54 (0.01) [0.52, 0.56]
brier_macro:                  0.13 (0.00) [0.12, 0.13]
brier_weighted:               0.04 (0.00) [0.03, 0.04]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.00 (0.00) [0.00, 0.00]
hamming:                      0.17 (0.00) [0.16, 0.17]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


#### 2.1.3.3. Histogram-based Gradient Boosting

In [20]:
clf = HistGradientBoostingClassifier(random_state=0)
meta_clf = MultiOutputClassifier(clf).fit(X_train, Y_train) # n_jobs=-1
compute_scores(meta_clf, X_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.17 (0.02) [0.14, 0.20]
auprc_weighted:               0.20 (0.02) [0.17, 0.24]
auroc_macro:                  0.59 (0.02) [0.55, 0.63]
auroc_weighted:               0.59 (0.02) [0.55, 0.62]
brier_macro:                  0.10 (0.01) [0.09, 0.11]
brier_weighted:               0.01 (0.00) [0.01, 0.01]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.51]
balanced_accuracy_weighted:   0.05 (0.00) [0.05, 0.05]
f1_micro:                     0.03 (0.01) [0.01, 0.06]
hamming:                      0.10 (0.01) [0.09, 0.12]
subset_accuracy:              0.32 (0.03) [0.26, 0.37]


#### 2.1.3.4. MLP

In [48]:
clf = MLPClassifier(random_state=0)
meta_clf = MultiOutputClassifier(clf).fit(X_train, Y_train) # n_jobs=-1
compute_scores(meta_clf, X_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.21 (0.01) [0.19, 0.23]
auprc_weighted:               0.35 (0.01) [0.33, 0.38]
auroc_macro:                  0.55 (0.01) [0.53, 0.58]
auroc_weighted:               0.55 (0.01) [0.53, 0.57]
brier_macro:                  0.16 (0.00) [0.15, 0.17]
brier_weighted:               0.05 (0.00) [0.04, 0.05]
balanced_accuracy_macro:      0.51 (0.00) [0.50, 0.52]
balanced_accuracy_weighted:   0.09 (0.00) [0.08, 0.09]
f1_micro:                     0.15 (0.01) [0.13, 0.18]
hamming:                      0.18 (0.00) [0.17, 0.19]
subset_accuracy:              0.08 (0.01) [0.06, 0.10]


## 2.2. ClassifierChain (ordered by frequency) 
Evaluate classification models wrapped in meta estimator ClassifierChain with respect to multi-label performance metrics

In [49]:
by_freq = label_freq_sorted(Y_train)

### 2.2.1. Logistic regression

In [50]:
clf = LogisticRegression(max_iter=10000, random_state=0)
meta_clf = ClassifierChain(clf, order=by_freq, random_state=0).fit(X_train, Y_train)
compute_scores(meta_clf, X_test, Y_test, boot_iter, chain=True)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.20 (0.01) [0.18, 0.22]
auprc_weighted:               0.34 (0.01) [0.32, 0.37]
auroc_macro:                  0.55 (0.01) [0.53, 0.58]
auroc_weighted:               0.53 (0.01) [0.51, 0.55]
brier_macro:                  0.18 (0.00) [0.17, 0.19]
brier_weighted:               0.05 (0.00) [0.05, 0.05]
balanced_accuracy_macro:      0.51 (0.01) [0.50, 0.52]
balanced_accuracy_weighted:   0.09 (0.00) [0.08, 0.09]
f1_micro:                     0.21 (0.01) [0.19, 0.24]
hamming:                      0.21 (0.00) [0.20, 0.22]
subset_accuracy:              0.06 (0.01) [0.04, 0.07]


### 2.2.2. Histogram-based Gradient Boosting

In [51]:
clf = HistGradientBoostingClassifier(random_state=0)
meta_clf = ClassifierChain(clf, order=by_freq, random_state=0).fit(X_train, Y_train)
compute_scores(meta_clf, X_test, Y_test, boot_iter, chain=True)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.23 (0.01) [0.21, 0.25]
auprc_weighted:               0.37 (0.01) [0.35, 0.39]
auroc_macro:                  0.58 (0.01) [0.55, 0.60]
auroc_weighted:               0.57 (0.01) [0.55, 0.59]
brier_macro:                  0.15 (0.00) [0.15, 0.16]
brier_weighted:               0.05 (0.00) [0.04, 0.05]
balanced_accuracy_macro:      0.51 (0.00) [0.50, 0.51]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.04 (0.01) [0.03, 0.06]
hamming:                      0.17 (0.00) [0.16, 0.17]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


## 2.3. ClassifierChain (random order) 
Evaluate classification models wrapped in meta estimator ClassifierChain with respect to multi-label performance metrics

### 2.3.1. Logistic regression

In [52]:
clf = LogisticRegression(max_iter=10000, random_state=0)
meta_clf = ClassifierChain(clf, random_state=0).fit(X_train, Y_train)
compute_scores(meta_clf, X_test, Y_test, boot_iter, chain=True)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.20 (0.01) [0.18, 0.22]
auprc_weighted:               0.34 (0.01) [0.32, 0.37]
auroc_macro:                  0.55 (0.01) [0.53, 0.58]
auroc_weighted:               0.53 (0.01) [0.51, 0.55]
brier_macro:                  0.18 (0.00) [0.17, 0.19]
brier_weighted:               0.05 (0.00) [0.05, 0.05]
balanced_accuracy_macro:      0.51 (0.01) [0.50, 0.52]
balanced_accuracy_weighted:   0.09 (0.00) [0.08, 0.09]
f1_micro:                     0.22 (0.01) [0.20, 0.24]
hamming:                      0.21 (0.00) [0.20, 0.22]
subset_accuracy:              0.05 (0.01) [0.04, 0.07]


### 2.3.2. Histogram-based Gradient Boosting

In [53]:
clf = HistGradientBoostingClassifier(random_state=0)
meta_clf = ClassifierChain(clf, random_state=0).fit(X_train, Y_train)
compute_scores(meta_clf, X_test, Y_test, boot_iter, chain=True)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.23 (0.01) [0.21, 0.25]
auprc_weighted:               0.37 (0.01) [0.35, 0.40]
auroc_macro:                  0.58 (0.01) [0.55, 0.60]
auroc_weighted:               0.57 (0.01) [0.55, 0.59]
brier_macro:                  0.15 (0.00) [0.15, 0.16]
brier_weighted:               0.05 (0.00) [0.04, 0.05]
balanced_accuracy_macro:      0.51 (0.00) [0.50, 0.51]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.09]
f1_micro:                     0.04 (0.01) [0.03, 0.06]
hamming:                      0.17 (0.00) [0.16, 0.17]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]
