In [1]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt

from utils.utils import load_data, remove_zero_features, load_confounders, standardize, label_freq_sorted, pca_transform
from utils.utils import generate_oversampled_set, generate_undersampled_set, generate_label_stats
from utils.utils import compute_scores

from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.multioutput import MultiOutputClassifier, ClassifierChain

from sklearn.model_selection import train_test_split

In [2]:
N_CORES = joblib.cpu_count(only_physical_cores=True)
print(f"Number of physical cores: {N_CORES}")

Number of physical cores: 8


---

In [3]:
# Load data for classification task
subject_data, features, diagnoses = load_data('classification')

In [4]:
# Remove zero features
F = remove_zero_features(features.iloc[:,1:])

In [5]:
# Load confounders
C = load_confounders(subject_data)

In [6]:
# Standardize
X = standardize(F)
print(f"Number of samples: {X.shape[0]}")
print(f"Number of features: {X.shape[1]}")

Number of samples: 2815
Number of features: 922


In [7]:
# Remove ID column
Y = diagnoses.iloc[:,1:]
print(f"Number of labels: {Y.shape[1]}")

Number of labels: 13


In [8]:
boot_iter = 100

# 3. Use oversampled dataset

In [9]:
# Split dataset into train and test (holdout) set
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

In [10]:
X_over, Y_over = generate_oversampled_set(X_train, Y_train)
label_stats, mean_ir = generate_label_stats(Y_over, True)
print(f"Mean imbalance ratio: {mean_ir}")
label_stats

Mean imbalance ratio: 1.6752910161710401


Unnamed: 0,Absolute frequency,Relative frequency,Imbalance ratio
Trauma_And_Stress_RelatedDisorders,1895,0.112296,2.105541
DepressiveDisorders,2577,0.152711,1.548312
Attention_Deficit_HyperactivityDisorder,3496,0.20717,1.141304
MotorDisorder,2386,0.141393,1.672255
AutismSpectrumDisorder,2820,0.167111,1.414894
CommunicationDisorder,3459,0.204978,1.153513
OtherDisorders,1769,0.10483,2.255512
SpecificLearningDisorder,3990,0.236444,1.0
Obsessive_Compulsive_And_RelatedDisorders,2120,0.12563,1.882075
Disruptive,2080,0.123259,1.918269


In [11]:
X_train, Y_train = X_over, Y_over

In [12]:

# Confounder 
C_train, C_test = C.loc[X_train.index], C.loc[X_test.index]

# PCA features
X_pca = pca_transform(F)
X_pca_train, X_pca_test = X_pca.loc[X_train.index], X_pca.loc[X_test.index]


print(f"Number of samples in C training set: {len(C_train)}")
print(f"Number of samples in C test set: {len(C_test)}")

print(f"Number of samples in PCA training set: {len(X_pca_train)}")
print(f"Number of samples in PCA test set: {len(X_pca_test)}")

print(f"Number of samples in training set: {len(X_train)}")
print(f"Number of samples in test set: {len(X_test)}")

Number of samples in C training set: 16875
Number of samples in C test set: 704
Number of samples in PCA training set: 16875
Number of samples in PCA test set: 704
Number of samples in training set: 16875
Number of samples in test set: 704


## 3.1. MultiOutputClassifier
Evaluate classification models wrapped in meta estimator MultiOutputClassifier with respect to multi-label performance metrics

### 3.1.1. Confounders as features

#### 3.1.1.1. Logistic regression

In [15]:
clf = LogisticRegression(max_iter=10000, random_state=0)
meta_clf = MultiOutputClassifier(clf, n_jobs=-1).fit(C_train, Y_train)
compute_scores(meta_clf, C_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.17 (0.01) [0.16, 0.18]
auprc_weighted:               0.30 (0.01) [0.28, 0.32]
auroc_macro:                  0.49 (0.01) [0.47, 0.51]
auroc_weighted:               0.48 (0.01) [0.46, 0.50]
brier_macro:                  0.14 (0.00) [0.13, 0.14]
brier_weighted:               0.04 (0.00) [0.04, 0.04]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.50]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.00 (0.00) [0.00, 0.00]
hamming:                      0.17 (0.00) [0.16, 0.17]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


#### 3.1.1.1. Histogram-based Gradient Boosting

In [16]:
clf = HistGradientBoostingClassifier(random_state=0)
meta_clf = MultiOutputClassifier(clf, n_jobs=-1).fit(C_train,Y_train)
compute_scores(meta_clf, C_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.18 (0.01) [0.17, 0.19]
auprc_weighted:               0.32 (0.01) [0.30, 0.35]
auroc_macro:                  0.48 (0.01) [0.46, 0.51]
auroc_weighted:               0.49 (0.01) [0.47, 0.51]
brier_macro:                  0.15 (0.00) [0.14, 0.15]
brier_weighted:               0.04 (0.00) [0.04, 0.04]
balanced_accuracy_macro:      0.50 (0.00) [0.49, 0.51]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.09]
f1_micro:                     0.25 (0.01) [0.23, 0.27]
hamming:                      0.19 (0.00) [0.19, 0.20]
subset_accuracy:              0.06 (0.01) [0.04, 0.08]


### 3.1.2. PCA-projected data (top-10 components) as features 

#### 3.1.2.1. Logistic regression

In [17]:
clf = LogisticRegression(max_iter=10000, random_state=0)
meta_clf = MultiOutputClassifier(clf).fit(X_pca_train, Y_train)
compute_scores(meta_clf, X_pca_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.22 (0.01) [0.20, 0.23]
auprc_weighted:               0.36 (0.01) [0.34, 0.38]
auroc_macro:                  0.58 (0.01) [0.56, 0.59]
auroc_weighted:               0.56 (0.01) [0.55, 0.58]
brier_macro:                  0.13 (0.00) [0.13, 0.14]
brier_weighted:               0.04 (0.00) [0.04, 0.04]
balanced_accuracy_macro:      0.50 (0.00) [0.50, 0.51]
balanced_accuracy_weighted:   0.08 (0.00) [0.08, 0.08]
f1_micro:                     0.01 (0.00) [0.01, 0.02]
hamming:                      0.17 (0.00) [0.16, 0.18]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]


#### 3.1.2.2. Histogram-based Gradient Boosting

In [21]:
clf = HistGradientBoostingClassifier(random_state=0)
meta_clf = MultiOutputClassifier(clf).fit(X_pca_train,Y_train) # n_jobs=-1
compute_scores(meta_clf, X_pca_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.22 (0.01) [0.20, 0.25]
auprc_weighted:               0.36 (0.01) [0.34, 0.38]
auroc_macro:                  0.56 (0.01) [0.54, 0.58]
auroc_weighted:               0.56 (0.01) [0.54, 0.58]
brier_macro:                  0.13 (0.00) [0.12, 0.13]
brier_weighted:               0.03 (0.00) [0.03, 0.03]
balanced_accuracy_macro:      0.52 (0.00) [0.51, 0.53]
balanced_accuracy_weighted:   0.09 (0.00) [0.09, 0.09]
f1_micro:                     0.32 (0.01) [0.29, 0.34]
hamming:                      0.17 (0.00) [0.16, 0.18]
subset_accuracy:              0.09 (0.01) [0.06, 0.11]


### 3.1.3. Original features (standardized)

#### 3.1.3.1. Logistic regression

In [22]:
clf = LogisticRegression(max_iter=10000, random_state=0)
meta_clf = MultiOutputClassifier(clf, n_jobs=-1).fit(X_train, Y_train) # n_jobs=-1
compute_scores(meta_clf, X_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.21 (0.01) [0.19, 0.23]
auprc_weighted:               0.35 (0.01) [0.33, 0.37]
auroc_macro:                  0.56 (0.01) [0.54, 0.58]
auroc_weighted:               0.56 (0.01) [0.54, 0.57]
brier_macro:                  0.20 (0.00) [0.20, 0.21]
brier_weighted:               0.05 (0.00) [0.05, 0.05]
balanced_accuracy_macro:      0.52 (0.01) [0.51, 0.53]
balanced_accuracy_weighted:   0.09 (0.00) [0.09, 0.09]
f1_micro:                     0.33 (0.01) [0.31, 0.35]
hamming:                      0.24 (0.00) [0.23, 0.24]
subset_accuracy:              0.03 (0.01) [0.01, 0.04]


#### 3.1.3.2. SVM

In [31]:
clf = SVC(class_weight='balanced', kernel='rbf', gamma='scale', probability=True, random_state=0)
meta_clf = MultiOutputClassifier(clf, n_jobs=-1).fit(X_train, Y_train)
compute_scores(meta_clf, X_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.22 (0.01) [0.21, 0.24]
auprc_weighted:               0.37 (0.01) [0.34, 0.39]
auroc_macro:                  0.58 (0.01) [0.56, 0.61]
auroc_weighted:               0.58 (0.01) [0.56, 0.60]
brier_macro:                  0.14 (0.00) [0.13, 0.14]
brier_weighted:               0.04 (0.00) [0.03, 0.04]
balanced_accuracy_macro:      0.53 (0.00) [0.52, 0.54]
balanced_accuracy_weighted:   0.09 (0.00) [0.09, 0.09]
f1_micro:                     0.44 (0.01) [0.42, 0.46]
hamming:                      0.18 (0.00) [0.17, 0.19]
subset_accuracy:              0.08 (0.01) [0.06, 0.10]


#### 3.1.3.3. Histogram-based Gradient Boosting

In [24]:
clf = HistGradientBoostingClassifier(random_state=0)
meta_clf = MultiOutputClassifier(clf).fit(X_train, Y_train) # n_jobs=-1
compute_scores(meta_clf, X_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.23 (0.01) [0.21, 0.25]
auprc_weighted:               0.36 (0.01) [0.33, 0.38]
auroc_macro:                  0.59 (0.01) [0.56, 0.61]
auroc_weighted:               0.57 (0.01) [0.55, 0.59]
brier_macro:                  0.13 (0.00) [0.12, 0.13]
brier_weighted:               0.03 (0.00) [0.03, 0.03]
balanced_accuracy_macro:      0.51 (0.00) [0.50, 0.51]
balanced_accuracy_weighted:   0.09 (0.00) [0.08, 0.09]
f1_micro:                     0.34 (0.01) [0.32, 0.36]
hamming:                      0.16 (0.00) [0.15, 0.16]
subset_accuracy:              0.10 (0.01) [0.08, 0.11]


#### 3.1.3.4. MLP

In [25]:
clf = MLPClassifier(random_state=0)
meta_clf = MultiOutputClassifier(clf).fit(X_train, Y_train) # n_jobs=-1
compute_scores(meta_clf, X_test, Y_test, boot_iter)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.22 (0.01) [0.20, 0.24]
auprc_weighted:               0.36 (0.01) [0.34, 0.38]
auroc_macro:                  0.58 (0.01) [0.55, 0.60]
auroc_weighted:               0.57 (0.01) [0.55, 0.59]
brier_macro:                  0.17 (0.00) [0.16, 0.17]
brier_weighted:               0.04 (0.00) [0.04, 0.04]
balanced_accuracy_macro:      0.52 (0.00) [0.51, 0.53]
balanced_accuracy_weighted:   0.09 (0.00) [0.09, 0.09]
f1_micro:                     0.36 (0.01) [0.34, 0.38]
hamming:                      0.18 (0.00) [0.18, 0.19]
subset_accuracy:              0.08 (0.01) [0.06, 0.10]


## 3.2. ClassifierChain (ordered by frequency) 
Evaluate classification models wrapped in meta estimator ClassifierChain with respect to multi-label performance metrics

In [26]:
by_freq = label_freq_sorted(Y_train)

### 3.2.1. Logistic regression

In [27]:
clf = LogisticRegression(max_iter=10000, random_state=0)
meta_clf = ClassifierChain(clf, order=by_freq, random_state=0).fit(X_train, Y_train)
compute_scores(meta_clf, X_test, Y_test, boot_iter, chain=True)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.20 (0.01) [0.19, 0.22]
auprc_weighted:               0.35 (0.01) [0.33, 0.37]
auroc_macro:                  0.56 (0.01) [0.54, 0.58]
auroc_weighted:               0.55 (0.01) [0.53, 0.57]
brier_macro:                  0.20 (0.00) [0.20, 0.21]
brier_weighted:               0.05 (0.00) [0.05, 0.05]
balanced_accuracy_macro:      0.52 (0.01) [0.51, 0.53]
balanced_accuracy_weighted:   0.09 (0.00) [0.09, 0.09]
f1_micro:                     0.33 (0.01) [0.31, 0.35]
hamming:                      0.23 (0.00) [0.22, 0.24]
subset_accuracy:              0.03 (0.01) [0.01, 0.04]


### 3.2.2. Histogram-based Gradient Boosting

In [28]:
clf = HistGradientBoostingClassifier(random_state=0)
meta_clf = ClassifierChain(clf, order=by_freq, random_state=0).fit(X_train, Y_train)
compute_scores(meta_clf, X_test, Y_test, boot_iter, chain=True)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.22 (0.01) [0.21, 0.25]
auprc_weighted:               0.36 (0.01) [0.34, 0.38]
auroc_macro:                  0.59 (0.01) [0.57, 0.61]
auroc_weighted:               0.57 (0.01) [0.55, 0.59]
brier_macro:                  0.13 (0.00) [0.12, 0.13]
brier_weighted:               0.03 (0.00) [0.03, 0.03]
balanced_accuracy_macro:      0.51 (0.00) [0.50, 0.52]
balanced_accuracy_weighted:   0.09 (0.00) [0.08, 0.09]
f1_micro:                     0.35 (0.01) [0.33, 0.37]
hamming:                      0.16 (0.00) [0.15, 0.16]
subset_accuracy:              0.10 (0.01) [0.07, 0.12]


## 3.3. ClassifierChain (random order) 
Evaluate classification models wrapped in meta estimator ClassifierChain with respect to multi-label performance metrics

### 3.3.1. Logistic regression

In [29]:
clf = LogisticRegression(max_iter=10000, random_state=0)
meta_clf = ClassifierChain(clf, random_state=0).fit(X_train, Y_train)
compute_scores(meta_clf, X_test, Y_test, boot_iter, chain=True)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.20 (0.01) [0.19, 0.22]
auprc_weighted:               0.35 (0.01) [0.33, 0.37]
auroc_macro:                  0.56 (0.01) [0.54, 0.58]
auroc_weighted:               0.55 (0.01) [0.53, 0.57]
brier_macro:                  0.20 (0.00) [0.20, 0.21]
brier_weighted:               0.05 (0.00) [0.05, 0.05]
balanced_accuracy_macro:      0.52 (0.01) [0.51, 0.53]
balanced_accuracy_weighted:   0.09 (0.00) [0.09, 0.09]
f1_micro:                     0.33 (0.01) [0.31, 0.35]
hamming:                      0.23 (0.00) [0.23, 0.24]
subset_accuracy:              0.02 (0.01) [0.01, 0.03]


### 3.3.2. Histogram-based Gradient Boosting

In [30]:
clf = HistGradientBoostingClassifier(random_state=0)
meta_clf = ClassifierChain(clf, random_state=0).fit(X_train, Y_train)
compute_scores(meta_clf, X_test, Y_test, boot_iter, chain=True)

Mean scores with SE and 95% confidence intervals:

auprc_macro:                  0.22 (0.01) [0.21, 0.25]
auprc_weighted:               0.36 (0.01) [0.34, 0.39]
auroc_macro:                  0.59 (0.01) [0.57, 0.60]
auroc_weighted:               0.57 (0.01) [0.55, 0.59]
brier_macro:                  0.13 (0.00) [0.12, 0.13]
brier_weighted:               0.03 (0.00) [0.03, 0.03]
balanced_accuracy_macro:      0.51 (0.00) [0.51, 0.52]
balanced_accuracy_weighted:   0.09 (0.00) [0.09, 0.09]
f1_micro:                     0.35 (0.01) [0.33, 0.37]
hamming:                      0.16 (0.00) [0.15, 0.16]
subset_accuracy:              0.10 (0.01) [0.08, 0.12]
