# Calculate additional performance metrics: Accuracy, balanced accuracy, sensitivity, specificity

In [1]:
import sys

sys.path.insert(0, '/home/sebastian/workspace/abcd_paper')

In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import roc_curve, balanced_accuracy_score, accuracy_score

from src.definitions import PROCESSED_DATA_DIR, RESULTS_DIR

In [3]:
# Load the processed ABCD table, indexed by subject ID. `get_scores` below
# reads its per-diagnosis columns as the ground-truth labels.
abcd_data = pd.read_csv(PROCESSED_DATA_DIR / 'abcd_data.csv', index_col='src_subject_id')

In [4]:
def get_scores(segmentation: str, n_folds: int = 150):
    """Collect per-fold test metrics for every diagnosis of one segmentation.

    For each fold, the test predictions are read from
    ``fold_<fold_i>.csv`` and every column (one per diagnosis) is compared
    against the column of the same name in the module-level ``abcd_data``.
    The decision threshold is chosen per fold and per diagnosis as the point
    on the ROC curve that maximises the Youden index (``tpr - fpr``).

    Parameters
    ----------
    segmentation
        Segmentation name used to build the result paths (the notebook calls
        this with ``"freesurfer"`` and ``"sri"``).
    n_folds
        Number of folds to read, numbered ``0 .. n_folds - 1``. Defaults to
        150, the value that was previously hard-coded.

    Returns
    -------
    dict
        Maps each diagnosis to a dict with the keys ``'accuracy'``,
        ``'balanced_acc'``, ``'sensitivity'`` and ``'specificity'``; each
        value is a list holding one entry per fold.

    Notes
    -----
    Sensitivity and specificity are read off the ROC curve at the optimal
    index, whereas accuracy and balanced accuracy are computed from the
    thresholded predictions.
    """
    metric_names = ('accuracy', 'balanced_acc', 'sensitivity', 'specificity')
    bacc_sens_spec = {}

    for fold_i in range(n_folds):
        filepath = Path(
            "/home/sebastian/workspace/abcd_paper/"
            f"work/{segmentation}_{fold_i:04d}_xgboost_cce/results/"
            f"run_unpermuted_seed77n30k5_{segmentation}_adjusted/"
            f"unpermuted/xgboost_cce/test/"
        )
        fold_predictions = pd.read_csv(
            filepath / f"fold_{fold_i}.csv", index_col='src_subject_id'
        )

        # A distinct loop variable avoids shadowing the fold DataFrame.
        for diagnosis, scores in fold_predictions.items():
            # Keep only subjects present in both tables; after this call
            # y_true and y_pred share the same index.
            y_true, y_pred = abcd_data[diagnosis].align(other=scores, join='inner')
            fpr, tpr, thresholds = roc_curve(
                y_true=y_true,
                y_score=y_pred
            )
            # Compute optimal threshold value, corresponding to maximum
            # Youden index
            optimal_idx = np.argmax(tpr - fpr)
            optimal_threshold = thresholds[optimal_idx]
            # Compute sensitivity and specificity
            sensitivity = tpr[optimal_idx]
            specificity = 1 - fpr[optimal_idx]
            # Threshold the *aligned* scores: the raw column may contain
            # subjects missing from abcd_data or use a different row order,
            # and the sklearn metrics compare the two inputs positionally.
            binary_predictions = y_pred >= optimal_threshold
            balanced_accuracy = balanced_accuracy_score(
                y_true=y_true,
                y_pred=binary_predictions
            )
            accuracy = accuracy_score(
                y_true=y_true,
                y_pred=binary_predictions
            )
            # Store values, creating the per-diagnosis lists on first use
            diagnosis_metrics = bacc_sens_spec.setdefault(
                diagnosis, {name: [] for name in metric_names}
            )
            diagnosis_metrics['accuracy'].append(accuracy)
            diagnosis_metrics['balanced_acc'].append(balanced_accuracy)
            diagnosis_metrics['sensitivity'].append(sensitivity)
            diagnosis_metrics['specificity'].append(specificity)

    return bacc_sens_spec


def aggregate_scores(bacc_sens_spec):
    """Condense per-fold metric lists into a mean/min/max summary table.

    Parameters
    ----------
    bacc_sens_spec
        Mapping ``label -> {metric name -> list of per-fold values}``, as
        returned by ``get_scores``.

    Returns
    -------
    pandas.DataFrame
        One row per ``(label, statistic)`` pair, with the statistic being
        ``"mean"``, ``"min"`` or ``"max"``, and one column per metric.
    """

    def summarise(label, fold_values):
        # Rows are folds, columns are metrics.
        per_fold = pd.DataFrame(fold_values)
        stats = pd.DataFrame({
            "mean": per_fold.mean(),
            "min": per_fold.min(),
            "max": per_fold.max(),
        })
        # Tag every statistic column with the label so the combined table
        # ends up with a two-level (label, statistic) index.
        stats.columns = pd.MultiIndex.from_product(
            [(label,), stats.columns]
        )
        return stats

    summaries = [
        summarise(label, fold_values)
        for label, fold_values in bacc_sens_spec.items()
    ]

    # Place the labels side by side, then transpose so that the metrics
    # become the columns.
    return pd.concat(summaries, axis=1).T

## FreeSurfer

In [5]:
# Mean/min/max of the per-fold metrics for the "freesurfer" segmentation,
# rounded to three decimals for display.
aggregate_scores(get_scores("freesurfer")).round(3)

Unnamed: 0,Unnamed: 1,accuracy,balanced_acc,sensitivity,specificity
Major Depressive Disorder,mean,0.599,0.559,0.515,0.603
Major Depressive Disorder,min,0.152,0.508,0.034,0.116
Major Depressive Disorder,max,0.947,0.605,0.966,0.988
Bipolar Disorder,mean,0.594,0.562,0.526,0.599
Bipolar Disorder,min,0.233,0.515,0.138,0.178
Bipolar Disorder,max,0.882,0.61,0.931,0.936
Psychotic Symptoms,mean,0.468,0.556,0.649,0.463
Psychotic Symptoms,min,0.046,0.509,0.029,0.018
Psychotic Symptoms,max,0.976,0.63,1.0,1.0
ADHD,mean,0.552,0.561,0.577,0.546


## SRI24

In [6]:
# Mean/min/max of the per-fold metrics for the "sri" segmentation,
# rounded to three decimals for display.
aggregate_scores(get_scores("sri")).round(3)

Unnamed: 0,Unnamed: 1,accuracy,balanced_acc,sensitivity,specificity
Major Depressive Disorder,mean,0.492,0.548,0.609,0.486
Major Depressive Disorder,min,0.057,0.506,0.047,0.011
Major Depressive Disorder,max,0.933,0.599,1.0,0.976
Bipolar Disorder,mean,0.527,0.545,0.567,0.524
Bipolar Disorder,min,0.127,0.508,0.032,0.065
Bipolar Disorder,max,0.928,0.599,0.98,0.994
Psychotic Symptoms,mean,0.509,0.545,0.583,0.507
Psychotic Symptoms,min,0.046,0.507,0.025,0.018
Psychotic Symptoms,max,0.97,0.628,1.0,0.996
ADHD,mean,0.549,0.533,0.508,0.559
