In [7]:
import pandas as pd
import numpy as np
import os

from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix
from sklearn.linear_model import LogisticRegression

In [2]:
csv_dir = '../csv'

In [5]:
def get_stats(labels, preds):    
    preds = preds.flatten()
    binary_preds = (preds > 0.5).astype(int)
    
    print('AUC:', roc_auc_score(labels, preds))
    print('Acc:', accuracy_score(labels, binary_preds))
    
    tn, fp, fn, tp = confusion_matrix(labels, binary_preds).ravel()
    
    ppv = tp / (tp + fp)
    npv = tn / (tn + fn)
    sens = tp / (tp + fn)
    spec = tn / (fp + tn)
    
    print('PPV:', ppv)
    print('NPV:', npv)
    print('Sens:', sens)
    print('Spec:', spec)

## Biomarkers

In [3]:
biomarker_cols = [
    'Age',
    'Sex_F', 
    'Sex_M',
    'APOE A1_2', 
    'APOE A1_3', 
    'APOE A1_4', 
    'APOE A2_2',
    'APOE A2_3', 
    'APOE A2_4', 
    'LEFT_HIPPOCAMPUS_VOLUME',
    'RIGHT_HIPPOCAMPUS_VOLUME', 
    'MMSE Total Score',
    'ADAS13',
    'AD', 
    'CN', 
    'EMCI', 
    'LMCI', 
    'MCI',
    'SMC'
]

## Biomarkers w/ gaussian labels

In [13]:
# Amyloid

train_df = pd.read_csv(csv_dir + '/generated/A_train_complete_updated_gaussian.csv')
val_df = pd.read_csv(csv_dir + '/generated/A_val_complete_updated_gaussian.csv')
test_df = pd.read_csv(csv_dir + '/generated/A_test_complete_updated_gaussian.csv')

train_features = np.array(train_df.loc[:, biomarker_cols])
val_features = np.array(val_df.loc[:, biomarker_cols])
test_features = np.array(test_df.loc[:, biomarker_cols])

train_output = np.array(train_df['A_GAUSSIAN_CLS']).astype(int)
val_output = np.array(val_df['A_GAUSSIAN_CLS']).astype(int)
test_output = np.array(test_df['A_GAUSSIAN_CLS']).astype(int)

clf = LogisticRegression().fit(train_features, train_output)
preds = clf.predict_proba(val_features)

print('Amyloid Class')
get_stats(val_output, preds)

print('-----------')

preds = clf.predict_proba(test_features)
get_stats(test_output, preds)

Amyloid Class
AUC: 0.6737424547283702
Acc: 0.6737588652482269
PPV: 0.676056338028169
NPV: 0.6714285714285714
Sens: 0.676056338028169
Spec: 0.6714285714285714
-----------
AUC: 0.712999548430797
Acc: 0.7222222222222222
PPV: 0.7148936170212766
NPV: 0.7342657342657343
Sens: 0.8155339805825242
Spec: 0.6104651162790697


In [12]:
# Tau

# Load labels from .csv file
tau_train_df = pd.read_csv(csv_dir + '/generated/T_train_complete_updated_gaussian.csv')
tau_val_df = pd.read_csv(csv_dir + '/generated/T_val_complete_updated_gaussian.csv')
tau_test_df = pd.read_csv(csv_dir + '/generated/T_test_complete_updated_gaussian.csv')

train_features = np.array(tau_train_df.loc[:, biomarker_cols])
val_features = np.array(tau_val_df.loc[:, biomarker_cols])
test_features = np.array(tau_test_df.loc[:, biomarker_cols])

train_output = np.array(tau_train_df['T_GAUSSIAN_CLS']).astype(int)
val_output = np.array(tau_val_df['T_GAUSSIAN_CLS']).astype(int)
test_output = np.array(tau_test_df['T_GAUSSIAN_CLS']).astype(int)

clf = LogisticRegression().fit(train_features, train_output)
preds = clf.predict_proba(val_features)

print('Tau Class')
get_stats(val_output, preds)

print('-----------')

preds = clf.predict_proba(test_features)
get_stats(test_output, preds)

Tau Class
AUC: 0.7
Acc: 0.8775510204081632
PPV: 0.8666666666666667
NPV: 1.0
Sens: 1.0
Spec: 0.4
-----------
AUC: 0.5384615384615384
Acc: 0.8888888888888888
PPV: 0.8878504672897196
NPV: 1.0
Sens: 1.0
Spec: 0.07692307692307693


In [11]:
# N

# Load labels from .csv file
N_train_df = pd.read_csv(csv_dir + '/generated/N_train_complete_updated_gaussian.csv')
N_val_df = pd.read_csv(csv_dir + '/generated/N_val_complete_updated_gaussian.csv')
N_test_df = pd.read_csv(csv_dir + '/generated/N_test_complete_updated_gaussian.csv')

train_features = np.array(N_train_df.loc[:, biomarker_cols])
val_features = np.array(N_val_df.loc[:, biomarker_cols])
test_features = np.array(N_test_df.loc[:, biomarker_cols])

train_output = np.array(N_train_df['N_GAUSSIAN_CLS']).astype(int)
val_output = np.array(N_val_df['N_GAUSSIAN_CLS']).astype(int)
test_output = np.array(N_test_df['N_GAUSSIAN_CLS']).astype(int)

clf = LogisticRegression().fit(train_features, train_output)
preds = clf.predict_proba(val_features)

print('N Class')
get_stats(val_output, preds)

print('-----------')

preds = clf.predict_proba(test_features)
get_stats(test_output, preds)

N Class
AUC: 0.7043650793650794
Acc: 0.7285714285714285
PPV: 0.6901408450704225
NPV: 0.7482014388489209
Sens: 0.5833333333333334
Spec: 0.8253968253968254
-----------
AUC: 0.7560544280727767
Acc: 0.7728155339805826
PPV: 0.7790055248618785
NPV: 0.7694610778443114
Sens: 0.6467889908256881
Spec: 0.8653198653198653
