In [None]:
# NOTE: switch to the parent directory
# The relative paths used further down ('./data/...', 'notebooks/plots/...') and the
# `utils` imports are resolved against the working directory set here.
# Run this cell once per kernel session: each additional run moves one more level up.
%cd ..

In [None]:
# Imports
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import (roc_auc_score, average_precision_score, confusion_matrix, roc_curve, precision_recall_curve,
                            auc, accuracy_score)
from sklearn.dummy import DummyClassifier

from copy import deepcopy

from utils.preproc_utils import run_preprocessing
from utils.plotting_utils import plotting_setup, CB_COLOR_CYCLE, STYLES

from tqdm import tqdm

In [None]:
# Constants
# Outcome to analyse; the alternatives are the other two outcome columns listed on the right.
TARGET_VARIABLE = 'DiagnosisByCriteria' # 'DiagnosisByCriteria', 'TreatmentGroupBinar', 'AppendicitisComplications'
# Seed shared by the global RNGs and the random forests
SEED = 1799
# Column keys of the predictors (must stay byte-identical to the column names in the data)
VARIABLE_ORDERING = ['Age', 'BMI', 'Sex', 'Height', 'Weight', 'AlvaradoScore', 'PediatricAppendicitisScore', 'AppendixOnSono',
                     'AppendixDiameter', 'MigratoryPain', 'LowerAbdominalPainRight', 'ReboundTenderness', 'CoughingPain', 'PsoasSign',
                     'Nausea', 'AppetiteLoss', 'BodyTemp', 'WBCCount', 'NeutrophilPerc', 'KetonesInUrine', 'ErythrocytesInUrine',
                     'WBCInUrine', 'CRPEntry', 'Dysuria', 'Stool', 'Peritonitis', 'FreeFluids', 'AppendixWallLayers', 'Kokarde',
                     'TissuePerfusion', 'SurroundingTissueReaction', 'PathLymphNodes', 'MesentricLymphadenitis', 'BowelWallThick',
                     'Ileus', 'FecalImpaction', 'Meteorism', 'Enteritis']
# Human-readable display names; entry i is the label for VARIABLE_ORDERING[i]
VARIABLE_NAMES =    ['Age', 'BMI', 'Sex', 'Height', 'Weight', 'AS', 'PAS', 'Visibility of appendix',
                     'Appendix diameter', 'Migration of pain', 'Tenderness in RLQ', 'Rebound tenderness', 'Cough tenderness',
                     'Psoas sign', 'Nausea/vomiting', 'Anorexia', 'Body temperature', 'WBC count', 'Neutrophil percentage',
                     'Ketones in urine', 'Erythrocytes in urine', 'WBC in urine', 'CRP', 'Dysuria', 'Stool',
                     'Peritonitis', 'Free intraperitoneal fluid', 'Appendix layers', 'Target sign',
                     'Appendix perfusion', 'Surrounding tissue reaction', 'Path. lymph nodes', 'Mesenteric lymphadenitis',
                     'Bowel wall thickening', 'Ileus', 'Coprostasis', 'Meteorism', 'Enteritis']

# The two lists are used as parallel arrays, so a length mismatch would mislabel figures
assert len(VARIABLE_ORDERING) == len(VARIABLE_NAMES), 'VARIABLE_ORDERING and VARIABLE_NAMES must be aligned'

In [None]:
# Utility functions
def bootstrap_resample(data, labels):
    """Draw one bootstrap resample of a dataset and its labels.

    Row positions are sampled uniformly with replacement from NumPy's global
    random state, and the same positions are applied to both inputs so that
    features and labels stay paired.

    Parameters:
        data: positionally indexable table (supports ``.iloc`` and ``len``).
        labels: positionally indexable labels (supports ``.iloc``), one per row of ``data``.

    Returns:
        Tuple ``(resampled_data, resampled_labels)``, each with ``len(data)`` rows.
    """
    sample_size = len(data)
    drawn_positions = np.random.choice(sample_size, size=sample_size, replace=True)
    resampled_data = data.iloc[drawn_positions]
    resampled_labels = labels.iloc[drawn_positions]
    return resampled_data, resampled_labels

In [None]:
# Load the raw data
# Two CSV tables are read, in this order: the Regensburg cohort and the Dusseldorf cohort.
# Paths are relative to the current working directory.
app_data_regensburg, app_data_dusseldorf = [
    pd.read_csv(csv_path) for csv_path in ('./data/app_data.csv', './data/app_data_ext.csv')
]

In [None]:
# Preprocess and impute the data
# run_preprocessing comes from utils.preproc_utils; its exact steps are not visible in this
# notebook (presumably cleaning plus imputation of both cohorts, as the cell title says).
# NOTE: the outputs are bound to the same names as the raw inputs, so re-running this cell
# feeds already-preprocessed frames back in. Re-run the loading cell first if you need to repeat it.
app_data_regensburg, app_data_dusseldorf = run_preprocessing(app_data_regensburg, app_data_dusseldorf)

In [None]:
# Construct targets and design matrices
# All three outcome columns are removed from the predictors, whichever one is the selected target.
outcome_columns = ['DiagnosisByCriteria', 'TreatmentGroupBinar', 'AppendicitisComplications']

# Targets
y_regensburg = app_data_regensburg[TARGET_VARIABLE]
y_dusseldorf = app_data_dusseldorf[TARGET_VARIABLE]

# Design matrices
X_regensburg = app_data_regensburg.drop(columns=outcome_columns)
X_dusseldorf = app_data_dusseldorf.drop(columns=outcome_columns)

In [None]:
# Permutation variable importance evaluation on the training set
#
# For every bootstrap resample a random forest is fitted on the resampled internal
# (Regensburg) data. Each predictor is then permuted in turn, and the resulting drop in
# AUROC and balanced accuracy on that same resample is recorded.
#
# NOTE: the test-set cell below re-creates `auroc_dropoffs` and
# `balanced_accuracy_dropoffs` under the same names, so the results of this cell are
# replaced once that cell runs.

# Number of bootstrap resamples
# NOTE: we use fewer resamples than usual due to computational costs
B = 100

# Fix seed for reproducibility
random.seed(SEED)
np.random.seed(SEED)

# One list of per-resample dropoffs for every predictor
auroc_dropoffs = {col: [] for col in X_regensburg.columns}
balanced_accuracy_dropoffs = {col: [] for col in X_regensburg.columns}

for b in tqdm(range(B)):
    # Make a bootstrap resample of the internal and external sets
    # (the external resample is not evaluated here; it is drawn to keep the random stream unchanged)
    X_regensburg_b, y_regensburg_b = bootstrap_resample(X_regensburg, y_regensburg)
    X_dusseldorf_b, y_dusseldorf_b = bootstrap_resample(X_dusseldorf, y_dusseldorf)

    # Predictive model
    model_rf_b = RandomForestClassifier(n_estimators=1000, random_state=SEED)

    # Train the model
    model_rf_b.fit(X_regensburg_b, y_regensburg_b)

    # Evaluate the model w/o the permutation.
    # This baseline does not depend on the permuted column, so it is computed once per
    # resample instead of once per variable.
    y_train_proba = model_rf_b.predict_proba(X_regensburg_b)[:, 1]
    auroc = roc_auc_score(y_regensburg_b, y_train_proba)
    y_train_pred = model_rf_b.predict(X_regensburg_b)
    cm = confusion_matrix(y_regensburg_b, y_train_pred)
    sensitivity = cm[1, 1] / (cm[1, 1] + cm[1, 0])
    specificity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
    balanced_accuracy = (sensitivity + specificity) / 2

    # Iterate through the variables
    for col in X_regensburg.columns:
        # Permute the corresponding column.
        # A private copy of the values is shuffled and written back explicitly: shuffling
        # `df[col].values` in place only works while that array is a writable view of the
        # frame, which pandas does not guarantee (it is read-only under Copy-on-Write).
        X_regensburg_b_permuted = X_regensburg_b.copy()
        permuted_values = X_regensburg_b[col].to_numpy(copy=True)
        np.random.shuffle(permuted_values)
        X_regensburg_b_permuted[col] = permuted_values

        # The external column is not evaluated in this cell; a throw-away copy is shuffled
        # only so that the global random stream (and hence every later draw) stays the same.
        np.random.shuffle(X_dusseldorf_b[col].to_numpy(copy=True))

        # Evaluate the model under the permutation
        y_train_proba_permuted = model_rf_b.predict_proba(X_regensburg_b_permuted)[:, 1]
        auroc_permuted = roc_auc_score(y_regensburg_b, y_train_proba_permuted)
        y_train_pred_permuted = model_rf_b.predict(X_regensburg_b_permuted)
        cm_permuted = confusion_matrix(y_regensburg_b, y_train_pred_permuted)
        sensitivity_permuted = cm_permuted[1, 1] / (cm_permuted[1, 1] + cm_permuted[1, 0])
        specificity_permuted = cm_permuted[0, 0] / (cm_permuted[0, 0] + cm_permuted[0, 1])
        balanced_accuracy_permuted = (sensitivity_permuted + specificity_permuted) / 2

        # Compute and store dropoffs
        auroc_dropoffs[col].append(auroc - auroc_permuted)
        balanced_accuracy_dropoffs[col].append(balanced_accuracy - balanced_accuracy_permuted)

In [None]:
# Permutation variable importance evaluation on the test set
#
# For every bootstrap resample a random forest is fitted on the resampled internal
# (Regensburg) data and evaluated on the resampled external (Dusseldorf) data. Each
# predictor of the external resample is permuted in turn, and the resulting drop in
# AUROC and balanced accuracy is recorded.

# Number of bootstrap resamples
# NOTE: we use fewer resamples than usual due to computational costs
B = 100

# Fix seed for reproducibility
random.seed(SEED)
np.random.seed(SEED)

# One list of per-resample dropoffs for every predictor
auroc_dropoffs = {col: [] for col in X_regensburg.columns}
balanced_accuracy_dropoffs = {col: [] for col in X_regensburg.columns}

for b in tqdm(range(B)):
    # Make a bootstrap resample of the internal and external sets
    X_regensburg_b, y_regensburg_b = bootstrap_resample(X_regensburg, y_regensburg)
    X_dusseldorf_b, y_dusseldorf_b = bootstrap_resample(X_dusseldorf, y_dusseldorf)

    # Predictive model
    model_rf_b = RandomForestClassifier(n_estimators=1000, random_state=SEED)

    # Train the model
    model_rf_b.fit(X_regensburg_b, y_regensburg_b)

    # Evaluate the model w/o the permutation.
    # This baseline does not depend on the permuted column, so it is computed once per
    # resample instead of once per variable.
    y_test_proba = model_rf_b.predict_proba(X_dusseldorf_b)[:, 1]
    auroc = roc_auc_score(y_dusseldorf_b, y_test_proba)
    y_test_pred = model_rf_b.predict(X_dusseldorf_b)
    cm = confusion_matrix(y_dusseldorf_b, y_test_pred)
    sensitivity = cm[1, 1] / (cm[1, 1] + cm[1, 0])
    specificity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
    balanced_accuracy = (sensitivity + specificity) / 2

    # Iterate through the variables
    for col in X_regensburg.columns:
        # The internal column is not evaluated in this cell; a throw-away copy is shuffled
        # only so that the global random stream (and hence every later draw) stays the same.
        np.random.shuffle(X_regensburg_b[col].to_numpy(copy=True))

        # Permute the corresponding column of the external resample.
        # A private copy of the values is shuffled and written back explicitly: shuffling
        # `df[col].values` in place only works while that array is a writable view of the
        # frame, which pandas does not guarantee (it is read-only under Copy-on-Write).
        X_dusseldorf_b_permuted = X_dusseldorf_b.copy()
        permuted_values = X_dusseldorf_b[col].to_numpy(copy=True)
        np.random.shuffle(permuted_values)
        X_dusseldorf_b_permuted[col] = permuted_values

        # Evaluate the model under the permutation
        y_test_proba_permuted = model_rf_b.predict_proba(X_dusseldorf_b_permuted)[:, 1]
        auroc_permuted = roc_auc_score(y_dusseldorf_b, y_test_proba_permuted)
        y_test_pred_permuted = model_rf_b.predict(X_dusseldorf_b_permuted)
        cm_permuted = confusion_matrix(y_dusseldorf_b, y_test_pred_permuted)
        sensitivity_permuted = cm_permuted[1, 1] / (cm_permuted[1, 1] + cm_permuted[1, 0])
        specificity_permuted = cm_permuted[0, 0] / (cm_permuted[0, 0] + cm_permuted[0, 1])
        balanced_accuracy_permuted = (sensitivity_permuted + specificity_permuted) / 2

        # Compute and store dropoffs
        auroc_dropoffs[col].append(auroc - auroc_permuted)
        balanced_accuracy_dropoffs[col].append(balanced_accuracy - balanced_accuracy_permuted)

In [None]:
# Box plot of the bootstrapped AUROC dropoffs, one horizontal box per predictor
plotting_setup(12)

plt.figure(figsize=(8, 8))

# Reference line at zero: boxes to the right of it mean that permuting the variable hurt AUROC
zeroline = plt.axvline(x=0, color='gray', linestyle='--')

# Boxes are drawn in the insertion order of auroc_dropoffs (the design-matrix column order)
plotted_columns = list(auroc_dropoffs.keys())
bp = plt.boxplot([auroc_dropoffs[col] for col in plotted_columns], showfliers=False, vert=False, patch_artist=True,
                 notch=True, medianprops=dict(color='black', linewidth=2.5))
for box in bp['boxes']:
    box.set(facecolor='white')

# Look up every box's display name by its column key instead of by position, so the labels
# stay correct even if the column order differs from VARIABLE_ORDERING. Columns without a
# display name fall back to their raw key.
display_names = dict(zip(VARIABLE_ORDERING, VARIABLE_NAMES))
tick_labels = [display_names.get(col, col) for col in plotted_columns]

ytk = plt.yticks(np.arange(len(tick_labels)) + 1, tick_labels, rotation=15, ha='right')
plt.grid(visible=True, axis='y')
plt.xlabel('Bootstrapped Decrease in Test-set AUROC')
plt.title('Permutation Variable Importance: Diagnosis', fontdict={'fontsize': 12})

plt.savefig('notebooks/plots/variable_importance_diagnosis.pdf', format='pdf', bbox_inches='tight')