# Testing Baseline (Multitask Learning Model)

Importing the functions needed from the `mtl_patients` module:

In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd

import sys
# Make the project's code folder importable. The same `pathname` variable is used
# for both the membership check and the append so the two can never drift apart.
pathname = "../code/"
if pathname not in sys.path:
    sys.path.append(pathname)

from mtl_patients import get_summaries, run_mortality_prediction_task

Run the summaries. With the default arguments (no parameters), data is collected during the first 24 hours, followed by a 12-hour gap before mortality prediction starts.

In [None]:
# Build the three summary tables using the default arguments of get_summaries().
(
    pat_summ_by_cu_df,
    pat_summ_by_sapsiiq_df,
    vitals_labs_summ_df,
) = get_summaries()

In [None]:
pat_summ_by_cu_df

In [None]:
pat_summ_by_sapsiiq_df

In [None]:
vitals_labs_summ_df

Run the mortality prediction task using the global model. With the default arguments (no parameters), data is collected during the first 24 hours, followed by a 12-hour gap before mortality prediction starts.

In [None]:
# Raise pandas' row display limit so that long result tables are shown in full.
pd.options.display.max_rows = 9999
# Run the task with all default arguments and keep the returned metrics table.
metrics_df = run_mortality_prediction_task()

First run.

In [None]:
metrics_df

Second run.

In [None]:
metrics_df

Both runs produce identical metrics, so the results are reproducible.

## `run_mortality_prediction_task()` step by step

Imports:

In [None]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd
import random
from keras.callbacks import EarlyStopping
from keras.layers import Input, Dense, LSTM, RepeatVector
from keras.models import Model, Sequential
from keras.optimizers import Adam
from sklearn.metrics import roc_auc_score, precision_score, recall_score, roc_curve
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split
from tqdm.autonotebook import tqdm

Arguments for `run_mortality_prediction_task()`:

In [None]:
model_type='multitask'  # model variant; used in log messages and in the model/results file names
cutoff_hours=24  # forwarded to prepare_data(cutoff_hours=...)
gap_hours=12  # forwarded to prepare_data(gap_hours=...)
save_to_folder='../data/'  # base folder under which the 'results' and 'models' folders are created
cohort_criteria_to_select='careunits'  # the cells below handle 'careunits', 'sapsii_quartile' and 'unsupervised'
seed=0  # passed to set_global_determinism() and used as the train/validation split seed
cohort_unsupervised_filename='../data/unsupervised_clusters.npy'  # loaded with np.load when the criteria is 'unsupervised'
lstm_layer_size=16  # forwarded to create_multitask_learning_model()
epochs=30  # epochs argument of model.fit (an EarlyStopping callback is also attached)
learning_rate=0.0001  # forwarded to create_multitask_learning_model()
use_cohort_inv_freq_weights=False  # when True, each training sample is weighted by len(X_train) / size of its cohort
bootstrap=False  # in the cells shown here this only selects the 'bootstrap-ON'/'bootstrap-OFF' part of the results file name
num_bootstrapped_samples=100  # forwarded to bootstrap_predict(num_bootstrap_samples=...)
sensitivity=0.8  # target true-positive rate used to pick the decision threshold from the ROC curve

Imports for local functions needed by `run_mortality_prediction_task()`:

In [None]:
from mtl_patients import set_global_determinism, prepare_data, stratified_split
from mtl_patients import create_single_task_learning_model, create_multitask_learning_model
from mtl_patients import bootstrap_predict
from mtl_patients import get_mtl_sample_weights, get_correct_task_mtl_outputs

Code in `run_mortality_prediction_task()` common to all models:

In [None]:
# Common preparation for every model type: fix the seeds, make sure the output
# folders exist, load the data, split it by cohort, derive the optional
# inverse-frequency sample weights and build the output file names.

# setting the seeds to get reproducible results
# taken from https://stackoverflow.com/questions/36288235/how-to-get-stable-results-with-tensorflow-setting-random-seed
set_global_determinism(seed=seed)

# create folders to store models and results (no error if they already exist)
for folder in ['results', 'models']:
    os.makedirs(os.path.join(save_to_folder, folder), exist_ok=True)

X, Y, careunits, sapsii_quartile, subject_ids = prepare_data(cutoff_hours=cutoff_hours, gap_hours=gap_hours)
Y = Y.astype(int) # Y is originally a boolean

print('+' * 80, flush=True)
print('Running the Mortality Prediction Task', flush=True)
print('-' * 80, flush=True)

# fetch right cohort criteria
if cohort_criteria_to_select == 'careunits':
    cohort_criteria = careunits
elif cohort_criteria_to_select == 'sapsii_quartile':
    cohort_criteria = sapsii_quartile
elif cohort_criteria_to_select == 'unsupervised':
    cohort_criteria = np.load(f"{cohort_unsupervised_filename}")
else:
    # fail loudly: in a notebook an unknown value would otherwise silently reuse
    # whatever `cohort_criteria` a previous run left in the kernel
    raise ValueError(
        f"Unknown cohort criteria '{cohort_criteria_to_select}'; "
        "expected 'careunits', 'sapsii_quartile' or 'unsupervised'"
    )

# Do train/validation/test split using `cohort_criteria` as the cohort classifier
print('    Splitting data into train/validation/test sets...', flush=True)
X_train, X_val, X_test, y_train, y_val, y_test, cohorts_train, cohorts_val, cohorts_test = \
    stratified_split(X, Y, cohort_criteria, train_val_random_seed=seed)

# one task by distinct cohort
tasks = np.unique(cohorts_train)

# calculate number of samples per cohort and its reciprocal
# (to be used in sample weight calculation)
print('    Calculating number of training samples in cohort...', flush=True)
task_weights = {}
for cohort in tasks:
    num_samples_in_cohort = len(np.where(cohorts_train == cohort)[0])
    print(f"        # of patients in cohort {cohort} is {num_samples_in_cohort}")
    task_weights[cohort] = len(X_train) / num_samples_in_cohort

sample_weight = None
if use_cohort_inv_freq_weights:
    # calculate sample weight as the cohort's inverse frequency corresponding to each sample
    sample_weight = np.array([task_weights[cohort] for cohort in cohorts_train])

# Build the output paths with os.path.join (as done for the folders above) so they
# stay correct whether or not `save_to_folder` ends with a path separator.
run_name = f"model_{model_type}_{cutoff_hours}+{gap_hours}_{cohort_criteria_to_select}"
model_filename = os.path.join(save_to_folder, 'models', run_name)
filename_part_bootstrap = "bootstrap-ON" if bootstrap else "bootstrap-OFF"
results_filename = os.path.join(save_to_folder, 'results', f"{run_name}_{filename_part_bootstrap}.h5")

Code specific to the multitask learning model:

In [None]:
model_filename

In [None]:
#--------------------------
# train the multitask model

print('    ' + '~' * 76)
print(f"    Training '{model_type}' model...")

num_tasks = len(tasks)
# position of each cohort within `tasks`; later cells use it to pick a cohort's row of scores
cohort_to_index = {cohort: index for index, cohort in enumerate(tasks)}
model = create_multitask_learning_model(lstm_layer_size=lstm_layer_size, input_dims=X_train.shape[1:],
                                        output_dims=1, tasks=tasks, learning_rate=learning_rate)
# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that would emit an extra "None" line)
model.summary()

In [None]:
# monitor the validation loss with a patience of 4 epochs
early_stopping = EarlyStopping(monitor='val_loss', patience=4)

# the same label vector is supplied once per task, for training and for validation;
# the per-task sample weights are built by get_mtl_sample_weights
train_targets = [y_train] * num_tasks
val_targets = [y_val] * num_tasks
mtl_sample_weights = get_mtl_sample_weights(y_train, cohorts_train, tasks, sample_weight=sample_weight)

model.fit(
    X_train,
    train_targets,
    epochs=epochs,
    batch_size=100,
    sample_weight=mtl_sample_weights,
    callbacks=[early_stopping],
    validation_data=(X_val, val_targets),
)
model.save(model_filename)

In [None]:
print('    ' + '~' * 76)
print(f"    Predicting using '{model_type}' model...", flush=True)
# singleton dimensions are squeezed away; later cells index y_scores as [task, sample]
test_predictions = model.predict(X_test)
y_scores = np.squeeze(test_predictions)

In [None]:
y_scores

## With no bootstrapping

In [None]:
# Metrics table without bootstrapping: one row per cohort plus 'Macro' and 'Micro'.
tasks_str = [str(task) for task in tasks]
metrics_df = pd.DataFrame(index=np.append(tasks_str, ['Macro', 'Micro']), dtype=float)

for task in tasks:
    # test samples of this cohort, scored by the row of y_scores that belongs to the cohort
    is_in_cohort = cohorts_test == task
    y_scores_in_cohort = y_scores[cohort_to_index[task], is_in_cohort]
    y_true_in_cohort = y_test[is_in_cohort]

    # pick the ROC threshold whose TPR (sensitivity) is closest to the target, e.g. 80%
    # (80% is the value the paper selected to display its results)
    _, tpr, thresholds = roc_curve(y_true_in_cohort, y_scores_in_cohort)
    closest_idx = np.argmin(np.abs(tpr - sensitivity))
    threshold_target = thresholds[closest_idx]

    # NOTE(review): roc_curve computes the TPR at a threshold from scores >= threshold;
    # the strict '>' is kept unchanged here -- confirm which comparison is intended.
    y_pred_in_cohort = (y_scores_in_cohort > threshold_target).astype("int32")

    row = str(task)
    metrics_df.loc[row, 'AUC'] = roc_auc_score(y_true_in_cohort, y_scores_in_cohort)
    metrics_df.loc[row, 'PPV'] = precision_score(y_true_in_cohort, y_pred_in_cohort, zero_division=0)
    # specificity is the recall of the negative class
    metrics_df.loc[row, 'Specificity'] = recall_score(y_true_in_cohort, y_pred_in_cohort, pos_label=0)

# macro metrics: column-wise mean over every row that is neither 'Macro' nor 'Micro'
is_summary_row = (metrics_df.index == 'Macro') | (metrics_df.index == 'Micro')
metrics_df.loc['Macro', :] = metrics_df.loc[~is_summary_row].mean()

In [None]:
metrics_df

In [None]:
# for every test sample take the score from the row of y_scores that belongs to its own cohort
task_rows = [cohort_to_index[c] for c in cohorts_test]
sample_cols = np.arange(len(y_test))
_, tpr, thresholds = roc_curve(y_test, y_scores[task_rows, sample_cols])

In [None]:
# threshold whose TPR is closest to the target sensitivity
closest_idx = np.argmin(np.abs(tpr - sensitivity))
threshold_target = thresholds[closest_idx]
threshold_target

In [None]:
# score of each test sample taken from the row of y_scores that belongs to its own cohort
own_task_scores = y_scores[[cohort_to_index[c] for c in cohorts_test], np.arange(len(y_test))]
y_pred = (own_task_scores > threshold_target).astype("int32")

In [None]:
y_pred

In [None]:
metrics_df.loc['Micro', 'PPV'] = precision_score(y_test, y_pred)  # y_pred already holds one prediction per test sample, so it is not indexed by cohort again

In [None]:
# calculate micro metrics (all test samples pooled together)

# for every test sample keep only the score from the row of y_scores that belongs
# to its own cohort; computed once and reused below
y_scores_own_task = y_scores[[cohort_to_index[c] for c in cohorts_test], np.arange(len(y_test))]

## get TPR, aka sensitivity, and thresholds
_, tpr, thresholds = roc_curve(y_test, y_scores_own_task)
## threshold close to give target TPR, e.g., 80%
threshold_target = thresholds[np.argmin(np.abs(tpr - sensitivity))]
### Why 80% threshold? That is what the paper selected to display the results
## use calculated threshold to do predictions
y_pred = (y_scores_own_task > threshold_target).astype("int32")

# y_pred is one-dimensional (one prediction per test sample), so it is passed to the
# metric functions as is; indexing it again by (cohort, sample) raises an IndexError
metrics_df.loc['Micro', 'AUC'] = roc_auc_score(y_test, y_scores_own_task)
# zero_division=0 matches the per-cohort PPV computation
metrics_df.loc['Micro', 'PPV'] = precision_score(y_test, y_pred, zero_division=0)
# specificity is the recall of the negative class
metrics_df.loc['Micro', 'Specificity'] = recall_score(y_test, y_pred, pos_label=0)

## With bootstrapping

In [None]:
# Empty frame for the bootstrapped AUC, PPV and specificity: one row per
# (cohort, bootstrap sample) pair and one column per metric.

tasks_str = [str(task) for task in tasks]
lst_of_tasks = tasks_str + ['Micro', 'Macro']

# sample labels '1'..'N' are derived from `num_bootstrapped_samples`, so the number of
# rows per cohort always matches the number of samples requested from bootstrap_predict
sample_labels = [str(sample) for sample in range(1, num_bootstrapped_samples + 1)]

idx = pd.MultiIndex.from_product([lst_of_tasks, sample_labels], names=['Cohort', 'Sample'])
metrics_df = pd.DataFrame(index=idx, columns=['AUC', 'PPV', 'Specificity'], dtype=float)

In [None]:
metrics_df

In [None]:
# fill the rows of every cohort with its bootstrapped metrics
metric_columns = ('AUC', 'PPV', 'Specificity')
for task in tasks:
    all_auc, all_ppv, all_specificity = bootstrap_predict(
        X_test, y_test, cohorts_test, task, model,
        tasks=tasks, num_bootstrap_samples=num_bootstrapped_samples,
    )
    cohort_label = str(task)
    for column, values in zip(metric_columns, (all_auc, all_ppv, all_specificity)):
        metrics_df.loc[cohort_label, column] = values

In [None]:
metrics_df

In [None]:
metrics_df.query("Cohort != 'Micro' and Cohort != 'Macro'").groupby('Sample').mean()

In [None]:
metrics_df.query("Cohort != 'Micro' and Cohort != 'Macro'").groupby('Sample').mean()['AUC'].values

In [None]:
# Macro metrics: for every bootstrap sample, the mean of each metric over the cohort rows.
per_cohort_metrics = metrics_df.query("Cohort != 'Micro' and Cohort != 'Macro'")
macro_by_sample = per_cohort_metrics.groupby('Sample').mean()

# groupby returns the string sample labels in sorted (lexicographic) order, i.e.
# '1', '10', '100', '11', ..., whereas the values below are assigned by position.
# Reorder them to the order of the 'Macro' rows so each value lands on its own sample.
sample_order = metrics_df.loc['Macro'].index
macro_by_sample = macro_by_sample.loc[sample_order]

for column in ['AUC', 'PPV', 'Specificity']:
    metrics_df.loc['Macro', column] = macro_by_sample[column].values

In [None]:
metrics_df.loc['Macro', :]

In [None]:
# micro rows: bootstrap_predict is called with 'all' in place of a single cohort
all_auc, all_ppv, all_specificity = bootstrap_predict(
    X_test, y_test, cohorts_test, 'all', model,
    tasks=tasks, num_bootstrap_samples=num_bootstrapped_samples,
)
micro_metrics = {'AUC': all_auc, 'PPV': all_ppv, 'Specificity': all_specificity}
for column, values in micro_metrics.items():
    metrics_df.loc['Micro', column] = values

In [None]:
results_filename