# Testing Baseline with Bootstrapping (Global Model)

Importing the functions needed from the `mtl_patients` module:

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd

import sys
pathname = "../code/"
if pathname not in sys.path:
    sys.path.append("../code/")

from mtl_patients import run_mortality_prediction_task

2023-03-29 17:52:29.255112: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## `run_mortality_prediction_task()` step by step

### Imports needed

In [15]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd
import random
from keras.callbacks import EarlyStopping
from keras.layers import Input, Dense, LSTM, RepeatVector
from keras.models import Model, Sequential
from keras.optimizers import Adam
from sklearn.metrics import roc_auc_score, precision_score, recall_score
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import train_test_split

In [4]:
from mtl_patients import set_global_determinism, prepare_data, stratified_split, create_single_task_learning_model

### Arguments

In [8]:
model_type='global'
cutoff_hours=24
gap_hours=12
save_to_folder='../data/'
cohort_criteria_to_select='careunits'
seed=0
cohort_unsupervised_filename='../data/unsupervised_clusters.npy'
lstm_layer_size=16
epochs=30
learning_rate=0.0001
use_cohort_inv_freq_weights=False
bootstrap=False
num_bootstrapped_samples=100
SEED=0

### Code common to all models

In [9]:
# setting the seeds to get reproducible results
# taken from https://stackoverflow.com/questions/36288235/how-to-get-stable-results-with-tensorflow-setting-random-seed
set_global_determinism(seed=seed)

# create folders to store models and results
for folder in ['results', 'models']:
    if not os.path.exists(os.path.join(save_to_folder, folder)):
        os.makedirs(os.path.join(save_to_folder, folder))

X, Y, careunits, sapsii_quartile, subject_ids = prepare_data(cutoff_hours=cutoff_hours, gap_hours=gap_hours)
Y = Y.astype(int) # Y is originally a boolean

print('+' * 80, flush=True)
print('Running the Mortality Prediction Task', flush=True)
print('-' * 80, flush=True)

# fetch right cohort criteria
if cohort_criteria_to_select == 'careunits':
    cohort_criteria = careunits
elif cohort_criteria_to_select == 'sapsii_quartile':
    cohort_criteria = sapsii_quartile
elif cohort_criteria_to_select == 'unsupervised':
    cohort_criteria = np.load(f"{cohort_unsupervised_filename}")

# Do train/validation/test split using `cohort_criteria` as the cohort classifier
print('    Splitting data into train/validation/test sets...', flush=True)
X_train, X_val, X_test, y_train, y_val, y_test, cohorts_train, cohorts_val, cohorts_test = \
    stratified_split(X, Y, cohort_criteria, train_val_random_seed=seed)

# one task by distinct cohort
tasks = np.unique(cohorts_train)

# calculate number of samples per cohort and its reciprocal
# (to be used in sample weight calculation)
print('    Calculating number of training samples in cohort...', flush=True)
task_weights = {}
for cohort in tasks:
    num_samples_in_cohort = len(np.where(cohorts_train == cohort)[0])
    print(f"        # of patients in cohort {cohort} is {str(num_samples_in_cohort)}")
    task_weights[cohort] = len(X_train) / num_samples_in_cohort

sample_weight = None
if use_cohort_inv_freq_weights:
    # calculate sample weight as the cohort's inverse frequency corresponding to each sample
    sample_weight = np.array([task_weights[cohort] for cohort in cohorts_train])

model_filename = f"{save_to_folder}models/model_{cutoff_hours}+{gap_hours}_{cohort_criteria_to_select}"
results_filename = f'{save_to_folder}results/model_{cutoff_hours}+{gap_hours}_{cohort_criteria_to_select}.h5'

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Preparing the data
--------------------------------------------------------------------------------
    Loading data from MIMIC-Extract pipeline...
    Adding SAPS II score to static dataset...
    Adding mortality columns to static dataset...
    Discretizing X...
        X.shape: (2200954, 33), X.subject_id.nunique(): 34472
        X_discrete.shape: (2200954, 225), X_discrete.subject_id.nunique(): 34472
    Keep only X_discrete[X_discrete.hours_in < 24]...
        New X_discrete.shape: (808539, 223), new X_discrete.subject_id.nunique(): 34472
    Padding patients with less than 24 hours of data...
    Merging dataframes to create X_full...
    Mortality per careunit...
        MICU: 1138 out of 11403
        SICU: 409 out of 5187
        CCU: 344 out of 4907
        CSRU: 139 out of 6971
        TSICU: 291 out of 4245
    Final shape of X: (32713, 24, 232)
    Number of positive samples: 2321
    Done!
+

### Global model common code

In [12]:
#-----------------------
# train the global model

print('    ' + '~' * 76)
print(f"    Training '{model_type}' model...")

model = create_single_task_learning_model(lstm_layer_size=lstm_layer_size, input_dims=X_train.shape[1:],
                                          output_dims=1, learning_rate=learning_rate)
print(model.summary())

early_stopping = EarlyStopping(monitor='val_loss', patience=4)

model.fit(X_train, y_train, epochs=epochs, batch_size=100, sample_weight=sample_weight,
          callbacks=[early_stopping], validation_data=(X_val, y_val))
model.save(model_filename)

print('    ' + '~' * 76)
print(f"    Predicting using '{model_type}' model...", flush=True)
y_scores = np.squeeze(model.predict(X_test))
y_pred = (y_scores > 0.5).astype("int32")

# calculate AUC, PPV, and Specificity for every cohort
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8156826/
# https://stackoverflow.com/questions/56253863/precision-recall-and-confusion-matrix-problems-in-sklearn
# https://stackoverflow.com/questions/33275461/specificity-in-scikit-learn
# PPV (Predictive Positive Value) is same as precision
# Specificity is same as recall of the negative class... using that trick to get it in sklearn
lst_of_auc = []
lst_of_ppv = []
lst_of_specificity = []

    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Training 'global' model...
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 16)                15936     
                                                                 
 dense (Dense)               (None, 1)                 17        
                                                                 
Total params: 15,953
Trainable params: 15,953
Non-trainable params: 0
_________________________________________________________________
None


2023-03-29 18:36:54.965921: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
INFO:tensorflow:Assets written to: ../data/models/model_24+12_careunits/assets
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Predicting using 'global' model...


In [77]:
if not bootstrap:

    metrics_df = pd.DataFrame(index=np.append(tasks, ['Macro', 'Micro']))

    for task in tasks:
        auc = roc_auc_score(y_test[cohorts_test == task], y_scores[cohorts_test == task])
        ppv = precision_score(y_test[cohorts_test == task], y_pred[cohorts_test == task])
        specificity = recall_score(y_test[cohorts_test == task], y_pred[cohorts_test == task], pos_label=0)
        metrics_df.loc[task, 'AUC'] = auc
        metrics_df.loc[task, 'PPV'] = ppv
        metrics_df.loc[task, 'Specificity'] = specificity

    # calculate macro AUC
    metrics_df.loc['Macro', :] = metrics_df.loc[(metrics_df.index != 'Macro') & (metrics_df.index != 'Micro')].mean()

    # calculate micro AUC
    metrics_df.loc['Micro', 'AUC'] = roc_auc_score(y_test, y_scores)
    metrics_df.loc['Micro', 'PPV'] = precision_score(y_test, y_pred)
    metrics_df.loc['Micro', 'Specificity'] = recall_score(y_test, y_pred, pos_label=0)

In [78]:
metrics_df

Unnamed: 0,AUC,PPV,Specificity
CCU,0.8665,0.666667,0.993457
CSRU,0.889536,0.333333,0.997141
MICU,0.828538,0.661765,0.988731
SICU,0.853458,0.619048,0.991407
TSICU,0.866083,0.583333,0.993679
Macro,0.860823,0.572829,0.992883
Micro,0.86396,0.632,0.992433


In [13]:
from mtl_patients import bootstrap_predict

In [25]:
tasks

array(['CCU', 'CSRU', 'MICU', 'SICU', 'TSICU'], dtype=object)

In [51]:
task = 'all'
all_auc = bootstrap_predict(X_test, y_test, cohorts_test, task, model, num_bootstrap_samples=num_bootstrapped_samples)

    Bootstrap prediction for task "all"...


In [52]:
all_auc

[0.8739970418228951,
 0.8565794623662015,
 0.8714618159747236,
 0.8629457119194968,
 0.870499273927767,
 0.8497803347873687,
 0.8623905219211419,
 0.8720248055771425,
 0.8661502856073197,
 0.8670982920285211,
 0.8695966470211185,
 0.858191144187735,
 0.8450807188115106,
 0.8692095030375913,
 0.863687737887924,
 0.8475812009688526,
 0.850493644031743,
 0.8540055930251687,
 0.8654972460307107,
 0.8675524417015049,
 0.85117469127749,
 0.8618328502305845,
 0.8595521750968569,
 0.8777447515755201,
 0.8744497733860492,
 0.8693488323283662,
 0.8725211440175619,
 0.866978107220448,
 0.8688457578662552,
 0.8734620598896142,
 0.85658655291535,
 0.8508488805441003,
 0.8744075846186135,
 0.8657432880861757,
 0.849891301881548,
 0.865765977843452,
 0.8719588634700579,
 0.8530614864059991,
 0.8455944290973447,
 0.8638582655949537,
 0.8545614920784386,
 0.8665019768451028,
 0.8564185069005223,
 0.8538655546794789,
 0.8512126257154364,
 0.8423086686217675,
 0.8601644440158602,
 0.8678860520389584,
 0.

In [28]:
len(all_auc)

100

In [29]:
tasks

array(['CCU', 'CSRU', 'MICU', 'SICU', 'TSICU'], dtype=object)

In [43]:
tasks

array(['CCU', 'CSRU', 'MICU', 'SICU', 'TSICU'], dtype=object)

In [49]:
lst_of_tasks = list(tasks)
lst_of_tasks.append('Micro')
idx = pd.MultiIndex.from_product([lst_of_tasks, list(np.arange(1, 101))], names=['Cohort', 'Sample'])

In [50]:
idx

MultiIndex([(  'CCU',   1),
            (  'CCU',   2),
            (  'CCU',   3),
            (  'CCU',   4),
            (  'CCU',   5),
            (  'CCU',   6),
            (  'CCU',   7),
            (  'CCU',   8),
            (  'CCU',   9),
            (  'CCU',  10),
            ...
            ('Micro',  91),
            ('Micro',  92),
            ('Micro',  93),
            ('Micro',  94),
            ('Micro',  95),
            ('Micro',  96),
            ('Micro',  97),
            ('Micro',  98),
            ('Micro',  99),
            ('Micro', 100)],
           names=['Cohort', 'Sample'], length=600)

In [53]:
metrics2_df = pd.DataFrame(index=idx, columns=['AUC', 'PPV', 'Specificity'])

In [57]:
metrics2_df

Unnamed: 0_level_0,Unnamed: 1_level_0,AUC,PPV,Specificity
Cohort,Sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CCU,1,,,
CCU,2,,,
CCU,3,,,
CCU,4,,,
CCU,5,,,
...,...,...,...,...
Micro,96,,,
Micro,97,,,
Micro,98,,,
Micro,99,,,


In [67]:
metrics2_df.loc['Micro', 'AUC'] = all_auc

In [69]:
metrics2_df.loc['Micro']

Unnamed: 0_level_0,AUC,PPV,Specificity
Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.873997,,
2,0.856579,,
3,0.871462,,
4,0.862946,,
5,0.870499,,
...,...,...,...
96,0.873674,,
97,0.877892,,
98,0.863993,,
99,0.871884,,


In [34]:
list(tasks)

['CCU', 'CSRU', 'MICU', 'SICU', 'TSICU']

In [39]:
np.mean(all_auc, axis=0)

0.8635298880827722

In [76]:
metrics_df

Unnamed: 0,AUC,PPV,Specificity
CCU,0.8665,0.666667,0.993457
CSRU,0.889536,0.333333,0.997141
MICU,0.828538,0.661765,0.988731
SICU,0.853458,0.619048,0.991407
TSICU,0.866083,0.583333,0.993679
Macro,0.860823,0.572829,0.992883
Micro,0.86396,0.632,0.992433


In [73]:
metrics_df.loc[(metrics_df.index != 'Macro') & (metrics_df.index != 'Micro')].mean()

AUC            0.860823
PPV            0.572829
Specificity    0.992883
dtype: float64

In [75]:
metrics_df.loc['Macro', :] = metrics_df.loc[(metrics_df.index != 'Macro') & (metrics_df.index != 'Micro')].mean()

In [80]:
metrics2_df

Unnamed: 0_level_0,Unnamed: 1_level_0,AUC,PPV,Specificity
Cohort,Sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CCU,1,,,
CCU,2,,,
CCU,3,,,
CCU,4,,,
CCU,5,,,
...,...,...,...,...
Micro,96,0.873674,,
Micro,97,0.877892,,
Micro,98,0.863993,,
Micro,99,0.871884,,


In [81]:
metrics2_df.mean()

AUC            0.863577
PPV                 NaN
Specificity         NaN
dtype: float64

In [82]:
metrics2_df.loc[(metrics2_df.index != 'Micro')]

Unnamed: 0_level_0,Unnamed: 1_level_0,AUC,PPV,Specificity
Cohort,Sample,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CCU,1,,,
CCU,2,,,
CCU,3,,,
CCU,4,,,
CCU,5,,,
...,...,...,...,...
Micro,96,0.873674,,
Micro,97,0.877892,,
Micro,98,0.863993,,
Micro,99,0.871884,,


In [87]:
metrics2_df.query("Cohort != 'Micro'").mean()

AUC           NaN
PPV           NaN
Specificity   NaN
dtype: float64