# Testing Procedure: MIMIC

In [9]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
# Every relative path used below ('data/...', 'models/...') is resolved against
# this working directory. It is a hard-coded absolute path, so it must be
# edited when the notebook is run on another machine.
# NOTE(review): presumably the project-local imports further down
# (models.common_utils, benchmarks) also rely on this being the cwd -- confirm.
os.chdir('/share/pi/boussard/eroosli_work/benchmarking')
import sys
if sys.version_info[0] == 3:
    from importlib import reload
import warnings
warnings.filterwarnings('ignore')
from imblearn.over_sampling import RandomOverSampler

from models.common_utils import optimal_epoch
from benchmarks import demographics as dm

Using TensorFlow backend.


## A) Find best epoch from training

In [2]:
# Look up the best epoch for the training log of run 2020-04-08_15-25 on the
# 'mimic' data set. The value displayed below (32) matches the `epoch32`
# checkpoint that section B loads.
# NOTE(review): the selection criterion lives in models.common_utils and is
# not visible here -- confirm which metric it optimises.
optimal_epoch('2020-04-08_15-25.k_clstms.none.csv', 'mimic')

32


## B) Predictions on test data

In [6]:
# Load demographics for the 'test' split of the augmented mortality data.
# Two objects come back; only the second (mimic_stays_test) is used below,
# where it is indexed by the columns 'CSV', 'ETHNICITY', 'INSURANCE' and 'IHM'.
# NOTE(review): judging by the names, the first is presumably patient-level
# and the second stay-level -- confirm against benchmarks.demographics.
mimic_pat_test, mimic_stays_test = dm.mimic_cohort_demographics('data/mimic/aug/mortality', 'test')

chosen option: test
EPISODE 3236 of 3236...

In [7]:
def get_ratios(df, variable):
    """Print the positive count and positive/negative ratio of ``IHM`` per group.

    One line is printed for every distinct value of ``df[variable]``, in
    order of first appearance. Each line holds two numbers: the sum of the
    ``IHM`` column within the group, and that sum divided by the number of
    remaining (non-positive) rows of the group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column named by ``variable`` and an ``IHM`` column.
    variable : str
        Name of the grouping column.

    Returns
    -------
    None
        The figures are only printed; the group value itself is not shown.
    """
    outcome = df['IHM']
    membership = df[variable]
    for level in membership.unique():
        in_group = outcome[membership == level]
        positives = in_group.sum()
        negatives = len(in_group) - positives
        print(positives, positives / negatives)

In [8]:
# Baseline before any oversampling: for each ETHNICITY value, print the number
# of positive IHM labels and the positive/negative ratio. Lines appear in
# order of first occurrence of each group and are not labelled.
get_ratios(mimic_stays_test, 'ETHNICITY')

64.0 0.17066666666666666
265.0 0.1281431334622824
28.0 0.10646387832699619
8.0 0.0784313725490196
9.0 0.16666666666666666


In [15]:
def equalizer(df, variable, sampling_strategy=0.22, random_state=0,
              output_dir='data/mimic/aug/mortality'):
    """Oversample positives per group to a common ratio and write a test listfile.

    For every distinct value of ``df[variable]`` the rows of that group are
    passed through ``RandomOverSampler`` so that the positive/negative ratio
    of ``IHM`` reaches ``sampling_strategy``. The resampled groups are then
    concatenated and written as ``<output_dir>/<variable>_test_listfile.csv``
    with the two columns ``stay`` (from ``CSV``) and ``y_true`` (from ``IHM``,
    cast to int). For each group one line is printed: the positive count
    after resampling and the resulting positive/negative ratio.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``CSV``, ``IHM`` and the one named by
        ``variable``.
    variable : str
        Grouping column; also used as the prefix of the output file name.
    sampling_strategy : float, default 0.22
        Target minority/majority ratio handed to ``RandomOverSampler``. It
        must not be lower than the ratio any group already has, otherwise
        imbalanced-learn raises ``ValueError``.
    random_state : int or None, default 0
        Seed for the oversampler so that repeated runs write the same
        listfile. Pass ``None`` to get the previous unseeded behaviour.
    output_dir : str, default 'data/mimic/aug/mortality'
        Directory the listfile is written to.

    Returns
    -------
    None
        The result is written to disk; nothing is returned, so calling this
        as the last expression of a cell displays no value.

    Raises
    ------
    ValueError
        Propagated from ``RandomOverSampler.fit_resample`` (e.g. a group with
        a single class) or from ``pandas.concat`` when ``df`` has no groups.
    """
    ros = RandomOverSampler(sampling_strategy=sampling_strategy,
                            random_state=random_state)

    # Carry the actual grouping column instead of a hard-coded 'ETHNICITY';
    # dict.fromkeys drops a duplicate if `variable` is itself 'CSV' or 'IHM'.
    columns = list(dict.fromkeys(['CSV', variable, 'IHM']))

    frames = []
    for group in df[variable].unique():
        df_group = df[df[variable] == group]
        X = df_group[columns]
        y = df_group['IHM']
        X_res, y_res = ros.fit_resample(X, y)
        positives = y_res.sum()
        print(positives, positives / (len(y_res) - positives))
        frames.append(X_res)

    result = pd.concat(frames).rename(columns={'CSV': 'stay', 'IHM': 'y_true'})
    result['y_true'] = result['y_true'].astype(int)
    result[['stay', 'y_true']].to_csv(
        os.path.join(output_dir, variable + '_test_listfile.csv'), index=False)

In [11]:
# Oversample each ETHNICITY group and write
# data/mimic/aug/mortality/ETHNICITY_test_listfile.csv.
# NOTE(review): the ratios shown below are ~0.19, whereas the equalizer cell
# above was last run as In [15] with sampling_strategy=0.22. This cell ran
# earlier (In [11]), so its output presumably comes from a previous version
# of the function -- re-run to refresh both the output and the file.
equalizer(mimic_stays_test, 'ETHNICITY')

71.0 0.18933333333333333
392.0 0.1895551257253385
49.0 0.18631178707224336
19.0 0.18627450980392157
10.0 0.18518518518518517


In [16]:
# Oversample each INSURANCE group and write
# data/mimic/aug/mortality/INSURANCE_test_listfile.csv, the listfile that the
# evaluation cells in section C pass via --test_listfile.
equalizer(mimic_stays_test, 'INSURANCE')

205.0 0.21995708154506438
350.0 0.219435736677116
7.0 0.21875
48.0 0.21621621621621623
17.0 0.20987654320987653


In [None]:

# Stand-alone variant of the per-group oversampling: target ratio 0.19, only
# the ETHNICITY values 1-4, result written to harmonized_test_listfile.csv.
# This cell has no execution count (In [None]), i.e. it was not run in the
# saved session.
# No random_state is passed, so the duplicated rows differ between runs.
ros = RandomOverSampler(sampling_strategy=0.19)

frames = []
# NOTE(review): only four ETHNICITY values are listed here although the
# get_ratios output above prints five groups -- confirm the omission is
# intentional.
for ethnicity in [1,2,3,4]:
    # `df`, `X` and `y` are rebound at notebook (global) scope on every pass.
    df = mimic_stays_test[mimic_stays_test['ETHNICITY']==ethnicity]
    X = df[['CSV', 'ETHNICITY', 'IHM']]
    y = df['IHM']
    X_res, y_res = ros.fit_resample(X, y)
    # Positive count after resampling and the positive/negative ratio reached.
    print(y_res.sum(), y_res.sum()/(len(y_res)-y_res.sum()))
    frames.append(X_res)

result = pd.concat(frames)

# Rename to the 'stay' / 'y_true' column names used in the written listfile
# and store the label as an integer.
result.rename(columns={'CSV': 'stay', 'IHM': 'y_true'}, inplace=True)
result['y_true'] = result['y_true'].apply(lambda d: int(d))
result[['stay', 'y_true']].to_csv(os.path.join('data/mimic/aug/mortality', 'harmonized_test_listfile.csv'), index=False)

In [20]:
# Run the IHM script in test mode (--mode test) with the channel-wise LSTM
# network, restoring the epoch-32 weights given by --load_state (the epoch
# found in section A). Ethnicity, Gender and Insurance are passed to
# --mask_demographics; the log below reports them as excluded demographics.
# The remaining flags (--dim, --depth, --dropout, --size_coef, ...) mirror the
# values encoded in the checkpoint file name.
%run models/ihm/main.py --network models/keras_models/channel_wise_lstms.py --mask_demographics "Ethnicity" "Gender" "Insurance" \
--data data/mimic/aug/mortality --data_name 'mimic' --dim 8 --depth 1 --batch_size 8 --dropout 0.3 --timestep 1.0 \
--load_state models/outputs/mimic/keras_states/2020-04-08_15-25_/2020-04-08_15-25_k_channel_wise_lstms_aug.n8.szc4.0.d0.3.dep1.bs8.ts1.0.epoch32.test0.28141715225643954.state --mode test --size_coef 4.0

normalizer state: /share/pi/boussard/eroosli_work/benchmarking/models/ihm/ihm_ts1.0.input_str_previous.start_time_zero.normalizer
==> using model models/keras_models/channel_wise_lstms.py
==> not used params in network class: dict_keys(['SMOTE', 'batch_size', 'beta_1', 'data', 'data_name', 'epochs', 'harmonizing', 'imputation', 'l1', 'l2', 'load_state', 'lr', 'mode', 'network', 'normalizer_state', 'optimizer', 'output_dir', 'prefix', 'save_every', 'small_part', 'target_repl_coef', 'timestep', 'validating', 'verbose'])
==> excluded demographics: ['Ethnicity', 'Gender', 'Insurance']
==> found 17 channels: ['Capillary refill rate', 'Diastolic blood pressure', 'Fraction inspired oxygen', 'Glascow coma scale eye opening', 'Glascow coma scale motor response', 'Glascow coma scale total', 'Glascow coma scale verbal response', 'Glucose', 'Heart rate', 'Height', 'Mean blood pressure', 'Oxygen saturation', 'Respiratory rate', 'Systolic blood pressure', 'Temperature', 'Weight', 'pH']
==> model.fin

## C) Evaluate test predictions

In [25]:
# Evaluate the saved test predictions against the INSURANCE-equalised listfile
# with --stratify; the script reports writing a *.stratified_ihm_results.json
# file into the confvals directory.
%run models/evaluation/evaluate_ihm.py 'models/outputs/mimic/predictions/results/TEST.2020-06-19_10-23.k_clstms.NONE.csv' \
--test_listfile 'data/mimic/aug/mortality/INSURANCE_test_listfile.csv' --output_dir 'models/outputs/mimic/predictions/confvals' --stratify

Saving the results in models/outputs/mimic/predictions/confvals/2020-06-19_10-46.stratified_ihm_results.json ...


In [26]:
# Same predictions and listfile as the previous evaluation cell, but without
# --stratify; the script reports writing a *.nonstratified_ihm_results.json
# file into the confvals directory.
%run models/evaluation/evaluate_ihm.py 'models/outputs/mimic/predictions/results/TEST.2020-06-19_10-23.k_clstms.NONE.csv' \
--test_listfile 'data/mimic/aug/mortality/INSURANCE_test_listfile.csv' --output_dir 'models/outputs/mimic/predictions/confvals'

Saving the results in models/outputs/mimic/predictions/confvals/2020-06-19_11-2.nonstratified_ihm_results.json ...
