In [1]:
import sys
import os
import pandas as pd
import seaborn as sns

In [2]:
sys.path.append('..')

In [3]:
from icenode.train_icenode_2lr import ICENODE
from icenode.train_gram import GRAM
from icenode.train_retain import RETAIN

%load_ext autoreload
%autoreload 2



## Define Directories

In [4]:
# Resolve the user's home directory. os.environ.get('HOME') yields None when
# the variable is unset, which would turn every path below into 'None/GP/...';
# fall back to os.path.expanduser('~') in that case.
HOME = os.environ.get('HOME') or os.path.expanduser('~')

# MIMIC-III Dataset Directory
mimic3_dir = f'{HOME}/GP/ehr-data/mimic3-transforms'

# ICE-NODE/M trained on MIMIC-III training partition (70%)
icenode_m3_params = f'{HOME}/GP/ehr-data/icenode-m3-exp/v0.2.20.dynM3_icenode_2lr_M/trial_097/step0045_params.pickle'
icenode_m3_config = f'{HOME}/GP/ehr-data/icenode-m3-exp/v0.2.20.dynM3_icenode_2lr_M/trial_097/config.json'

# The baseline directories below are disabled in this notebook; the load and
# create calls that would consume them are commented out as well.

# RETAIN trained on MIMIC-III training partition (70%)
# retain_m3_dir = f'{HOME}/GP/ehr-data/icenode-m3-exp/v0.1.23M3_retain_M/frozen_trial_142'

# # GRU=GRAM/M trained on MIMIC-III training partition (70%)
# gru_m3_dir = f'{HOME}/GP/ehr-data/icenode-m3-exp/v0.1.23M3_gram_M/frozen_trial_615'

# # GRAM=GRAM/G trained on MIMIC-III training partition (70%)
# gram_m3_dir = f'{HOME}/GP/ehr-data/icenode-m3-exp/v0.1.23M3_gram_G/frozen_trial_442'

## Patient Interface for each Model

In [5]:
# Build the patient interface from the MIMIC-III directory configured above.
# NOTE(review): 'M3' presumably selects the MIMIC-III dataset -- confirm
# against ICENODE.create_patient_interface.
# NOTE(review): the name is spelled `icnode_` (no 'e'); the later cells use
# this exact spelling, so it is left as is.
icnode_patient_interface = ICENODE.create_patient_interface(mimic3_dir, 'M3')

2022-02-22 17:24:45.197046: E external/org_tensorflow/tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error


## Dataset Partitioning

In [6]:
import random

# Seed 42 is the seed used in all experiments of this work; a dedicated
# Random instance keeps the shuffle independent of the global RNG state.
rng = random.Random(42)
subjects_id = [*icnode_patient_interface.subjects.keys()]
rng.shuffle(subjects_id)

# Cut points over the shuffled ids for train:val:test = 0.70:0.15:0.15.
n_subjects = len(subjects_id)
splits = (int(.7 * n_subjects), int(.85 * n_subjects))
train_end, valid_end = splits

train_ids = subjects_id[:train_end]
valid_ids = subjects_id[train_end:valid_end]
test_ids = subjects_id[valid_end:]

## Load Configs and Trained Params

In [7]:
from icenode.utils import load_config, load_params

# Load the ICE-NODE experiment configuration and the trained parameters from
# the paths defined in the directories cell.
# NOTE(review): the params file has a .pickle extension, so load_params
# presumably unpickles it -- only point this at trusted files.
icenode_config = load_config(icenode_m3_config)
icenode_params = load_params(icenode_m3_params)

# Baseline loaders (RETAIN, GRU, GRAM) are disabled; their *_m3_dir variables
# are commented out in the directories cell as well.

# retain_config = load_config(f'{retain_m3_dir}/config.json')
# retain_params = load_params(f'{retain_m3_dir}/step0100_params.pickle')

# gru_config = load_config(f'{gru_m3_dir}/config.json')
# gru_params = load_params(f'{gru_m3_dir}/step0100_params.pickle')

# gram_config = load_config(f'{gram_m3_dir}/config.json')
# gram_params = load_params(f'{gram_m3_dir}/step0100_params.pickle')

## Create Model Objects

In [8]:
icenode_config

{'emb': {'diag': {'embeddings_dim': 300}, 'kind': 'matrix'},
 'model': {'ode_dyn': 'mlp3',
  'ode_init_var': 1.0776395319697402e-07,
  'ode_with_bias': False,
  'state_size': 30,
  'timescale': 7},
 'training': {'batch_size': 256,
  'decay_rate1': 0.27729587471948475,
  'decay_rate2': 0.3293406876704547,
  'epochs': 60,
  'loss_mixing': {'L_dyn': 1000.0, 'L_l1': 0, 'L_l2': 0},
  'lr1': 7.156197978746877e-05,
  'lr2': 0.0011484691965460785,
  'optimizer': 'adam'}}

In [9]:
# Instantiate ICE-NODE from the loaded config, the patient interface and the
# training subject ids, then initialise its state from the trained parameters.
# NOTE(review): the meaning of the trailing None argument is not visible
# here -- confirm against ICENODE.create_model.
icenode = ICENODE.create_model(icenode_config, icnode_patient_interface, train_ids, None)
model_state = icenode.init_with_params(icenode_config, icenode_params)
# Baseline models are disabled; their configs and interfaces are not created
# in this notebook.
# retain = RETAIN.create_model(retain_config, retain_patient_interface, train_ids, None)
# gru = GRAM.create_model(gru_config, gram_patient_interface, train_ids, None)
# gram = GRAM.create_model(gram_config, gram_patient_interface, train_ids, None)

In [10]:
# Code partitions derived from the training subjects only; they are passed to
# evaluation_table further down.
# NOTE(review): presumably these back the ACC-P0..ACC-P4 rows of the
# evaluation table -- confirm against ICENODE.code_partitions.
code_partitions = ICENODE.code_partitions(icnode_patient_interface, train_ids) 

In [11]:
from icenode.metrics import evaluation_table
# Evaluate the restored model state on the held-out test subjects.
res = icenode.eval(model_state, test_ids)

In [12]:
# Tabulate the test results; 'TST' is the label given to this result set and
# appears as the column name in the displayed table. The second return value
# is not used in this notebook.
eval_df, _ = evaluation_table({'TST': res}, code_partitions)



In [13]:
eval_df

Unnamed: 0,TST
ACC-P0,0.251775
ACC-P1,0.50744
ACC-P2,0.675935
ACC-P3,0.868588
ACC-P4,0.947858
Kfe,23.396
MACRO-AUC,0.920362
MICRO-AUC,0.924925
accuracy,0.880432
admissions_count,1009.0


## Analyse AUC for Each Admission in the Test Partition

In [14]:
# Per-admission scores for the test subjects. The displayed frame has one row
# per admission (SUBJECT_ID, HADM_ID) with AUC, N_CODES, INTERVALS, LOS, R/T
# and NFE columns, among others.
icenode_auc_df = icenode.admissions_auc_scores(model_state, test_ids)

In [15]:
icenode_auc_df

Unnamed: 0,SUBJECT_ID,HADM_ID,HADM_IDX,AUC,N_CODES,TIME,INTERVALS,LOS,R/T,NFE
0,19842,169734,0,0.976900,7.0,412.5,412.5,3.5,0.0,26.0
1,19842,186772,1,0.939872,13.0,433.5,21.0,7.5,0.0,20.0
2,19842,180229,2,0.947650,8.0,448.5,15.0,3.5,0.0,20.0
3,19842,122439,3,0.944203,12.0,509.5,61.0,7.5,0.0,20.0
4,19842,163738,4,0.983078,11.0,552.5,43.0,5.5,0.0,20.0
...,...,...,...,...,...,...,...,...,...,...
1003,26421,192868,1,0.955274,5.0,278.5,186.0,3.5,0.0,26.0
1004,2558,105758,0,0.910348,9.0,59.5,59.5,4.5,0.0,20.0
1005,2558,194247,1,0.973860,7.0,70.5,11.0,4.5,0.0,14.0
1006,10832,196835,0,0.973860,7.0,647.5,647.5,8.5,0.0,26.0


In [16]:
# Per-subject summary of the admission-level scores. The result has
# two-level columns: (source column, statistic).
per_subject_stats = {
    'AUC': 'mean',
    'N_CODES': ['min', 'max', 'mean', 'median'],
    'INTERVALS': ['mean'],
    'R/T': ['min', 'max', 'mean'],
}
auc_subject = icenode_auc_df.groupby('SUBJECT_ID').agg(per_subject_stats)

In [17]:
auc_subject['R/T']['mean'].max()

0.0

In [19]:
# Keep only the subjects whose mean admission AUC exceeds 0.95.
high_auc_mask = auc_subject[('AUC', 'mean')] > 0.95
interest_subjects = auc_subject.loc[high_auc_mask]
interest_subjects

Unnamed: 0_level_0,AUC,N_CODES,N_CODES,N_CODES,N_CODES,INTERVALS,R/T,R/T,R/T
Unnamed: 0_level_1,mean,min,max,mean,median,mean,min,max,mean
SUBJECT_ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
154,0.999475,3.0,4.0,3.500000,3.5,1824.750000,0.0,0.0,0.0
299,1.000000,1.0,1.0,1.000000,1.0,7.500000,0.0,0.0,0.0
731,0.972845,10.0,10.0,10.000000,10.0,26.500000,0.0,0.0,0.0
1080,1.000000,3.0,3.0,3.000000,3.0,5.500000,0.0,0.0,0.0
1135,0.976817,14.0,14.0,14.000000,14.0,23.500000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
96371,0.962220,11.0,11.0,11.000000,11.0,1017.500000,0.0,0.0,0.0
96491,0.987552,1.0,1.0,1.000000,1.0,86.500000,0.0,0.0,0.0
96686,0.951499,13.0,21.0,16.333333,16.0,158.388889,0.0,0.0,0.0
96848,0.963399,17.0,17.0,17.000000,17.0,15.500000,0.0,0.0,0.0


In [20]:
# The index of interest_subjects is SUBJECT_ID (the groupby key); convert it
# to a plain Python list, which is later passed to sample_trajectory.
interest_subjects_l = interest_subjects.index.tolist()
interest_subjects_l

[154,
 299,
 731,
 1080,
 1135,
 1935,
 2258,
 2619,
 3191,
 3393,
 4571,
 4831,
 4910,
 5321,
 5620,
 5944,
 6038,
 6202,
 6502,
 6706,
 6783,
 6828,
 6969,
 7102,
 7118,
 7211,
 7301,
 7382,
 7534,
 7671,
 7731,
 7752,
 7837,
 7936,
 8283,
 8619,
 8897,
 9480,
 9486,
 9517,
 9555,
 9602,
 9923,
 10226,
 10376,
 11045,
 11338,
 11474,
 11479,
 11677,
 11861,
 12183,
 12449,
 12582,
 12773,
 13052,
 13179,
 13208,
 13316,
 13528,
 13719,
 14106,
 14332,
 14598,
 14749,
 14835,
 15153,
 15341,
 15357,
 15370,
 15453,
 15508,
 15686,
 15732,
 15883,
 16934,
 17531,
 17548,
 17794,
 18430,
 18454,
 18689,
 19344,
 19370,
 19488,
 19772,
 19793,
 19842,
 19894,
 20173,
 20217,
 20552,
 20749,
 20776,
 20862,
 20993,
 21297,
 21301,
 21454,
 21510,
 21700,
 22206,
 22225,
 22297,
 22392,
 22549,
 22600,
 22954,
 23489,
 23706,
 23785,
 24074,
 25027,
 25143,
 25148,
 25337,
 25473,
 25510,
 25801,
 25929,
 26228,
 26459,
 26698,
 26847,
 27020,
 27242,
 27468,
 27795,
 28902,
 28960,
 29123

In [None]:
# Persist the per-admission scores to the current working directory. This
# cell sits before the dtype conversions and the NFEpday column that the
# following cell adds. With to_csv defaults the row index is written as the
# first column.
icenode_auc_df.to_csv('icenode_auc_df.csv')

In [None]:
# Cast the count columns to int and add NFEpday (NFE divided by INTERVALS).
# The callables are evaluated in order, so NFEpday sees the already-cast NFE
# column; NFEpday is appended as the last column.
icenode_auc_df = icenode_auc_df.assign(
    NFE=lambda frame: frame['NFE'].astype(int),
    NFEpday=lambda frame: frame['NFE'] / frame['INTERVALS'],
    N_CODES=lambda frame: frame['N_CODES'].astype(int),
)


In [None]:
icenode_auc_df.dtypes

In [None]:
# Numeric columns kept for the per-admission statistics view.
stat_columns = ['AUC', 'N_CODES', 'INTERVALS', 'LOS', 'NFE', 'NFEpday']
icenode_stat = icenode_auc_df.loc[:, stat_columns]

In [None]:
icenode_stat

In [None]:
import seaborn as sns
# sns.pairplot(icenode_stat)

In [None]:
# Sample trajectories for the selected subjects. The result is consumed below
# through .items(), and each value is indexed with 't', 'd' and 'tp10'.
# NOTE(review): the meaning of the trailing 2 is not visible here -- confirm
# against ICENODE.sample_trajectory.
trajectory = icenode.sample_trajectory(model_state, interest_subjects_l, 2)

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Build, for every sampled trajectory, a long-format DataFrame with one row
# per (time point, code) pair, restricted to the codes returned by
# diag_ccs_history for that trajectory's key.
data = {}
for i, traj in trajectory.items():

    # Code labels and the matching column indices for this key.
    ccs_codes, ccs_idx = icnode_patient_interface.diag_ccs_history(i)
    t = traj['t']
    # Natural log of the 'd' array; its columns are selected by code index below.
    d = np.log(traj['d'])
    # NOTE(review): presumably a top-10 indicator per time point and code -- confirm.
    tp10 = traj['tp10']

    logprob = []
    time = []
    code = []
    tp = []
    for ccs_c, ccs_i in zip(ccs_codes, ccs_idx):
        # Each code contributes len(t) rows: the full time axis, the code
        # label repeated, and that code's column of d and tp10.
        time.append(t)
        code.extend([ccs_c] * len(t))
        logprob.append(d[:, ccs_i])
        tp.append(tp10[:, ccs_i])

    # Flatten the per-code pieces into aligned 1-D columns.
    logprob = np.hstack(logprob)
    time = np.hstack(time)
    tp = np.hstack(tp)
    data[i] = pd.DataFrame({'t': time, r'$\log\hat{v}$': logprob, 'code': code, 'tp': tp})
    

In [None]:
trajectory.keys()

In [None]:
# Draw one figure per entry of `data`: log-probability over time, one line
# per code, with the line style taken from the 'tp' column.
for i in data:
    plt.figure(i)
    ax = sns.lineplot(data=data[i], x="t", y=r'$\log\hat{v}$', hue='code', style='tp')
    # Label each figure with its key so the plots can be told apart.
    ax.set_title(f'trajectory {i}')