# AIClinician core code
Builds 500 models using MIMIC-III training data  
Records best candidate models along the way from off-policy policy evaluation on MIMIC-III validation data  
Tests the best model on eICU (eRI) data  


TAKES:  
        - MIMICtable = m*59 table with raw values from MIMIC  
        - eICUtable = n*56 table with raw values from eICU  
        

GENERATES:  
        - MIMICraw = MIMIC RAW DATA m*47 array with columns in right order  
        - MIMICzs = MIMIC ZSCORED m*47 array with columns in right order, matching MIMICraw  
        - eICUraw = eICU RAW DATA n*47 array with columns in right order, matching MIMICraw  
        - eICUzs = eICU ZSCORED n*47 array with columns in right order, matching MIMICraw  
        - recqvi = summary statistics of all 500 models  
        - idxs = state membership of MIMIC test records, for all 500 models  
        - OA = optimal policy, for all 500 models  
        - allpols = detailed data about the best candidate models  


In [137]:
############################  IMPORT DATA   #####################################
# Load the MIMIC table from its CSV export, then preview the first rows.
mimic_csv_path = '/Users/faaiz/exportdir/MIMICtable.csv'
MIMICtable = pd.read_csv(mimic_csv_path)
MIMICtable.head()

Unnamed: 0,bloc,icustay_id,charttime,gender,age,elixhauser,re_admission,died_in_hosp,died_within_48h_of_out_time,mortality_90d,...,mechvent,Shock_Index,PaO2_FiO2,median_dose_vaso,max_dose_vaso,input_total,input_4hourly,output_total,output_4hourly,cumulated_balance
0,1.0,1006.0,4330306000.0,0.0,25154.170995,3.0,0.0,1.0,1.0,1.0,...,0.0,0.908732,257.5,0.0,0.0,0.0,0.0,8166.0,4083.0,-8166.0
1,3.0,1006.0,4330335000.0,0.0,25154.170995,3.0,0.0,1.0,1.0,1.0,...,0.0,0.728457,294.285714,0.0,0.0,0.0,0.0,12249.0,4083.0,-12249.0
2,6.0,1006.0,4330378000.0,0.0,25154.170995,3.0,0.0,1.0,1.0,1.0,...,0.0,0.912791,104.727273,0.0,0.0,275.0,275.0,15932.0,3683.0,-15657.0
3,1.0,1204.0,4794583000.0,1.0,29424.868472,5.0,0.0,0.0,0.0,1.0,...,0.0,0.504678,116.666667,0.0,0.0,5036.666667,36.666667,5025.0,2445.0,11.666667
4,2.0,1204.0,4794597000.0,1.0,29424.868472,5.0,0.0,0.0,0.0,1.0,...,0.0,0.528184,116.666667,0.0,0.0,5708.720883,672.054217,7140.0,2115.0,-1431.279117


In [39]:
import pandas as pd
import numpy as np

############################  MODEL PARAMETERS   #####################################
nr_reps = 500             # nr of repetitions (total nr models)
nclustering = 32          # how many times we do clustering (best solution will be chosen)
prop = 0.25               # proportion of the data we sample for clustering
gamma = 0.99              # gamma
transthres = 5            # threshold for pruning the transition matrix
polkeep = 1               # count of saved policies
ncl = 750                 # nr of states
nra = 5                   # nr of actions (2 to 10)
ncv = 5                   # nr of crossvalidation runs (each is 80% training / 20% test)

# Result containers, pre-filled with NaN and sized from the parameters above.
# OA has ncl + 2 rows labelled 1..ncl+2 (752 rows for ncl = 750), one column per model.
# NOTE(review): the 2 rows beyond the ncl clustered states are presumably the terminal
# states -- TODO confirm.
OA = pd.DataFrame(index=range(1, ncl + 3), columns=range(nr_reps))  # record of optimal actions
# saves data about each model (1 row per model); allocated with 2 * nr_reps rows
recqvi = pd.DataFrame(index=range(nr_reps * 2), columns=range(30))
# one row per model, 15 columns
allpols = pd.DataFrame(index=range(nr_reps), columns=range(15))

In [40]:
#################   Convert training data and compute conversion factors    ######################

# all 47 columns of interest
colbin = ['gender','mechvent','max_dose_vaso','re_admission']
colnorm=['age','Weight_kg','GCS','HR','SysBP','MeanBP','DiaBP','RR','Temp_C','FiO2_1',\
    'Potassium','Sodium','Chloride','Glucose','Magnesium','Calcium',\
    'Hb','WBC_count','Platelets_count','PTT','PT','Arterial_pH','paO2','paCO2',\
    'Arterial_BE','HCO3','Arterial_lactate','SOFA','SIRS','Shock_Index','PaO2_FiO2','cumulated_balance']
collog=['SpO2','BUN','Creatinine','SGOT','SGPT','Total_bili','INR','input_total','input_4hourly','output_total','output_4hourly']

# NOTE: despite its name, MIMICraw is transformed in place by the loop below and ends up
# holding the shifted / z-scored values, not the raw ones.
MIMICraw = MIMICtable[colbin+colnorm+collog].copy()

# Conversion factors, keyed by column name. The scalars cmu/csigma/dmu/dsigma are overwritten
# on every column and therefore only describe the LAST column of their group; these dicts keep
# the factors of every column so the same transform can be re-applied to another table.
norm_factors = {}   # column -> (mean, std) of the values
log_factors = {}    # column -> (mean, std) of log(0.1 + values)

for col in MIMICraw:
    if col in colbin:
        # binary group: shift by 0.5 (0/1 becomes -0.5/+0.5)
        MIMICraw[col] = MIMICraw[col] - 0.5
    elif col in colnorm:
        # z-score (pandas .std() is the sample standard deviation, ddof=1)
        cmu = MIMICraw[col].mean()
        csigma = MIMICraw[col].std()
        norm_factors[col] = (cmu, csigma)
        MIMICraw[col] = (MIMICraw[col] - cmu)/csigma
    else:
        # remaining columns (collog): z-score of log(0.1 + value)
        log_values = np.log(0.1 + MIMICraw[col])
        dmu = log_values.mean()
        dsigma = log_values.std()
        log_factors[col] = (dmu, dsigma)
        MIMICraw[col] = (log_values - dmu)/dsigma

In [41]:
# Extra per-column adjustments applied after the main conversion loop.
# Both statements read and overwrite the same column, so re-running this cell
# applies them a second time.
MIMICraw['re_admission'] = np.log(0.6 + MIMICraw['re_admission'])
MIMICraw['input_4hourly'] = MIMICraw['input_4hourly'] * 2

In [42]:
# Preview the first rows of the transformed feature table.
MIMICraw.head()

Unnamed: 0,gender,mechvent,max_dose_vaso,re_admission,age,Weight_kg,GCS,HR,SysBP,MeanBP,...,BUN,Creatinine,SGOT,SGPT,Total_bili,INR,input_total,input_4hourly,output_total,output_4hourly
0,-0.5,-0.5,-0.5,-2.302585,0.162442,-0.932032,0.452381,0.668997,-0.342088,-0.523059,...,-0.28204,-0.425292,0.060817,-0.752855,-1.18496,1.087071,-2.609263,-3.185541,-0.369111,0.360191
1,-0.5,-0.5,-0.5,-2.302585,0.162442,-0.101282,0.452381,0.836799,1.264227,1.489449,...,-0.28204,-0.425292,0.344047,0.396062,0.085004,1.087071,-2.609263,-3.185541,-0.198429,0.360191
2,-0.5,-0.5,-0.5,-2.302585,0.162442,4.704987,0.452381,0.881415,-0.152727,0.319619,...,-0.389138,-0.425292,-0.200857,-0.464689,-0.15037,-0.780499,-0.400705,1.333276,-0.087765,0.32581
3,0.5,-0.5,-0.5,-2.302585,1.006716,-1.128391,0.452381,-0.770583,1.53988,-0.404575,...,1.892863,1.587865,0.060817,-0.464689,-0.453821,0.102814,0.410073,0.184964,-0.573506,0.189199
4,0.5,-0.5,-0.5,-2.302585,1.006716,-1.128391,0.452381,-1.401212,0.105395,-1.192964,...,1.892863,1.587865,0.060817,-0.464689,-0.453821,0.102814,0.445001,1.843004,-0.425631,0.140851


In [46]:
from sklearn.cluster import KMeans
from scipy.stats import rankdata

icuuniqueids = MIMICtable['icustay_id'].unique()

for model in range(nr_reps):
    # Randomly assign every ICU stay to one of ncv folds (labels 1..ncv); fold `crossval`
    # is held out as the test set, the remaining folds form the training set.
    grp = np.floor(ncv*np.random.rand(len(icuuniqueids))+1)
    crossval = 1
    trainidx = icuuniqueids[grp != crossval]
    testidx = icuuniqueids[grp == crossval]

    # Row masks over MIMICtable / MIMICraw (same row index), computed once per model.
    train_rows = MIMICtable['icustay_id'].isin(trainidx)
    test_rows = MIMICtable['icustay_id'].isin(testidx)

    X = MIMICraw[train_rows]
    Xtestmimic = MIMICraw[test_rows]
    blocs = MIMICtable.loc[train_rows, 'bloc']
    bloctestmimic = MIMICtable.loc[test_rows, 'bloc']  # test rows (was selected with trainidx)
    ptid = MIMICtable.loc[train_rows, 'icustay_id']
    ptidtestmimic = MIMICtable.loc[test_rows, 'icustay_id']
    Y90 = MIMICtable.loc[train_rows, 'mortality_90d']

    print('########################   MODEL NUMBER : ', model)

    #######   find best clustering solution (lowest intracluster variability)  ####################
    print('####  CLUSTERING  ####')
    # keep each training row with probability `prop`
    sample = X[np.floor(np.random.rand(len(X))+prop) == 1]
    # KMeans cannot build more clusters than it has samples: use ncl states whenever the
    # sample is large enough, and fall back to the sample size on a reduced dataset.
    n_states = min(ncl, len(sample))
    # Fit on the training subsample only; n_init restarts the clustering `nclustering`
    # times and keeps the run with the lowest inertia.
    kmeans = KMeans(n_clusters=n_states, n_init=nclustering, random_state=0).fit(sample)
    # state membership of every training row = index of its closest centroid
    idx = kmeans.predict(X)

    print('############################# CREATE ACTIONS  ########################')
    # NOTE: nothing in this section depends on `model`; it is recomputed on every iteration.
    nact = nra**2
    fluid_given = MIMICtable['input_4hourly']>0
    input_4hourly_nonzero = MIMICtable.loc[fluid_given, 'input_4hourly']
    iol_ranked = rankdata(input_4hourly_nonzero)/len(input_4hourly_nonzero) # excludes zero fluid (will be action 1)
    iof = np.floor((iol_ranked + 0.2499999999)*4) # converts iv volume in 4 actions
    io = np.ones(len(MIMICtable)) # array of ones, by default
    io[fluid_given] = iof + 1 # where more than zero fluid given: save actual action
    vc = MIMICtable['max_dose_vaso'].copy()
    vc_nonzero = MIMICtable.loc[MIMICtable['max_dose_vaso']!=0, 'max_dose_vaso']
    vc_ranked = rankdata(vc_nonzero)/len(vc_nonzero)
    vcf = np.floor((vc_ranked + 0.2499999999)*4) # converts to 4 bins
    vcf[vcf==0] = 1
    vc[vc!=0] = vcf + 1
    vc[vc==0] = 1
    # median dose of drug in all bins
    # NOTE: the binning above is hard-coded to 4 non-zero bins, i.e. it assumes nra == 5.
    ma1 = [MIMICtable.loc[io==level, 'input_4hourly'].median() for level in range(1, nra + 1)]
    ma2 = [MIMICtable.loc[vc==level, 'max_dose_vaso'].median() for level in range(1, nra + 1)]
    
    
    
    

In [133]:
# Discretise IV fluid volume and vasopressor dose into 5 levels each
# (level 1 = the default for rows that fail the "given" test below).
nact = nra ** 2

# --- IV fluids: rank the positive volumes, then cut the normalised rank into 4 bins ---
fluid_given = (MIMICtable['input_4hourly'] > 0).to_numpy()
input_4hourly_nonzero = MIMICtable.loc[fluid_given, 'input_4hourly']
iol_ranked = rankdata(input_4hourly_nonzero) / len(input_4hourly_nonzero)  # zero fluid is excluded (stays action 1)
iof = np.floor(4 * (iol_ranked + 0.2499999999))  # bin number of each positive volume
io = np.ones(len(MIMICtable))  # every row starts as action 1
io[fluid_given] = 1 + iof  # rows with fluid given get their bin, shifted past action 1

# --- vasopressors: same scheme on the non-zero doses ---
vc = MIMICtable['max_dose_vaso'].copy()
vaso_given = vc != 0  # mask taken before vc is modified
vc_nonzero = MIMICtable.loc[vaso_given, 'max_dose_vaso']
vc_ranked = rankdata(vc_nonzero) / len(vc_nonzero)
vcf = np.floor(4 * (vc_ranked + 0.2499999999))  # 4 bins
vcf[vcf == 0] = 1
vc[vaso_given] = vcf + 1
vc[vc == 0] = 1

# median dose of drug in each of the 5 bins
ma1 = [MIMICtable.loc[io == level, 'input_4hourly'].median() for level in range(1, 6)]
ma2 = [MIMICtable.loc[vc == level, 'max_dose_vaso'].median() for level in range(1, 6)]



In [136]:
# Display the median vasopressor dose (max_dose_vaso) of each of the 5 dose bins.
ma2

[0.0, 0.06, 0.1, 0.18, 0.24]

In [90]:
# Scratch step: add 1 to every entry of io, in place.
# NOTE(review): this cell's execution counter (In [90]) is lower than that of the cell that
# builds io (In [133]) -- confirm whether this step is still wanted.
io+=1

In [93]:
# Scratch step: overwrite every entry of io that is greater than 2 with 4.
io[io>2] = 4

In [94]:
# Display the current contents of io.
io

array([4., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2.