# AIClinician core code
Builds 500 models using MIMIC-III training data  
Records best candidate models along the way from off-policy policy evaluation on MIMIC-III validation data  
Tests the best model on eICU Research Institute (eRI) data  


TAKES:  
        - MIMICtable = m*59 table with raw values from MIMIC  
        - eICUtable = n*56 table with raw values from eICU  
        

GENERATES:  
        - MIMICraw = MIMIC RAW DATA m*47 array with columns in right order  
        - MIMICzs = MIMIC ZSCORED m*47 array with columns in right order, matching MIMICraw  
        - eICUraw = eICU RAW DATA n*47 array with columns in right order, matching MIMICraw  
        - eICUzs = eICU ZSCORED n*47 array with columns in right order, matching MIMICraw  
        - recqvi = summary statistics of all 500 models  
        - idxs = state membership of MIMIC test records, for all 500 models  
        - OA = optimal policy, for all 500 models  
        - allpols = detailed data about the best candidate models  


In [10]:
############################  IMPORT DATA   #####################################
# pandas is imported in this cell (in addition to the later parameters cell) so
# that the file also runs when executed top to bottom; without it the read_csv
# call below is reached before any `pd` binding exists.
import pandas as pd

# Load the MIMIC table described in the file header (m*59 raw values).
# NOTE(review): absolute, machine-specific path -- adjust to the local export directory.
MIMICtable = pd.read_csv('/Users/faaiz/exportdir/MIMICtable.csv')
MIMICtable.head()  # notebook preview of the first rows

Unnamed: 0,bloc,icustay_id,charttime,gender,age,elixhauser,re_admission,died_in_hosp,died_within_48h_of_out_time,mortality_90d,...,mechvent,Shock_Index,PaO2_FiO2,median_dose_vaso,max_dose_vaso,input_total,input_4hourly,output_total,output_4hourly,cumulated_balance
0,1.0,1006.0,4330306000.0,0.0,25154.170995,3.0,0.0,1.0,1.0,1.0,...,0.0,0.908732,257.5,0.0,0.0,0.0,0.0,8166.0,4083.0,-8166.0
1,3.0,1006.0,4330335000.0,0.0,25154.170995,3.0,0.0,1.0,1.0,1.0,...,0.0,0.728457,294.285714,0.0,0.0,0.0,0.0,12249.0,4083.0,-12249.0
2,6.0,1006.0,4330378000.0,0.0,25154.170995,3.0,0.0,1.0,1.0,1.0,...,0.0,0.912791,96.0,0.0,0.0,275.0,275.0,15932.0,3683.0,-15657.0
3,1.0,1204.0,4794583000.0,1.0,29424.868472,5.0,0.0,0.0,0.0,1.0,...,0.0,0.504678,116.666667,0.0,0.0,5036.666667,36.666667,5025.0,2445.0,11.666667
4,2.0,1204.0,4794597000.0,1.0,29424.868472,5.0,0.0,0.0,0.0,1.0,...,0.0,0.528184,116.666667,0.0,0.0,5708.720883,672.054217,7140.0,2115.0,-1431.279117


In [16]:
import pandas as pd
import numpy as np

############################  MODEL PARAMETERS   #####################################
nr_reps = 500       # nr of repetitions (total nr models)
nclustering = 32    # how many times we do clustering (best solution will be chosen)
prop = 0.25         # proportion of the data we sample for clustering
gamma = 0.99        # gamma
transthres = 5      # threshold for pruning the transition matrix
polkeep = 1         # count of saved policies
ncl = 750           # nr of states
nra = 5             # nr of actions (2 to 10)
ncv = 5             # nr of crossvalidation runs (each is 80% training / 20% test)

# Pre-allocated result tables; every cell starts out as NaN.

# Record of optimal actions: one column per model, rows labelled 1..ncl+2.
# The row count is derived from ncl instead of the former literal 753 so that
# changing the number of states keeps this table consistent.
# NOTE(review): the two rows beyond ncl are presumably extra terminal states -- confirm.
OA = pd.DataFrame(index=range(1, ncl + 3), columns=range(nr_reps))

# Saves data about each model in 30 columns; allocated with 2 * nr_reps rows.
recqvi = pd.DataFrame(index=range(nr_reps * 2), columns=range(30))

# One row of 15 fields per repetition; the file header describes allpols as
# detailed data about the best candidate models.
allpols = pd.DataFrame(index=range(nr_reps), columns=range(15))

In [25]:
#################   Convert training data and compute conversion factors    ######################

# The 47 columns of interest, split into three groups. The order
# colbin + colnorm + collog defines the column order of the feature table.

# 4 columns that are only shifted by -0.5.
colbin = [
    'gender', 'mechvent', 'max_dose_vaso', 're_admission',
]

# 32 columns that are z-scored directly.
colnorm = [
    'age', 'Weight_kg', 'GCS', 'HR', 'SysBP', 'MeanBP', 'DiaBP', 'RR', 'Temp_C', 'FiO2_1',
    'Potassium', 'Sodium', 'Chloride', 'Glucose', 'Magnesium', 'Calcium',
    'Hb', 'WBC_count', 'Platelets_count', 'PTT', 'PT', 'Arterial_pH', 'paO2', 'paCO2',
    'Arterial_BE', 'HCO3', 'Arterial_lactate', 'SOFA', 'SIRS', 'Shock_Index', 'PaO2_FiO2',
    'cumulated_balance',
]

# 11 columns that are log-transformed before being z-scored.
collog = [
    'SpO2', 'BUN', 'Creatinine', 'SGOT', 'SGPT', 'Total_bili', 'INR',
    'input_total', 'input_4hourly', 'output_total', 'output_4hourly',
]

# Select the feature columns in the order colbin, colnorm, collog; .copy()
# makes the assignments below leave MIMICtable itself untouched.
MIMICraw = MIMICtable[colbin+colnorm+collog].copy()

# NOTE(review): the file header describes MIMICraw as RAW data and MIMICzs as
# the z-scored table, but the transformed values are written back into MIMICraw
# here and no MIMICzs is created in this cell. The name is kept so that code
# using MIMICraw afterwards is unaffected -- confirm the intended naming.

# Conversion factors, keyed by column name, kept so that the identical scaling
# can be applied to another table later. For the collog columns the factors
# refer to the log-transformed values, not to the original ones.
conv_mean = {}
conv_std = {}

# colbin columns are only shifted by -0.5 (no scaling, no stored factors).
for col in colbin:
    MIMICraw[col] = MIMICraw[col] - 0.5

# colnorm columns: plain z-score with the column's own mean and pandas'
# default standard deviation.
for col in colnorm:
    mu = MIMICraw[col].mean()
    sigma = MIMICraw[col].std()
    conv_mean[col] = mu
    conv_std[col] = sigma
    MIMICraw[col] = (MIMICraw[col] - mu)/sigma

# collog columns: log(0.1 + x) first, then z-score of the logged values.
for col in collog:
    log_values = np.log(0.1 + MIMICraw[col])
    mu = log_values.mean()
    sigma = log_values.std()
    conv_mean[col] = mu
    conv_std[col] = sigma
    MIMICraw[col] = (log_values - mu)/sigma

                         

Unnamed: 0,gender,mechvent,max_dose_vaso,re_admission,age,Weight_kg,GCS,HR,SysBP,MeanBP,...,BUN,Creatinine,SGOT,SGPT,Total_bili,INR,input_total,input_4hourly,output_total,output_4hourly
0,-0.5,-0.5,-0.5,-0.5,0.162442,-0.932032,0.452381,0.668997,-0.342088,-0.523059,...,-0.28204,-0.425292,0.060817,-0.752855,-1.18496,1.087071,-2.609263,-1.592771,-0.369111,0.360191
1,-0.5,-0.5,-0.5,-0.5,0.162442,-0.101282,0.452381,0.836799,1.264227,1.489449,...,-0.28204,-0.425292,0.344047,0.396062,0.085004,1.087071,-2.609263,-1.592771,-0.198429,0.360191
2,-0.5,-0.5,-0.5,-0.5,0.162442,4.704987,0.452381,0.881415,-0.152727,0.319619,...,-0.389138,-0.425292,-0.200857,-0.464689,-0.15037,-0.780499,-0.400705,0.666638,-0.087765,0.32581
3,0.5,-0.5,-0.5,-0.5,1.006716,-1.128391,0.452381,-0.770583,1.53988,-0.404575,...,1.892863,1.587865,0.060817,-0.464689,-0.453821,0.102814,0.410073,0.092482,-0.573506,0.189199
4,0.5,-0.5,-0.5,-0.5,1.006716,-1.128391,0.452381,-1.401212,0.105395,-1.192964,...,1.892863,1.587865,0.060817,-0.464689,-0.453821,0.102814,0.445001,0.921502,-0.425631,0.140851
