In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import os
import pickle as pickle

In [2]:
# Lookup table: flat action id (0..24) -> [iv, vaso], where id = 5*iv + vaso.
inv_action_map = {}
for action_id in range(25):
    iv, vaso = divmod(action_id, 5)
    inv_action_map[action_id] = [iv, vaso]

In [3]:
# Show the action-id -> [iv, vaso] lookup table.
print(inv_action_map)

{0: [0, 0], 1: [0, 1], 2: [0, 2], 3: [0, 3], 4: [0, 4], 5: [1, 0], 6: [1, 1], 7: [1, 2], 8: [1, 3], 9: [1, 4], 10: [2, 0], 11: [2, 1], 12: [2, 2], 13: [2, 3], 14: [2, 4], 15: [3, 0], 16: [3, 1], 17: [3, 2], 18: [3, 3], 19: [3, 4], 20: [4, 0], 21: [4, 1], 22: [4, 2], 23: [4, 3], 24: [4, 4]}


In [4]:
# Load the training split; the path is relative to the working directory.
train_data = pd.read_csv('data_vasochange/train_withterm.csv')

In [5]:
# Load the validation split; the path is relative to the working directory.
val_data = pd.read_csv('data_vasochange/val_withterm.csv')

In [6]:
# Load the test split; the path is relative to the working directory.
test_data = pd.read_csv('data_vasochange/test_withterm.csv')

In [7]:
# Preview the first rows of the training frame.
train_data.head()

Unnamed: 0,bloc,icustayid,mortality_90d,MIMICzs1,MIMICzs2,MIMICzs3,MIMICzs4,MIMICzs5,MIMICzs6,MIMICzs7,...,MIMICzs44,MIMICzs45,MIMICzs46,MIMICzs47,action,shaped_reward,io_ac,vc_ac,abchange_vc,max_dose_vaso
0,1,3,1,-0.5,-0.5,-0.5,-2.302585,-0.978344,-0.1873,0.705956,...,0.391651,0.52761,0.786192,0.702781,10,-1.22427,2,0,0.0,0.0
1,2,3,1,-0.5,-0.5,-0.5,-2.302585,-0.978344,-0.1873,0.705956,...,0.394176,0.52761,0.793676,0.59653,10,1.640796,2,0,0.0,0.0
2,3,3,1,-0.5,-0.5,-0.5,-2.302585,-0.978344,-0.1873,0.705956,...,0.396682,0.52761,0.799286,0.51695,10,-0.025,2,0,0.0,0.0
3,4,3,1,-0.5,-0.5,-0.5,-2.302585,-0.978344,-0.155313,0.705956,...,0.399169,0.52761,0.805952,0.575231,10,-0.025,2,0,0.0,0.0
4,5,3,1,-0.5,-0.5,-0.5,-2.302585,-0.978344,-0.147317,0.705956,...,0.401637,0.52761,0.816227,0.714111,10,-0.025,2,0,0.0,0.0


In [8]:
def preproc(df):
    """Build the classifier's feature matrix and labels from one data split.

    Only rows whose ``shaped_reward`` has absolute value >= 10 are kept.
    The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``MIMICzs1`` .. ``MIMICzs47``, ``abchange_vc``,
        ``io_ac``, ``vc_ac`` and ``shaped_reward``.

    Returns
    -------
    (features, labels)
        ``features`` holds the 50 columns above, in that order, for the kept
        rows. ``labels`` is ``shaped_reward / 15.0`` for the same rows with
        every negative value replaced by 0; it stays floating point.
    """
    state_cols = ['MIMICzs%d' % col_no for col_no in range(1, 48)]
    feature_cols = state_cols + ['abchange_vc', 'io_ac', 'vc_ac']

    # Boolean indexing returns a new frame, so the caller's df is untouched.
    large_reward_rows = df[df['shaped_reward'].abs() >= 10]

    features = large_reward_rows[feature_cols].values

    # Rescale by 15 and floor negatives at 0; the division yields a fresh
    # array, so the in-place assignment cannot leak back into the frame.
    labels = large_reward_rows['shaped_reward'].values / 15.0
    labels[labels < 0] = 0

    return features, labels

In [9]:
def batch_sample(batch_size, features, labels):
    """Draw a random mini-batch of distinct rows.

    ``batch_size`` row positions are sampled without replacement via
    ``np.random.choice``, so ``batch_size`` must not exceed ``len(features)``.

    Returns a tuple ``(batch_features, batch_labels)`` where the features of
    the chosen rows are stacked with ``np.vstack`` and the labels are taken
    at the same positions.
    """
    candidate_rows = np.arange(len(features))
    chosen_rows = np.random.choice(candidate_rows, batch_size, replace=False)
    batch_features = np.vstack(features[chosen_rows])
    batch_labels = labels[chosen_rows]
    return (batch_features, batch_labels)

In [10]:
# Run preproc over the three splits in order (train, val, test) and unpack
# each (features, labels) pair.
(train_feat, train_labels), (val_feat, val_labels), (test_feat, test_labels) = (
    preproc(split_frame) for split_frame in (train_data, val_data, test_data)
)

In [11]:
# Outcome classifier: logistic regression fitted on the rows selected by
# preproc (features = state columns + abchange_vc + io_ac + vc_ac,
# labels = 0/1 floats). max_iter is raised to 1000 for the lbfgs solver.
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(max_iter= 1000)
clf.fit(train_feat, train_labels)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [12]:

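# For each state: if it is not terminal, compute the reward deterministically from the
# estimated SOFA and lactate changes. If it is terminal, use clf to predict the outcome;
# the estimated reward is +15 when the predicted outcome is 1 and -15 when it is 0.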

In [13]:
# Column names MIMICzs1..MIMICzs47 followed by abchange_vc; used to look up
# column positions in the estimated next-state arrays.
feat_names = ['MIMICzs%d' % col_no for col_no in range(1, 48)] + ['abchange_vc']

In [14]:
# Load the pickled estimated next states for each split.
# Each file is opened in a `with` block so its handle is closed right after
# loading (the bare open(...) calls left the handles open).
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# were produced by a trusted run of the environment model.
envmodel_save_dir = 'env_model_regression/'
with open(envmodel_save_dir + "est_next_states_val.p", "rb") as f:
    est_state_val = pickle.load(f)
with open(envmodel_save_dir + "est_next_states_test.p", "rb") as f:
    est_state_test = pickle.load(f)
with open(envmodel_save_dir + "est_next_states_train.p", "rb") as f:
    est_state_train = pickle.load(f)

In [15]:
# Stack each split's loaded next-state estimates into a single 2-D array.
est_state_val, est_state_test, est_state_train = (
    np.vstack(state_chunks)
    for state_chunks in (est_state_val, est_state_test, est_state_train)
)

In [16]:
# Copy two columns of the estimated next states into each data frame.
# The variable names treat MIMICzs30 as lactate and MIMICzs35 as SOFA
# (NOTE(review): confirm against the dataset's column definitions).
lact_index = feat_names.index('MIMICzs30')
sofa_index = feat_names.index('MIMICzs35')

for split_frame, est_states in (
    (val_data, est_state_val),
    (test_data, est_state_test),
    (train_data, est_state_train),
):
    split_frame['est_lactate'] = est_states[:, lact_index]
    split_frame['est_sofa'] = est_states[:, sofa_index]

In [17]:
# Statistics needed to undo the scaling on the next-state prediction in order
# to calculate the reward. For the two-step scaling computed below
# (z-score, then min-max) the inverse is:
# unscaled = std_dev*(min_max_normalised*(max_norm - min_norm) + min_norm) + mean

df_orig = pd.read_csv('data_vasochange/Al_SOFA.csv')
#df_orig_train = df_orig.loc[df_orig['icustayid'].isin(train_data['icustayid'])]

sofa_mean = df_orig['SOFA'].mean()
sofa_std = df_orig['SOFA'].std()
lact_mean = df_orig['Arterial_lactate'].mean()
lact_std = df_orig['Arterial_lactate'].std()

# z-scored copies of the raw columns
norm_sofa = (df_orig['SOFA'] - sofa_mean)/sofa_std
norm_lact = (df_orig['Arterial_lactate']-lact_mean)/lact_std

# Series.min()/.max() are vectorised and skip NaN, matching how .mean()/.std()
# above treat missing values; the builtin min()/max() iterate element by
# element and give order-dependent results when NaN is present.
min_norm_sofa = norm_sofa.min()
max_norm_sofa = norm_sofa.max()

min_norm_lact = norm_lact.min()
max_norm_lact = norm_lact.max()

# min-max rescaling of the z-scores
min_max_norm_sofa = (norm_sofa - min_norm_sofa)/(max_norm_sofa-min_norm_sofa)
min_max_norm_lact = (norm_lact - min_norm_lact)/(max_norm_lact-min_norm_lact)

In [19]:
# Load the agent's chosen actions for the test, validation and training splits.
# Each file is opened in a `with` block so its handle is closed right after
# loading (the bare open(...) calls left the handles open).
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# were produced by a trusted training run.
with open('vasochange4_dqn_normal/dqn_normal_actions_test.p', "rb") as f:
    agent_actions_test = pickle.load(f)
with open('vasochange4_dqn_normal/dqn_normal_actions_val.p', "rb") as f:
    agent_actions_val = pickle.load(f)
with open('vasochange4_dqn_normal/dqn_normal_actions_train.p', "rb") as f:
    agent_actions_train = pickle.load(f)

In [20]:
#print(agent_actions_test)


In [21]:
#print(agent_actions_val)

In [22]:
# inv_action_map (built in an earlier cell) decodes a flat action id into [iv, vaso].
# Earlier, equivalent construction kept for reference:
#inv_action_map = {}
#count = 0
#for i in range(5):
#    for j in range(5):
#        inv_action_map[count] = [i,j]
#        count += 1
print(inv_action_map)

{0: [0, 0], 1: [0, 1], 2: [0, 2], 3: [0, 3], 4: [0, 4], 5: [1, 0], 6: [1, 1], 7: [1, 2], 8: [1, 3], 9: [1, 4], 10: [2, 0], 11: [2, 1], 12: [2, 2], 13: [2, 3], 14: [2, 4], 15: [3, 0], 16: [3, 1], 17: [3, 2], 18: [3, 3], 19: [3, 4], 20: [4, 0], 21: [4, 1], 22: [4, 2], 23: [4, 3], 24: [4, 4]}


In [23]:
# Decode every flat agent action id into its [iv, vaso] pair, producing one
# two-column array per split (order: test, val, train).
test_actions_arr, val_actions_arr, train_actions_arr = (
    np.array([inv_action_map[action_id] for action_id in action_ids])
    for action_ids in (agent_actions_test, agent_actions_val, agent_actions_train)
)

In [24]:
# Sanity check: show the decoded [iv, vaso] pairs for the test split.
print(test_actions_arr)

[[2 2]
 [2 2]
 [2 1]
 ...
 [0 0]
 [2 0]
 [2 0]]


In [25]:
# Attach the agent's decoded actions to each frame: column 0 of the decoded
# array is the iv component, column 1 the vaso component.
for split_frame, decoded_actions in (
    (val_data, val_actions_arr),
    (test_data, test_actions_arr),
    (train_data, train_actions_arr),
):
    split_frame['agent_iv'] = decoded_actions[:, 0]
    split_frame['agent_vaso'] = decoded_actions[:, 1]

In [26]:
# Reward-shaping coefficients used by reward_estimator on non-terminal steps.
c0 = -0.1/4   # added when the estimated SOFA is unchanged and non-zero
c1 = -0.5/4   # weight on the estimated SOFA change
c2 = -2       # weight on tanh of the estimated lactate change

# Columns fed to clf at terminal steps. clf is fitted on the 50 columns that
# preproc selects (MIMICzs1..47, abchange_vc, io_ac, vc_ac), so this list must
# have the same length and order or clf.predict raises a feature-count
# ValueError. 'abchange_vc' was missing (49 columns); the agent's iv/vaso
# choices stand in for the two action columns used during fitting.
# NOTE(review): abchange_vc is the value recorded in the data, not one
# recomputed for the agent's vaso choice - confirm this is intended.
clf_features = ['MIMICzs%d' % col_no for col_no in range(1, 48)]
clf_features.append('abchange_vc')
clf_features.append('agent_iv')
clf_features.append('agent_vaso')

# To score the recorded actions instead, use 'io_ac' / 'vc_ac' in place of
# 'agent_iv' / 'agent_vaso':
#clf_features.append('io_ac')
#clf_features.append('vc_ac')

def reward_estimator(df):
    """Estimate one reward per row of ``df``.

    A row is treated as terminal when it is the last row of the frame or the
    next row has a different ``icustayid``. Terminal rows get -15 when ``clf``
    predicts 0 for the row's ``clf_features`` and +15 otherwise. Every other
    row gets a shaped reward built from the estimated SOFA and lactate change.

    Side effect: four columns (``unscaled_sofa_now``, ``unscaled_sofa_next``,
    ``unscaled_lact_now``, ``unscaled_lact_next``) are added to ``df`` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``icustayid``, ``MIMICzs35``, ``MIMICzs30``, ``est_sofa``,
        ``est_lactate`` and every column named in ``clf_features``.

    Returns
    -------
    numpy.ndarray
        Rewards in row order.
    """
    # Rescale current and estimated-next values with std * x + mean.
    # NOTE(review): only the z-score inverse is applied here; the min-max
    # bounds computed alongside sofa_std / lact_std are not used - confirm.
    for source_col, target_col, scale, offset in (
        ('MIMICzs35', 'unscaled_sofa_now', sofa_std, sofa_mean),
        ('est_sofa', 'unscaled_sofa_next', sofa_std, sofa_mean),
        ('MIMICzs30', 'unscaled_lact_now', lact_std, lact_mean),
        ('est_lactate', 'unscaled_lact_next', lact_std, lact_mean),
    ):
        df[target_col] = scale*(df[source_col]) + offset

    row_labels = df.index
    last_position = len(df) - 1
    rewards = []
    for position, label in enumerate(row_labels):
        is_terminal = (
            position == last_position
            or df.loc[label, 'icustayid'] != df.loc[row_labels[position + 1], 'icustayid']
        )

        if not is_terminal:
            lact_now = df.loc[label, 'unscaled_lact_now']
            sofa_now = df.loc[label, 'unscaled_sofa_now']
            lact_next = df.loc[label, 'unscaled_lact_next']
            sofa_next = df.loc[label, 'unscaled_sofa_next']

            # Same accumulation order as before so the floats match exactly.
            step_reward = 0
            if sofa_next == sofa_now and sofa_next != 0:
                step_reward += c0
            step_reward += c1*(sofa_next-sofa_now)
            step_reward += c2*np.tanh(lact_next - lact_now)
            rewards.append(step_reward)
            continue

        # Terminal row: ask the outcome classifier, one sample at a time.
        terminal_feat = df.loc[label, clf_features].values.reshape(1, -1)
        est_outcome = clf.predict(terminal_feat)
        try:
            rewards.append(-15 if est_outcome == 0 else 15)
        except ValueError:
            # Truth-testing a multi-element prediction is ambiguous; show it
            # before re-raising.
            print(est_outcome)
            print(len(est_outcome))
            raise
    return np.array(rewards)

In [27]:
# Estimate per-row rewards on the validation and test frames
# (reward_estimator also adds its unscaled_* columns to each frame).
val_rewards = reward_estimator(val_data)
test_rewards = reward_estimator(test_data)
# Training-split estimation is left disabled.
#train_rewards = reward_estimator(train_data)

In [28]:

# Persist the validation reward estimates to val_rewards.p in the working directory.
with open(r"val_rewards.p", "wb") as f:
    pickle.dump(val_rewards, f)

In [29]:
# Persist the test reward estimates to test_rewards.p in the working directory.
with open(r"test_rewards.p", "wb") as f:
    pickle.dump(test_rewards, f)

In [30]:
#with open(r"train_rewards.p", "wb") as f:
#    pickle.dump(train_rewards, f)

In [31]:
# 0-based positions of MIMICzs30 and MIMICzs35 within feat_names.
print(lact_index, sofa_index)

29 34
