In [1]:
# Learn the physician policy
# use ideas from MixNN-EM

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import math
import os
import random
import pandas as pd
from pandas import DataFrame
try:
    import cPickle as pickle
except ImportError:
    import _pickle as pickle
import copy

In [3]:
# Set CUDA_VISIBLE_DEVICES to '1' for this process so that CUDA-based libraries only
# see that device. NOTE(review): the index is machine-specific, and this presumably
# has to run before TensorFlow first touches the GPU — confirm on the target host.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [4]:
# Read the whitespace-separated list of state feature column names.
with open('../data/state_features.txt') as feature_file:
    feature_text = feature_file.read()
state_features = feature_text.split()

In [5]:
# Load the train / validation / test splits in one pass.
df, val_df, test_df = [
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/rl_train_set_unscaled.csv',
        '../data/rl_val_set_unscaled.csv',
        '../data/rl_test_set_unscaled.csv',
    )
]

In [6]:
# Action mapping: (iv, vaso) tuple -> single action id, numbered row-major (id = 5*iv + vaso).
action_map = {
    (iv, vaso): 5 * iv + vaso
    for iv in range(5)
    for vaso in range(5)
}
# Keep the counter's final value (the number of actions) available, as before.
count = len(action_map)

In [7]:
# Inverse mapping: action id -> [iv, vaso] list, using the same 5*iv + vaso numbering.
inv_action_map = {
    5 * iv + vaso: [iv, vaso]
    for iv in range(5)
    for vaso in range(5)
}

In [9]:
# Mean and standard deviation of the two action columns, computed on the training frame only.
iv_column = df['iv_input']
vaso_column = df['vaso_input']

iv_mean, iv_std = iv_column.mean(), iv_column.std()
vaso_mean, vaso_std = vaso_column.mean(), vaso_column.std()

# Persist each statistic to its own .npy file in the working directory.
for stat_file, stat_value in (
    ('iv_mean.npy', iv_mean),
    ('iv_std.npy', iv_std),
    ('vaso_mean.npy', vaso_mean),
    ('vaso_std.npy', vaso_std),
):
    np.save(stat_file, stat_value)

In [12]:
# Standardise the action columns of every split with the training-set statistics,
# adding the results as new '*_norm' columns on each frame.
for split_frame in (df, val_df, test_df):
    split_frame['iv_input_norm'] = (split_frame['iv_input'] - iv_mean) / iv_std
    split_frame['vaso_input_norm'] = (split_frame['vaso_input'] - vaso_mean) / vaso_std

In [13]:
# Columns used for *past* timesteps: the state features plus the normalised action
# columns. (The current timestep uses state_features only, so the action being
# predicted is never part of the input.)
hist_feat = list(np.loadtxt('../data/state_features.txt', dtype=str))
hist_feat.append('iv_input_norm')
hist_feat.append('vaso_input_norm')  # fixed: a stray extra quote here was a syntax error

In [14]:
#  X: (s_t-3, a_t-3, s_t-2, a_t-2, s_t-1, a_t-1, s_t)
#  Y: one-hot id of the action taken at time t (id = action_bins*iv_input + vaso_input)
#
# History that does not exist yet (the first `hist` steps of a trajectory) is padded
# with zeros, so an action is still predicted for every timestep rather than only for
# those with a full history. The original author treats zero as "the mean".
hist = 3
action_bins = 5


def _history_window(hist_values, state_values, row, steps_in_traj):
    """Build the flattened input vector for one timestep.

    hist_values / state_values are 2-D arrays holding the `hist_feat` and
    `state_features` columns of the frame; `row` is the current row position and
    `steps_in_traj` is the 1-based position of that row within its trajectory.
    """
    # Only rows of the same trajectory may be used as history, and at most `hist` of them.
    num_actual = min(steps_in_traj - 1, hist)
    num_zeros = hist - num_actual
    pieces = [np.zeros(hist_values.shape[1]) for _ in range(num_zeros)]
    pieces.extend(hist_values[row - offset] for offset in range(num_actual, 0, -1))
    # The current step contributes the state only: its action is the prediction target.
    pieces.append(state_values[row])
    return np.hstack(pieces)


def make_data_history_zeros(df_in):
    """Turn a frame of consecutive trajectory rows into (features, one-hot actions).

    Rows with the same 'icustayid' are assumed to be contiguous and in time order
    (NOTE(review): this is relied upon but not checked here).

    Parameters
    ----------
    df_in : pandas.DataFrame
        Must contain 'icustayid', 'iv_input', 'vaso_input' and every column named in
        the module-level `hist_feat` and `state_features` lists.

    Returns
    -------
    (X, Y) : tuple of numpy arrays
        X has one row per input row, of width hist*len(hist_feat) + len(state_features).
        Y has one row per input row and always action_bins**2 columns (float one-hot).
        Previously the width came from pd.get_dummies and shrank whenever some action
        id did not occur in the frame.

    Raises
    ------
    ValueError
        If an action id falls outside [0, action_bins**2).
    """
    df_in = df_in.reset_index()

    # Pull the needed columns out once; per-row .loc lookups inside the loop are very slow.
    hist_values = df_in[hist_feat].values
    state_values = df_in[state_features].values
    stay_ids = df_in['icustayid'].values
    iv_values = df_in['iv_input'].values
    vaso_values = df_in['vaso_input'].values
    n_rows = len(df_in)

    X = []
    Y = []
    count_in_traj = 0
    for row in range(n_rows):
        if row % 10000 == 0 and row > 0:
            print (row)
        count_in_traj += 1

        # the target is the action taken at this timestep
        Y.append(int(action_bins * iv_values[row] + vaso_values[row]))
        X.append(_history_window(hist_values, state_values, row, count_in_traj))

        # The last row of the frame, or a change of icustayid, ends the trajectory.
        is_terminal = (row == n_rows - 1) or (stay_ids[row] != stay_ids[row + 1])
        if is_terminal:
            count_in_traj = 0

    num_classes = action_bins ** 2
    targets = np.asarray(Y, dtype=int)
    if targets.size and (targets.min() < 0 or targets.max() >= num_classes):
        raise ValueError('action id outside [0, %d): check iv_input / vaso_input bins' % num_classes)
    return np.array(X), np.eye(num_classes)[targets]

In [15]:
def batch_sample(batch_size, features, labels):
    """Return a random minibatch as a (features, labels) tuple.

    `batch_size` row positions are drawn with np.random.choice (which samples with
    replacement by default) and the same positions are used for both arrays.
    """
    population = np.arange(len(features))
    picked = np.random.choice(population, batch_size)
    batch_features = np.vstack(features[picked])
    batch_labels = np.vstack(labels[picked])
    return (batch_features, batch_labels)

In [16]:
dire = 'behaviour_clone_data/'
if not os.path.exists(dire):
    os.makedirs(dire)


def _load_or_build_split(split, frame):
    """Return (features, labels) for one split, using the text cache in `dire` when present.

    The cache is only used when BOTH the X and the Y file exist; previously only the
    X file was checked, so a missing Y file made np.loadtxt fail.

    NOTE(review): files are written with fmt='%5.4f', so arrays loaded from the cache
    are rounded to 4 decimals while freshly built ones are not.
    """
    x_path = dire + 'X_%s_hist_zeros.txt' % split
    y_path = dire + 'Y_%s_hist_zeros.txt' % split
    if os.path.exists(x_path) and os.path.exists(y_path):
        feats = np.loadtxt(x_path)
        labels = np.loadtxt(y_path)
        print ("Loaded %s_zeros" % split)
    else:
        feats, labels = make_data_history_zeros(frame)
        np.savetxt(x_path, feats, fmt='%5.4f')
        np.savetxt(y_path, labels, fmt='%5.4f')
        print ("Saved %s_zeros" % split)
    return feats, labels


train_feat_zeros, train_labels_zeros = _load_or_build_split('train', df)
val_feat_zeros, val_labels_zeros = _load_or_build_split('val', val_df)
test_feat_zeros, test_labels_zeros = _load_or_build_split('test', test_df)

10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
Saved train_zeros
10000
20000
Saved val_zeros
10000
20000
30000
40000
Saved test_zeros


In [17]:
# Use the zero-padded history datasets as the working train / val / test arrays.
train_feat, train_labels = train_feat_zeros, train_labels_zeros
val_feat, val_labels = val_feat_zeros, val_labels_zeros
test_feat, test_labels = test_feat_zeros, test_labels_zeros

In [18]:
# Sanity check: feature and label shapes for each split, in train / val / test order.
for split_feat, split_labels in (
    (train_feat, train_labels),
    (val_feat, val_labels),
    (test_feat, test_labels),
):
    print (split_feat.shape, split_labels.shape)

(169495, 198) (169495, 25)
(24338, 198) (24338, 25)
(48617, 198) (48617, 25)


In [19]:
# Dimensions shared by the model definition, the evaluation helper and the training loop.
feature_length = train_feat.shape[1]  # number of columns in one training feature row
batch_size = 64  # minibatch size used for both training and evaluation
num_actions = 25  # width of the one-hot labels and of the softmax output
hidden_size = 64  # NOTE(review): intended hidden-layer width; verify that the model reads it

In [20]:
# define the policy network here
class PolicyModel():
    """Two-hidden-layer softmax classifier over the discrete actions (TF1 graph mode).

    Building an instance adds the placeholders, layers, loss and train op to the
    current default graph. The sizes come from the module-level `feature_length`,
    `hidden_size` and `num_actions`.

    Attributes that callers feed or fetch:
      input_feat : float32 placeholder, shape [None, feature_length]
      labels     : float32 placeholder, shape [None, num_actions] (one-hot)
      phase      : bool placeholder; no layer consumes it at present, it is kept so
                   that existing feed_dicts which supply it remain valid
      output     : softmax over `logits`
      accuracy   : mean of argmax(labels) == argmax(output) over the fed batch
      loss       : mean softmax cross-entropy plus reg_constant * sum(reg_losses)
      train_step : Adam update minimising `loss`
    """
    def __init__(self):
        self.input_feat = tf.placeholder(tf.float32, shape = [None, feature_length])
        self.labels = tf.placeholder(tf.float32, shape = [None, num_actions])
        self.phase = tf.placeholder(tf.bool)

        # Hidden width now comes from `hidden_size` instead of a hard-coded 64.
        self.fc_1 = tf.contrib.layers.fully_connected(self.input_feat, hidden_size, activation_fn=tf.nn.relu)
        self.fc_2 = tf.contrib.layers.fully_connected(self.fc_1 , hidden_size, activation_fn=tf.nn.relu)

        self.logits = tf.contrib.layers.fully_connected(self.fc_2 , num_actions, activation_fn=None)
        self.output = tf.nn.softmax(self.logits)

        # NOTE(review): none of the layers above is given a weights_regularizer, so this
        # collection looks empty and the penalty term below is 0 — confirm whether
        # regularisation was intended before relying on reg_constant.
        self.reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        self.reg_constant = 0.1

        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(self.labels, 1), tf.argmax(self.output, 1)),'float32'))
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = self.logits, labels = self.labels)) + self.reg_constant*sum(self.reg_losses)

        # Run any registered update ops before each optimiser step.
        self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(self.update_ops):
            self.train_step = tf.train.AdamOptimizer().minimize(self.loss)

In [21]:
def get_policy(dataset, sess, mdl):
    """Run the model over a whole split and collect its action probabilities.

    Parameters
    ----------
    dataset : str
        'train', 'val' or 'test'; selects the module-level feature/label arrays.
    sess : session object whose run(fetches, feed_dict=...) evaluates the model.
    mdl : model exposing input_feat, labels, phase, output, accuracy and loss.

    Returns
    -------
    op : ndarray, shape (n_samples, num_actions)
        Softmax output for every sample, in dataset order.
    final_acc : float
        Fraction of samples classified correctly (per-batch accuracies weighted by
        batch size), or 0.0 for an empty split.
    total_loss : float
        Sum over batches of the per-batch mean loss (not an average).

    Raises
    ------
    ValueError
        If `dataset` is not one of the three known names.

    Fixes relative to the earlier version: the final partial batch was sliced with
    [j:-1], which dropped the last sample and left its row of `op` at zero (and fed an
    empty batch when exactly one sample remained); accuracy was divided by a
    fractional batch count; the progress message always said "val".
    """
    if dataset == 'train':
        features, labels = train_feat, train_labels
    elif dataset == 'val':
        features, labels = val_feat, val_labels
    elif dataset == 'test':
        features, labels = test_feat, test_labels
    else:
        raise ValueError("dataset must be 'train', 'val' or 'test', got %r" % (dataset,))

    n_samples = len(features)
    op = np.zeros((n_samples, num_actions))
    total_correct = 0.0
    total_loss = 0.0
    for j in range(0, n_samples, batch_size):
        # Slicing past the end is safe, so the last (possibly shorter) batch needs no special case.
        feat = features[j:j + batch_size]
        lbls = labels[j:j + batch_size]
        feat = feat.reshape(len(feat), feature_length)
        lbls = lbls.reshape(len(lbls), num_actions)
        if j % 10000 == 0:
            print('Processing %s set indx: ' % dataset, j)
        softmax, accuracy, loss = sess.run([mdl.output, mdl.accuracy, mdl.loss],
                                           feed_dict={mdl.input_feat: feat, mdl.phase: False, mdl.labels: lbls})
        op[j:j + len(feat)] = softmax
        # `accuracy` is a batch mean; weight it by the batch size to get a true overall rate.
        total_correct += accuracy * len(feat)
        total_loss += loss

    final_acc = total_correct / n_samples if n_samples else 0.0
    return op, final_acc, total_loss

In [22]:
num_epochs = 20
load_model = False  # Whether to load a saved model.
save_dir = "./behaviour_clone/"
save_path = "./behaviour_clone/ckpt"  # The path to save our model to.

# Start from an empty graph so re-running this cell does not accumulate duplicate ops.
tf.reset_default_graph()

mdl = PolicyModel()

config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # allocate GPU memory on demand rather than all up front
config.allow_soft_placement = True  # fall back to another device if the requested one is unavailable

saver = tf.train.Saver(tf.global_variables())

init = tf.global_variables_initializer()

trainables = tf.trainable_variables()

# Make a path for our model to be saved in.
if not os.path.exists(save_dir):
    os.makedirs(save_dir)


with tf.Session(config=config) as sess:
    if load_model:
        print('Trying to load model...')
        # Restore into the graph built above through `saver`. Importing the saved
        # meta graph here would add a second copy of the network to the same graph,
        # leaving `mdl`'s own variables unrestored.
        checkpoint = tf.train.latest_checkpoint(save_dir)
        if checkpoint is None:
            print ("No previous model found, running default init")
            sess.run(init)
        else:
            saver.restore(sess, checkpoint)
            print ("Model restored")
    else:
        print("Running default init")
        sess.run(init)
    print("Init done")

    net_loss = 0
    net_accuracy = 0.0
    print('Starting training!')
    for j in range(1, num_epochs+1):
        # Per-epoch accumulators (net_accuracy used to carry over between epochs).
        net_loss = 0.0
        net_loglik = 0.0
        net_accuracy = 0.0
        num_batches = 0
        # Fresh shuffle every epoch; batches are consecutive slices of the permutation.
        inds = np.random.permutation(train_feat.shape[0])
        start_idx = 0
        end_idx = 0
        while start_idx < len(train_feat):
            end_idx = min(len(train_feat), start_idx+batch_size)
            batch_inds = inds[start_idx:end_idx]
            x_batch = train_feat[batch_inds]
            y_batch = train_labels[batch_inds]
            _, loss, accuracy = sess.run([mdl.train_step, mdl.loss, mdl.accuracy],
                                     feed_dict={mdl.input_feat : x_batch, mdl.labels: y_batch, mdl.phase: True})
            net_loss += loss
            net_accuracy += accuracy
            num_batches += 1
            start_idx += batch_size

        saver.save(sess,save_path)
        print("Saved Model, epoch " + str(j))

        # Divide by the number of batches actually run, not the fractional len/batch_size.
        av_loss = net_loss/float(max(num_batches, 1))
        print("Average training loss per batch is %f and epoch is %d"  % (av_loss, j))
        val_policy, val_acc, val_loss = get_policy('val', sess, mdl)
        print('Val set loss: %f ' % (val_loss,))
        net_loss = 0.0

    print ("Finished, getting final accuracy")
    val_policy, val_acc, val_loss = get_policy('val', sess, mdl)
    test_policy, _, _ = get_policy('test',sess, mdl)
    print('Val set accuracy, loss: ', val_acc, val_loss)



Running default init
Init done
Starting training!
Saved Model, epoch 1
Average training loss per batch is 1.479536 and epoch is 1
Processing val set indx:  0
Val set loss: 512.040495 
Saved Model, epoch 2
Average training loss per batch is 1.273859 and epoch is 2
Processing val set indx:  0
Val set loss: 485.370984 
Saved Model, epoch 3
Average training loss per batch is 1.231285 and epoch is 3
Processing val set indx:  0
Val set loss: 477.474617 
Saved Model, epoch 4
Average training loss per batch is 1.208725 and epoch is 4
Processing val set indx:  0
Val set loss: 470.440508 
Saved Model, epoch 5
Average training loss per batch is 1.192957 and epoch is 5
Processing val set indx:  0
Val set loss: 467.680326 
Saved Model, epoch 6
Average training loss per batch is 1.181873 and epoch is 6
Processing val set indx:  0
Val set loss: 466.871370 
Saved Model, epoch 7
Average training loss per batch is 1.173027 and epoch is 7
Processing val set indx:  0
Val set loss: 465.856673 
Saved Model,

In [23]:
# Index of the recorded action for each validation row (position of the 1 in the one-hot label).
val_selected = val_labels_zeros.argmax(axis=1)

In [24]:
# Most probable action under the learned policy for each validation row.
val_pol_selected = val_policy.argmax(axis=1)

In [25]:
# Fraction of validation rows where the policy's most probable action equals the recorded
# action. np.mean reduces the boolean array in one vectorised call; the builtin sum()
# iterated over it element by element in Python.
np.mean(val_pol_selected == val_selected)

0.59548853644506528