In [1]:
import sys, os, pickle

import gridworld as W                       # basic grid-world MDPs
import trajectory as T                      # trajectory generation
import optimizer as O                       # stochastic gradient descent optimizer
import solver as S                          # MDP solver (value-iteration)
import plot as P                            # helper-functions for plotting

import pandas as pd
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder

from itertools import product

from maxent import compute_expected_causal_svf

import trajectory as T                      # trajectory generation
import optimizer as O                       # stochastic gradient descent optimizer
import solver as S                          # MDP solver (value-iteration)
import plot as P


num_data = 355504


np.random.seed(66)

def to_interval(istr):
    c_left = istr[0]=='['
    c_right = istr[-1]==']'
    closed = {(True, False): 'left',
              (False, True): 'right',
              (True, True): 'both',
              (False, False): 'neither'
              }[c_left, c_right]
    left, right = map(pd.to_datetime, istr[1:-1].split(','))
    return pd.Interval(left, right, closed)

re_split = False
frac = [0.4,0.2,0.4]
assert np.sum(frac) == 1
frac = np.cumsum(frac)
print (frac)
data_save_path= 'data/'

def sliding(gs, window_size = 6):
    npr_l = []
    for g in gs:
        npr = np.concatenate([np.zeros([window_size-1, g.shape[1]]),g])
        npr_l.append(sliding_window_view(npr, (window_size, g.shape[1])).squeeze(1))
    return np.vstack(npr_l)

[0.4 0.6 1. ]


In [2]:
def to_one_hot_array(input_array, num_unique_values=4):
  one_hot_array = np.zeros((len(input_array), num_unique_values), dtype=int)

  for i, val in enumerate(input_array):
    one_hot_array[i, val] = 1

  return one_hot_array

# Load MIMIC Data

In [3]:
# if re_split:


aggr_df = pd.read_csv('mimic_iv_hypotensive_cut2.csv',sep = ',', header = 0,converters={1:to_interval}).set_index(['stay_id','time']).sort_index()
# create action bins (four actions in total)
aggr_df['action'] = aggr_df['bolus(binary)']*2 + aggr_df['vaso(binary)']
all_idx = np.random.permutation(aggr_df.index.get_level_values(0).unique())
train_df = aggr_df.loc[all_idx[:int(len(all_idx)*frac[0])]].sort_index()
test_df = aggr_df.loc[all_idx[int(len(all_idx)*frac[0]):int(len(all_idx)*frac[1])]].sort_index()
valid_df = aggr_df.loc[all_idx[int(len(all_idx)*frac[1]):]].sort_index()
# print (np.unique(train_df['action'],return_counts=True)[1]*1./len(train_df))
# pickle.dump([train_df, test_df, valid_df], open(data_save_path+'processed_mimic_hyp_2.pkl','wb'))
drop_columns = ['vaso(amount)','bolus(amount)',\
            'any_treatment(binary)','vaso(binary)','bolus(binary)']




# Data Cleaning

In [4]:
# for now drop indicators about bolus and vaso
train_df = train_df.drop(columns=drop_columns)
test_df = test_df.drop(columns=drop_columns)
valid_df = valid_df.drop(columns=drop_columns)

#### imputation
impute_table = pd.read_csv('mimic_iv_hypotensive_cut2_impute_table.csv',sep=',',header=0).set_index(['feature'])
train_df = train_df.fillna(method='ffill')
test_df = test_df.fillna(method='ffill')
valid_df = valid_df.fillna(method='ffill')




for f in impute_table.index:
    train_df[f] = train_df[f].fillna(value = impute_table.loc[f].values[0])
    test_df[f] = test_df[f].fillna(value = impute_table.loc[f].values[0])
    valid_df[f] = valid_df[f].fillna(value = impute_table.loc[f].values[0])


data_non_normalized_df = pd.concat([train_df, valid_df, test_df], axis=0, ignore_index=False).head(num_data).copy()


#### standard normalization ####
normalize_features = ['creatinine', 'fraction_inspired_oxygen', 'lactate', 'urine_output',
                  'alanine_aminotransferase', 'asparate_aminotransferase',
                  'mean_blood_pressure', 'diastolic_blood_pressure',
                  'systolic_blood_pressure', 'gcs', 'partial_pressure_of_oxygen']
mu, std = (train_df[normalize_features]).mean().values,(train_df[normalize_features]).std().values
train_df[normalize_features] = (train_df[normalize_features] - mu)/std
test_df[normalize_features] = (test_df[normalize_features] - mu)/std
valid_df[normalize_features] = (valid_df[normalize_features] - mu)/std




### create data matrix ####
X_train = train_df.loc[:,train_df.columns!='action']
y_train = train_df['action']

X_test = test_df.loc[:,test_df.columns!='action']
y_test = test_df['action']

X_valid = valid_df.loc[:, valid_df.columns!='action']
y_valid = valid_df['action']

In [5]:
X_df = pd.concat([X_train, X_valid, X_test], axis=0, ignore_index=True).copy()
y_df = pd.concat([y_train, y_valid, y_test], axis=0, ignore_index=True).copy()
data_df = pd.concat([train_df, valid_df, test_df], axis=0, ignore_index=False).copy()

# Clustering (Kmeans)

In [6]:
num_clusters = 100
kmeans = KMeans(n_clusters= num_clusters , random_state=0)
kmeans.fit(X_df)



In [7]:
# Assigning each data point to a cluster

X_df['cluster'] = kmeans.labels_.copy()
data_df['cluster'] = kmeans.labels_.copy()
data_non_normalized_df['cluster'] = kmeans.labels_.copy()

# Converting the data into trajectories to input to an IRL algorithm

In [8]:
unique_stay_ids = data_df.index.get_level_values('stay_id').unique()

trajectories = []


for stay_id in unique_stay_ids:


  states, actions = data_df.loc[stay_id]['cluster'], data_df.loc[stay_id]['action']

  trajectory = []
  for i in range(len(states) - 1):
    trajectory.append((states[i], int(actions[i]), states[i+1] ))

  trajectories.append(T.Trajectory(trajectory))

In [9]:
# Store all possible terminal states from trajectory list
terminal_states = []

for traj in trajectories:
  terminal_states.append(traj._t[-1][-1])

terminal_states = list(set(terminal_states))

# MaxEnt Causal IRL Helper Functions

In [10]:
def feature_expectation_from_trajectories(features, trajectories):
    n_states, n_features = features.shape

    fe = np.zeros(n_features)

    for t in trajectories:                  # for each trajectory
        for s in t.states():                # for each state in trajectory
            fe += features[s, :]            # sum-up features

    return fe / len(trajectories)           # average over trajectories

In [11]:
def initial_probabilities_from_trajectories(n_states, trajectories):
    p = np.zeros(n_states)

    for t in trajectories:                  # for each trajectory
        p[t.transitions()[0][0]] += 1.0     # increment starting state

    return p / len(trajectories)            # normalize

In [30]:
def irl_causal(p_transition, features, terminal, trajectories, optim, init, discount,
               eps=1e-4, eps_svf=1e-5, eps_lap=1e-5):
    n_states, _, n_actions = p_transition.shape
    _, n_features = features.shape

    # compute static properties from trajectories
    e_features = feature_expectation_from_trajectories(features, trajectories)
    p_initial = initial_probabilities_from_trajectories(n_states, trajectories)

    # basic gradient descent
    theta = init(n_features)
    delta = np.inf

    all_rewards = []
    all_grads = []

    optim.reset(theta)
    while delta > eps:
        theta_old = theta.copy()

        # compute per-state reward
        reward = features.dot(theta)
        all_rewards.append(reward)

        # compute the gradient
        e_svf = compute_expected_causal_svf(p_transition, p_initial, terminal, reward, discount,
                                            eps_lap, eps_svf)

        grad = e_features - features.T.dot(e_svf)
        all_grads.append(grad)

        # perform optimization step and compute delta for convergence
        optim.step(grad)
        delta = np.max(np.abs(theta_old - theta))

    # save all_rewards and all_grads to csv files, where each row corresponds to 1 step
    # UNCOMMENT this block of code if you want to save another run of rewards and grads
    
    # all_rewards = pd.DataFrame(all_rewards)
    # all_grads = pd.DataFrame(all_grads)
    # all_rewards.to_csv('all_rewards.csv', index=False)
    # all_grads.to_csv('all_grads.csv', index=False)

    # re-compute per-state reward and return
    return features.dot(theta)

# MaxEnt Causal Reward

In [31]:
smoothing_value = 1

p_transition = np.zeros((num_clusters, num_clusters, 4)) + smoothing_value


for traj in trajectories:

  for tran in traj._t:

    p_transition[tran[0], tran[2], tran[1]] +=1

p_transition = p_transition/ p_transition.sum(axis = 1)[:, np.newaxis, :]

In [32]:
# set up features: we use one feature vector per state
state_encoder = OneHotEncoder(sparse=False, categories= [np.arange(num_clusters)])
action_encoder = OneHotEncoder(sparse=False, categories= [np.arange(4)])
state_onehot = state_encoder.fit(X_df['cluster'].to_numpy().reshape(-1, 1))
features = state_onehot.transform(np.arange(num_clusters).reshape(-1, 1))

# choose our parameter initialization strategy:
#   initialize parameters with constant
init = O.Constant(1.0)

# choose our optimization strategy:
#   we select exponentiated stochastic gradient descent with linear learning-rate decay
optim = O.ExpSga(lr=O.linear_decay(lr0=0.1))

discount = 0.9

# Computing the R function through inverse reinforcement learning
reward_maxent_causal = irl_causal(p_transition, features, terminal_states, trajectories, optim, init, discount,
               eps=1e-3, eps_svf=1e-4, eps_lap=1e-4)



[2.61152321e-01 8.58727670e-01 8.01309928e-01 1.04259627e+00
 1.24090392e-01 4.26667733e-01 7.09880253e-01 1.91780373e-01
 3.08105124e-02 3.38943195e-01 9.37192058e-01 3.32880290e-01
 1.02020267e+00 1.65850081e-01 5.12709822e-01 1.41098440e-01
 8.30945215e-01 6.46906162e-01 3.25472913e-01 1.33136616e+00
 9.11766902e-01 6.31079918e-01 6.97252674e-01 5.23149290e-01
 3.94171348e-01 9.29403084e-02 2.22256216e-01 1.19209233e-01
 1.33102940e+00 1.14951737e+00 6.68291570e-01 6.95375708e-02
 1.22596121e+00 8.44247118e-01 1.05741432e+00 4.03263787e-01
 3.87243026e-02 7.17458096e-01 1.30998209e+00 3.99222703e-01
 7.75043009e-01 1.48004439e+00 1.84033015e-01 1.64763797e-02
 5.56319857e-01 1.01529489e-01 7.55511641e-01 1.28068423e+00
 1.90770102e-01 1.81821238e-02 2.85967890e-03 4.31213882e-01
 8.39027384e-01 6.36804787e-01 1.02559078e+00 1.80499012e-01
 1.57936291e-01 3.04737553e-02 1.51369529e-01 5.74672447e-01
 1.17477414e+00 4.28183808e-01 6.67954813e-01 1.39198243e+00
 3.28166970e-01 1.303246

In [29]:
f = pd.read_csv('all_grads.csv')
f.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0.261152,0.858728,0.80131,1.042596,0.12409,0.426668,0.70988,0.19178,0.030811,0.338943,...,0.000334,0.4462,0.046806,0.814444,1.002186,0.523486,0.113148,0.502944,1.40057,1.307625
1,0.261152,0.858728,0.80131,1.042596,0.12409,0.426668,0.70988,0.19178,0.030811,0.338943,...,0.000334,0.4462,0.046806,0.814444,1.002186,0.523486,0.113148,0.502944,1.40057,1.307625
2,0.261152,0.858728,0.80131,1.042596,0.12409,0.426668,0.70988,0.19178,0.030811,0.338943,...,0.000334,0.4462,0.046806,0.814444,1.002186,0.523486,0.113148,0.502944,1.40057,1.307625
3,0.261152,0.858728,0.80131,1.042596,0.12409,0.426668,0.70988,0.19178,0.030811,0.338943,...,0.000334,0.4462,0.046806,0.814444,1.002186,0.523486,0.113148,0.502944,1.40057,1.307625
4,0.261152,0.858728,0.80131,1.042596,0.12409,0.426668,0.70988,0.19178,0.030811,0.338943,...,0.000334,0.4462,0.046806,0.814444,1.002186,0.523486,0.113148,0.502944,1.40057,1.307625
