In [88]:
import importlib
# Names of the modules that load_modules (defined below) imports and then reloads.
modules = ["maxent", "optimizer", "plot", "solver", "limiirl", "utils"]
def load_modules(modules):
    """
    Import every module named in `modules` and immediately reload it.

    modules: iterable of importable module names (strings)

    The reload makes the kernel pick up source edits made after a module
    was first imported. Returns nothing.
    """
    for name in modules:
        importlib.reload(importlib.import_module(name))


load_modules(modules)

In [89]:
import pandas as pd 
import numpy as np 
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from datetime import datetime
from maxent import irl, irl_causal, feature_expectation_from_trajectories
import optimizer as O 
import solver as S                          # MDP solver (value-iteration)
import plot as P
from sklearn.preprocessing import OneHotEncoder
from limiirl import limiirl
from utils import *
import importlib
from scipy.stats import wasserstein_distance

In [3]:
# Load the patients, inputevents, and vital-sign tables from MIMIC-IV into dataframes.
# The commented-out tables are not referenced by any cell shown in this part of the notebook.

patients_df = read_csv_to_dataframe("data/patients.csv")
# prescriptions_df = read_csv_to_dataframe("data/prescriptions.csv")
inputevents_df = read_csv_to_dataframe("data/inputevents.csv")
# procedureevents_df = read_csv_to_dataframe("data/procedureevents.csv")
# d_icd_diagnoses_df = read_csv_to_dataframe("data/d_icd_diagnoses.csv")
# triage_df = read_csv_to_dataframe("data/triage.csv")
vitalsign_df = read_csv_to_dataframe("data/vitalsign.csv")

In [4]:
# Merge vital signs and patients on subject_id; the inner join keeps only
# patients that have at least one vital-sign row.
data_pv = pd.merge(patients_df, vitalsign_df, on='subject_id', how='inner')
# NOTE(review): the return value is discarded, so fill_NANS presumably fills
# data_pv in place — confirm against utils.
_ = fill_NANS(data_pv)

In [5]:
# Give every distinct heart rhythm a consecutive integer id, in order of first appearance
unique_rhythms = data_pv['rhythm'].unique()
n_rhythms = len(unique_rhythms)
rhythms_mapping = {rhythm: idx for idx, rhythm in enumerate(unique_rhythms)}

In [6]:
# Column names passed to find_patient_vitals below as the per-reading feature set.
# NOTE(review): the name `features` is rebound later in this notebook to a one-hot
# state matrix, so the cell that calls find_patient_vitals must run before that one.
features = ["gender", "anchor_age", "temperature", "heartrate", "resprate", "o2sat", "sbp", "dbp", "rhythm"]

In [16]:
# Load the pre-computed data matrix from disk and hold it as a numpy array
M = np.array(read_json("data/process/M.json"))

In [9]:
def discretize_S(M, n_clusters=100, random_state=42, n_init=10): 
    """
    Fit a k-means model that discretises feature vectors into states.

    M: matrix representation of data 
    n_clusters: number of clusters K, i.e. the number of discrete states
    random_state: seed handed to KMeans so the clustering is reproducible
    n_init: number of k-means restarts; passed explicitly because scikit-learn
        warns when it is left implicit and is changing its default away from 10

    returns a model that takes in a feature set D -> state s from 0 to K - 1
    where K is the number of clusters 
    """

    # instantiate the model with every stochastic knob pinned, then fit the data
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state, n_init=n_init).fit(M)

    # return the fitted model 
    return kmeans

In [10]:
# Fit the state discretiser on M with discretize_S's defaults (100 clusters, seed 42).
state_model = discretize_S(M)

  super()._check_params_vs_input(X, default_n_init=10)


In [11]:
# Work on a fixed-seed random subset of 6,000,000 input-event rows.
# NOTE(review): sampling without replacement fails if the table has fewer rows than n — confirm its size.
inputevents_sample = inputevents_df.sample(n=6000000, random_state = 42)

In [21]:
# Distinct order categories of the full input-events table, each mapped to an integer id
actions = inputevents_df['ordercategorydescription'].unique()
n_actions = len(actions)
action_mapping = dict(zip(actions, range(n_actions)))

In [47]:
def construct_trajectories(p_events, p_vitals): 
    """
    Build one trajectory per patient from that patient's events and vitals.

    p_events: events for each patient, keyed by patient id
    p_vitals: vital readings for each patient, keyed by the same patient ids

    returns a dict { int(patient id): trajectory }; patients whose trajectory
    has fewer than two entries are left out
    """
    trajs = {} 

    for patient in p_events: 
        # build the trajectory once and reuse it for both the length check and the result
        tau = trajs_from_patient(p_events[patient], p_vitals[patient])
        # drop empty and single-entry trajectories
        if len(tau) > 1:
            trajs[int(patient)] = tau

    return trajs 
    

In [24]:
# Collect the sampled input events per patient.
# NOTE(review): presumably a mapping keyed by patient id — confirm in utils.
patient_events = find_patient_events(inputevents_sample)

In [30]:
# Collect the vital readings per patient, using the fitted state model and the rhythm mapping.
# `features` must still be the list of column names here, not the one-hot matrix it is rebound to later.
patient_vitals = find_patient_vitals(data_pv, state_model, features, rhythms_mapping)

In [31]:
# Enforce the invariant that every remaining patient has both events and vitals,
# since construct_trajectories indexes both collections by the same patient id.
p_events, p_vitals = intersect_vitals_events(patient_events, patient_vitals)

In [50]:
# Build the per-patient trajectories, keyed by integer patient id.
trajectories = construct_trajectories(p_events, p_vitals)

In [51]:
# Cache the trajectories on disk; the next cell reads them back from this file.
save_json(trajectories, "data/process/trajs.json")

In [59]:
# Reload the cached trajectories and cast the patient-id keys back to int
# (JSON object keys are always strings).
trajectories = {
    int(patient): tau
    for patient, tau in read_json("data/process/trajs.json").items()
}

In [60]:
def convert_traj(trajectories):
    """
    Turn flat per-patient trajectories into lists of transition triples.

    trajectories: dict { patient: [s_1, a_1, s_2, a_2, ..., s_n] }

    returns one list per patient, in dict order, holding the
    (state, action, next_state) triples read off at every even index
    """
    return [
        [tuple(path[k:k + 3]) for k in range(0, len(path) - 2, 2)]
        for path in trajectories.values()
    ]

In [61]:
# Discount factor and size of the discrete state space
discount = 0.9
num_clusters = 100

# Additive smoothing: every (state, next_state, action) cell starts at this count
smoothing_value = 1

# Count tensor indexed as [state, next_state, action], with 5 actions
p_transition = np.full((num_clusters, num_clusters, 5), smoothing_value, dtype=float)

In [62]:
T = convert_traj(trajectories)

# Rebuild the smoothed counts from scratch so this cell is idempotent: adding
# counts onto an already-normalised `p_transition` when the cell is re-run
# would mix probabilities with raw counts.
transition_counts = np.full(p_transition.shape, smoothing_value, dtype=float)

for traj in T:
    for state, action, next_state in traj:
        transition_counts[state, next_state, action] += 1

# Normalise over the next-state axis so that, for every (state, action) pair,
# the probabilities of all successor states sum to one.
p_transition = transition_counts / transition_counts.sum(axis=1, keepdims=True)

In [63]:
# Terminal states: the last entry of every patient's trajectory, de-duplicated
terminal_states = list({trajectories[patient][-1] for patient in trajectories})

In [64]:
# One-hot encoder over the discrete state ids 0 .. num_clusters - 1.
# `sparse_output` replaces the `sparse` keyword, which scikit-learn deprecated
# in 1.2 and removed in 1.4; the encoder still returns a dense array.
state_encoder = OneHotEncoder(sparse_output=False, categories=[np.arange(num_clusters)])

In [68]:
# set up features: we use one feature vector per state (1 hot encoding for each cluster/state)
# NOTE(review): this rebinds `features`, which an earlier cell defined as the list of
# vital-sign column names passed to find_patient_vitals; re-running that earlier cell
# after this one would hand it the one-hot matrix instead.
features = state_encoder.fit_transform(np.arange(num_clusters).reshape(-1, 1))

# choose our parameter initialization strategy:
#   initialize parameters with constant
init = O.Constant(1.0)

# choose our optimization strategy:
#   we select exponentiated stochastic gradient descent with linear learning-rate decay
optim = O.ExpSga(lr=O.linear_decay(lr0=0.2))

# actually do some inverse reinforcement learning
# reward_maxent = maxent_irl(p_transition, features, terminal_states, trajectories, optim, init, eps= 1e-3)

# Single-intention model: irl_causal yields the recovered reward and its parameter vector.
reward_maxent_causal, theta_causal = irl_causal(p_transition, features, terminal_states, T, optim, init, discount,
               eps=1e-3, eps_svf=1e-4, eps_lap=1e-4)



In [69]:
def find_policy(p_transition, reward, discount=0.9):
    """ 
    Derive a greedy policy and a softmax policy from a reward via value iteration.

    p_transition: transition probabilities, indexed as [state, next_state, action]
    reward: reward function 
    discount: discount factor 

    calculates V and Q function using value iteration 

    returns (policy, soft_pi):
        policy: 1-d array with the argmax action for every state
        soft_pi: array of shape (states, actions) with softmax action probabilities
    """ 
    # Take the sizes from the transition tensor rather than from notebook globals.
    n_states, _, n_actions = np.shape(p_transition)

    _, Q = S.value_iteration(p_transition, reward, discount)
    # Layout used throughout this notebook: one row per action, one column per state.
    Q = Q.reshape((n_actions, n_states))

    # Softmax over actions. Subtracting each state's maximum first leaves the
    # result unchanged mathematically but keeps np.exp from overflowing.
    exp_Q = np.exp(Q - Q.max(axis=0, keepdims=True))
    soft_pi = (exp_Q / exp_Q.sum(axis=0, keepdims=True)).T

    policy = np.argmax(Q, axis=0).reshape(-1, )

    return policy, soft_pi 

In [70]:
# Solve the MDP under the reward recovered by the single-intention causal model.
V, Q = S.value_iteration(p_transition, reward_maxent_causal, discount)

In [71]:
# View Q as (actions, states); 5 matches the action axis of p_transition.
# NOTE(review): assumes S.value_iteration lays Q out action-major — confirm in solver.
Q = Q.reshape((5, num_clusters))

In [72]:
# Softmax over actions for every state, transposed to shape (states, actions).
# Subtracting each state's maximum Q-value first leaves the softmax unchanged
# mathematically but keeps np.exp from overflowing on large values.
exp_Q = np.exp(Q - np.max(Q, axis=0, keepdims=True))
soft_pi_mce = (exp_Q / np.sum(exp_Q, axis=0, keepdims=True)).T

soft_pi_mce

In [73]:
# Greedy policy: for every state (a column of Q) pick the highest-valued action
policy_mce = Q.argmax(axis=0).ravel()

policy_mce

In [93]:
import random 

n_patients = 100 
K = 5
gamma = 0.9

# Draw the patient subset from a dedicated, seeded generator so the sample (and
# everything fitted on it) is the same after a kernel restart.
_patient_rng = random.Random(42)
random_patients = _patient_rng.sample(list(trajectories.keys()), n_patients)

trajectories_s = { patient: trajectories[patient] for patient in random_patients }

In [75]:
def phi(tau, f, gamma): 
    r"""
    phi is a feature-based function that maps a trajectory `tau` to its
    discounted sum of state features.

    tau: trajectory of the form (s_1, a_1, s_2, a_2, ..., s_n); states sit at
        the even positions, actions at the odd ones
    f: feature function, indexable by state (f[s] is the feature of state s)
    gamma: discount factor 

    phi(tau) = \sum_{t = 1}^{n} gamma^{t - 1} f(s_t)

    The sum runs over every state of the trajectory, including the final one.
    """
    # tau[::2] picks out the states; the t-th state (0-based) is discounted by gamma^t
    return np.sum([(gamma ** t) * f[state] for t, state in enumerate(tau[::2])], axis = 0)

In [76]:
# Feature expectation over all transition lists in T; used as the per-state lookup `f` inside phi.
# NOTE(review): presumably one value per state, since phi indexes it by state id — confirm in maxent.
f = feature_expectation_from_trajectories(features, T)

In [77]:
def feature_trajectories(taus, gamma=0.9): 
    """ 
    Build the feature matrix of a set of trajectories.

    taus: dictionary of the form { patient: tau } where tau is the patient's
    trajectory during stay in ICU (from MIMIC dataset)

    gamma: discount factor 

    Each trajectory is mapped through phi using the notebook-level `f`, and
    each result becomes one row.

    returns X of shape N x d, where N is the number of trajectories and d
    represents number of features 
    """
    return np.array([[phi(tau, f, gamma)] for tau in taus.values()])

In [78]:
# Feature matrix of the sampled patients' trajectories, one row per patient (default gamma = 0.9).
X = feature_trajectories(trajectories_s)

In [79]:
def cluster_trajectories(X, n_experts=100, random_state=42, n_init=10): 
    """
    Cluster trajectories by their feature representation.

    X: feature representation of trajectories
    n_experts: assume trajectories come from `n_experts` experts 
    random_state: seed handed to KMeans so the clustering is reproducible
    n_init: number of k-means restarts; passed explicitly because scikit-learn
        warns when it is left implicit and is changing its default away from 10

    performs kmeans clustering on X and returns model 
    """
    kmeans = KMeans(n_clusters=n_experts, random_state=random_state, n_init=n_init).fit(X)

    return kmeans 

In [80]:
# Fit the trajectory clustering with the default of 100 assumed experts.
cluster_model = cluster_trajectories(X)

  super()._check_params_vs_input(X, default_n_init=10)


In [55]:
data = {}  # key: cluster id, value: list of the trajectories assigned to that cluster

for traj in trajectories.values():
    # embed the trajectory, then ask the fitted model which cluster it falls into
    phi_t = phi(traj, f, gamma)
    cluster = cluster_model.predict([[phi_t]])[0]
    data.setdefault(cluster, []).append(traj)

In [81]:
def format_traj(trajectories):
    """
    Turn flat trajectories into lists of transition triples.

    trajectories: iterable of trajectories [s_1, a_1, s_2, a_2, ..., s_n]

    returns one list per trajectory holding its (state, action, next_state)
    triples, read off at every even index
    """
    return [
        list(zip(path[0:-2:2], path[1::2], path[2::2]))
        for path in trajectories
    ]

In [83]:
def likelihood(traj, theta, gamma=0.9): 
    """
    Likelihood of the actions in `traj` under the softmax policy induced by `theta`.

    traj: trajectory of the form (s_1, a_1, s_2, a_2, ..., s_n)
    theta: reward parameters; the reward is features.dot(theta)
    gamma: discount factor 

    Reads the notebook-level `features`, `p_transition` and solver module `S`.

    returns the product of pi(a_t | s_t) over every (state, action) pair of the trajectory
    """
    # calculate reward 
    reward = features.dot(theta)

    _, Q = S.value_iteration(p_transition, reward, gamma)

    # One row per action, one column per state; sizes come from the transition tensor.
    n_states, _, n_actions = np.shape(p_transition)
    Q = Q.reshape((n_actions, n_states))

    # Softmax action-selection model. Subtracting each state's maximum first leaves
    # the probabilities unchanged but keeps np.exp from overflowing.
    exp_Q = np.exp(Q - Q.max(axis=0, keepdims=True))
    soft_pi = (exp_Q / exp_Q.sum(axis=0, keepdims=True)).T

    prod = 1 
    # states sit at even positions, the action taken in each at the following odd one
    for i in range(0, len(traj) - 1, 2): 
        state = traj[i]
        action = traj[i + 1]
        prod *= soft_pi[state][action]

    return prod 

In [None]:
# import matplotlib.pyplot as plt

# N = 20 

# x = np.arange(100)

# fig, ax = plt.subplots(figsize=(16, 12))

# # Plotting each dataset on the same axes with a label for the legend
# ax.plot(x, reward_maxent_causal, label='Single Intention')

# for i in range(N): 
#     ax.plot(x, traj_models[i]['reward'], label=f'MI: Expert_{i}')
   

# # Adding the legend, which uses the labels specified in the plot commands
# ax.legend()

# # Setting the x-axis and y-axis labels
# ax.set_xlabel('state')
# ax.set_ylabel('reward')

# # Setting the x-axis range from 0 to 99
# ax.set_xlim([0, 99])

# plt.show()

In [90]:
# NOTE(review): max_iterations and epsilon are not referenced by any cell shown here — confirm they are still needed.
max_iterations = 10
# Trajectories of the sampled patients as a plain list; dict insertion order fixes the ordering.
taus = list(trajectories_s.values())
epsilon = 0.005

In [94]:
# Fit the multi-intention model on the sampled trajectories.
# NOTE(review): max_iter=4 and K=10 are passed literally, while the notebook also defines
# max_iterations = 10 and K = 5 — confirm which values are intended.
rho, theta, u = limiirl(X, taus, features, cluster_model, num_clusters, max_iter=4, K=10)

LiMIIRL: cluster 0
LiMIIRL: cluster 1
LiMIIRL: cluster 2
LiMIIRL: cluster 3
LiMIIRL: cluster 4


KeyboardInterrupt: 

In [None]:
def sample_policy(pi, state, N=1000): 
    """
    Draw N actions from a stochastic policy at a given state.

    pi: policy indexable by state; pi[state] is a probability vector over actions
    state: the state to sample at
    N: number of actions to draw

    returns an array of N sampled action indices (uses numpy's global random state)
    """
    action_probs = pi[state]
    return np.random.choice(np.arange(len(action_probs)), size=N, p=action_probs)

def policy_dist(pi_one, pi_two):
    """
    Mean per-state Wasserstein distance between two soft policies.

    pi_one, pi_two: soft policies of shape |S| x |A| — a distribution over
        actions for each state

    The distance is computed exactly from the action probabilities, which are
    passed as weights on the action indices. It therefore needs no Monte Carlo
    sampling and returns the same value on every call.

    returns the distances averaged over all states
    """
    dist_vect = [] 
    for s in range(len(pi_one)): 
        probs_one = np.asarray(pi_one[s], dtype=float)
        probs_two = np.asarray(pi_two[s], dtype=float)
        dist_vect.append(
            wasserstein_distance(
                np.arange(len(probs_one)),
                np.arange(len(probs_two)),
                u_weights=probs_one,
                v_weights=probs_two,
            )
        )

    return np.mean(dist_vect)

def cmp_single(expert_policies, single_policy): 
    """
    Compare every expert policy against the single-intention policy.

    expert_policies: arr of policies of shape K x |S| x |A|  
    single_policy: policy of shape |S| x |A| 

    returns a vector whose k-th entry is policy_dist between the kth expert
    and the single intention model
    """
    return np.array([policy_dist(expert_pi, single_policy) for expert_pi in expert_policies])