In [1]:
import numpy as np
import pandas as pd
import copy
import random
import itertools
import json
import pickle

import torch
import seaborn as sn
import itertools
from sklearn.model_selection import train_test_split

import os
# Expose no CUDA devices to this process so that the work below stays on the CPU.
# NOTE(review): this is set after `import torch`; presumably it still takes effect
# because the variable is only read when CUDA is first initialised -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 

In [2]:
from GP.gp import *
from GP.dqn import *

In [3]:
# Dataset description: environment tag, visit-id column and feature columns
env = 'CCHS'
id_feat = 'VisitIdentifier'
sel_feats = [
    'SystolicBP', 'MAP', 'RespiratoryRate', 'PulseOx', 'HeartRate', 'Temperature',
    'WBC', 'BiliRubin', 'BUN', 'Lactate', 'Creatinine', 'Platelet', 'Bands', 'FIO2',
]

# Derive the 'max_' / 'min_' prefixed column names from the selected features,
# then list all three groups together: plain names first, then max, then min.
max_sel_feats, min_sel_feats = (
    [prefix + feat for feat in sel_feats] for prefix in ('max_', 'min_')
)
all_feats = [*sel_feats, *max_sel_feats, *min_sel_feats]

In [4]:
# Load the data
# `df` comes from the sample data CSV and `info` from the companion visInfo CSV.
# Both paths are relative, so they resolve against the current working directory.
# NOTE(review): presumably `df` holds one row per visit timestamp and `info` one
# row per visit (later cells read `VisitID` / `DeceasedFlag` from it) -- confirm.
df = pd.read_csv('../baseline/example/cchs_sample_data.csv')
info = pd.read_csv('../baseline/example/cchs_sample_visInfo.csv')

In [5]:
# Get trajectory length
# The per-row lists built below (and again in later cells) are written back to
# `df` by position, in ascending visit-id order. That only lines up when the
# rows are already ordered by visit id, so enforce it here. The sort is stable,
# which leaves the row order inside each visit untouched, and it is skipped
# entirely when the data is already ordered.
if not df[id_feat].is_monotonic_increasing:
    df = df.sort_values(id_feat, kind='stable')

vid_list = sorted(np.unique(df[id_feat]))
# Count the rows of every visit in one pass instead of scanning the whole
# frame once per visit.
rows_per_visit = df[id_feat].value_counts()
df_len = [int(rows_per_visit.loc[vid]) for vid in vid_list]

# Get the final outcome as reward
# Index the visit-level table once. Only the first row per visit is kept, which
# is the row a positional `.values[0]` lookup on the filtered table would pick.
outcome_by_visit = (
    info.drop_duplicates(subset='VisitID')
        .set_index('VisitID')['DeceasedFlag']
)
missing_visits = [vid for vid in vid_list if vid not in outcome_by_visit.index]
if missing_visits:
    raise LookupError(
        'No DeceasedFlag found in `info` for %d visit(s), e.g. %r'
        % (len(missing_visits), missing_visits[:5])
    )

# Each visit gets NaN on every row except the last one, which carries the
# visit's outcome label.
rewards_list = [
    [np.nan] * (n_rows - 1) + [outcome_by_visit.loc[vid]]
    for vid, n_rows in zip(vid_list, df_len)
]
all_rewards = list(itertools.chain.from_iterable(rewards_list))

df['reward'] = all_rewards
df['Action'] = df['Action'].astype(np.int64)

In [6]:
# Infer rewards by GP
# Build the .npy path that is handed to gp_rewards, then call it on the full
# frame with the visit-id column, the selected features and learn_gp=True.
GP_reward_path = 'example/' + env + '_GP_rewards.npy'
distributed_rewards = gp_rewards(
    df,
    id_feat,
    sel_feats,
    GP_reward_path,
    learn_gp=True,
)

Prepare phi
Episode_index
Error value for length_scale: 0 and sigma:  0 is equal to:  9.092858529366566
The cross validation error matrix is as follows: 
[[9.09285853]]


In [7]:
# Assign rewards to each timestamp
# Offsets into `distributed_rewards`: visit k owns the entries in
# [all_seq_len[k], all_seq_len[k+1]), i.e. one per non-terminal row.
# NOTE(review): presumably gp_rewards returns exactly one entry per non-terminal
# row, ordered by sorted visit id -- confirm against GP.gp.
all_seq_len = [0] + list(np.cumsum([len(i)-1 for i in rewards_list]))
GP_rewards_list = copy.deepcopy(rewards_list)

# Overwrite every row but the last of each visit with the inferred reward; the
# last row keeps the outcome label copied from `rewards_list`.
for visit_rewards, start, stop in zip(GP_rewards_list, all_seq_len[:-1], all_seq_len[1:]):
    visit_rewards[:-1] = [distributed_rewards[i][0] for i in range(start, stop)]
GP_rewards = list(itertools.chain.from_iterable(GP_rewards_list))
df['GP_reward'] = GP_rewards

# Get the terminal flags: 1 on the last row of each visit, 0 everywhere else
terminals_list = [[0]*(len(i)-1) + [1] for i in rewards_list]
terminals = list(itertools.chain.from_iterable(terminals_list))
df['terminals'] = terminals

# Get the list of dataframes
# One frame per visit, in ascending visit-id order. A single groupby pass
# replaces one boolean scan of the whole frame per visit.
vid_list = sorted(np.unique(df[id_feat]))
df_list = [visit_df for _, visit_df in df.groupby(id_feat, sort=True)]

In [8]:
def _stack_trajectories(frames):
    """Concatenate per-visit frames and split them into DQN input arrays.

    Parameters
    ----------
    frames : list of pandas.DataFrame
        One frame per visit; each must contain the `all_feats` columns plus
        'Action', 'GP_reward' and 'terminals'.

    Returns
    -------
    tuple of numpy.ndarray
        (observations, actions, rewards, terminals), rows in the order of
        `frames`.
    """
    stacked = pd.concat(frames)
    return (
        stacked[all_feats].to_numpy(),
        stacked['Action'].to_numpy(),
        stacked['GP_reward'].to_numpy(),
        stacked['terminals'].to_numpy(),
    )


# Number of independent train/test repetitions; the repetition index doubles as
# the random seed of that repetition.
repeat_time = 1
seed_rp_results = []

for rp_idx in range(repeat_time):
    print('repeat idx: ', rp_idx)

    # Seed every global RNG so a repetition is reproducible across runs.
    # NOTE(review): train_offline_dqn may also seed internally -- confirm in GP.dqn.
    random.seed(rp_idx)
    np.random.seed(rp_idx)
    torch.manual_seed(rp_idx)

    # ------------------------------------------------------------------------------------
    # Get the training and testing index (the split is made over whole visits)
    tr_idx, te_idx = train_test_split(np.arange(len(df_list)), test_size=0.2, random_state=rp_idx)

    # Slice the training & testing data
    tr_df = [df_list[i] for i in tr_idx]
    te_df = [df_list[i] for i in te_idx]

    tr_data = _stack_trajectories(tr_df)
    te_data = _stack_trajectories(te_df)
    tr_observations, tr_actions, tr_rewards, tr_terminals = tr_data
    te_observations, te_actions, te_rewards, te_terminals = te_data

    # ------------------------------------------------------------------------------------
    # Training & testing the DQN model
    model = train_offline_dqn(tr_data, env=env, total_timesteps=100)
    seed_rp_results.append(test_offline_dqn(model, te_data, env=env, verbo=True))

repeat idx:  0




Using cpu device
Performance Measurements:
Confusion matrix: 
 [[ 18 401]
 [  2 176]]
Accuracy:  0.3249581239530988
Recall:  0.9887640449438202
Precision:  0.3050259965337955
F-score:  0.46622516556291393
AUC:  0.5657665388431525
APR:  0.5534130798460787
Jaccard Score:  0.30397236614853196
