In [1]:
import copy
from collections.abc import Iterable
import functools
import itertools
import operator
from matplotlib import pyplot as plt

import pandas as pd
from pandas.api.types import is_numeric_dtype
import numpy as np
import numpy_ext as npe
import math
import random
from pprint import pprint
from scipy.optimize import curve_fit
from scipy.stats import poisson
from scipy.sparse import hstack, vstack, csr_matrix
import scipy

from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.impute import KNNImputer
from sklearn.preprocessing import Normalizer, StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn import metrics
import joblib
import importlib
import concurrent
import time
import traceback

import seaborn as sns

import utils
import safety
import ope

import sys

from config import demographics, vital_sign_vars, lab_vars, treatment_vars, vent_vars, guideline_vars, ffill_windows_clinical, SAMPLE_TIME_H
from config import fio2_bins, peep_bins, tv_bins

In [2]:
# Per-seed trajectory CSVs; the placeholder is filled with the seed.
train_set_file = "data/train_unshaped_traj_{}.csv"
test_set_file = "data/test_unshaped_traj_{}.csv"

# Learned policies, filled as (seed, shaping name, shaping scalar, unsafety prob):
# models/mcp_<ACTION_SELECTION>_policy_<SEED><SHAPING_NAME>_<SHAPING_SCALAR>_<UNSAFETY_PROB>.bin
sm_policy_file = "models/mcp_softmax_policy_{}{}_{}_{}.bin"
greedy_policy_file = "models/mcp_greedy_policy_{}{}_{}_{}.bin"

# Clinician (behavior) policies estimated on the train split, the test split
# and on both together. First placeholder is the seed; this notebook passes an
# empty string for the second one.
behavior_policy_file = "models/clinicians_policy_train_test_{}{}.bin"
behavior_policy_train_file = "models/clinicians_policy_train_{}{}.bin"
behavior_policy_test_file = "models/clinicians_policy_test_{}{}.bin"

# State ids written into `next_state` of terminal transitions:
# 90-day mortality ('t') and survival ('f') respectively.
TERMINAL_MORT, TERMINAL_NONMORT = 650, 651

In [3]:
def add_traj_return(dataset):
    """Return a copy of ``dataset`` with ``traj_reward`` and ``traj_return`` columns.

    ``traj_reward`` is -100 where ``mort90day == 't'``, +100 where
    ``mort90day == 'f'`` and NaN for any other value. ``traj_return`` is that
    reward multiplied by ``0.99 ** traj_len``. The input frame is not modified.
    """
    out = dataset.copy()
    out['traj_reward'] = np.nan
    for outcome, reward in (('t', -100), ('f', 100)):
        out.loc[out.mort90day == outcome, 'traj_reward'] = reward
    discount = .99 ** out['traj_len']
    out['traj_return'] = discount * out['traj_reward']
    return out

def add_scaled_traj_return(dataset):
    """Like ``add_traj_return`` but with rewards on a 0/1 scale.

    ``traj_reward`` is 0 where ``mort90day == 't'``, 1 where
    ``mort90day == 'f'`` and NaN otherwise; ``traj_return`` is the reward
    multiplied by ``0.99 ** traj_len``. Works on a copy of ``dataset``.
    """
    out = dataset.copy()
    out['traj_reward'] = np.nan
    for outcome, reward in (('t', 0), ('f', 1)):
        out.loc[out.mort90day == outcome, 'traj_reward'] = reward
    discount = .99 ** out['traj_len']
    out['traj_return'] = discount * out['traj_reward']
    return out

def add_traj_len(dataset):
    """Return a copy of ``dataset`` with a per-stay ``traj_len`` column.

    ``traj_len`` is the largest ``traj_count`` within each ``icustay_id``
    plus one. Raises ``AssertionError`` if ``traj_count`` contains NaN.
    """
    n_missing = dataset.traj_count.isna().sum()
    assert n_missing == 0
    out = dataset.copy()
    last_step = out.groupby('icustay_id')['traj_count'].transform('max')
    out['traj_len'] = last_step + 1
    return out

def fix_next_terminal_state(dataset):
    """Make terminal transitions point at the dedicated terminal state ids.

    When the terminal rows already carry exactly two distinct ``next_state``
    values the input is returned as-is (same object, no copy). Otherwise a
    copy is returned in which terminal rows get ``TERMINAL_MORT`` where
    ``mort90day == 't'`` and ``TERMINAL_NONMORT`` where ``mort90day == 'f'``.
    """
    terminal_rows = dataset[dataset.terminal]
    if terminal_rows.next_state.nunique() == 2:
        # no fix necessary
        return dataset

    out = dataset.copy()
    for outcome, state_id in (('t', TERMINAL_MORT), ('f', TERMINAL_NONMORT)):
        out.loc[out.terminal & (out.mort90day == outcome), 'next_state'] = state_id
    return out

def postprocess(dataset):
    """Derive trajectory columns and normalise dtypes of a raw trajectory frame.

    Applies ``add_traj_len``, ``add_traj_return`` and
    ``fix_next_terminal_state`` in that order, then casts ``next_state`` to
    int and ``reward`` to float on the resulting frame before returning it.
    """
    with_len = add_traj_len(dataset)
    with_return = add_traj_return(with_len)
    ds = fix_next_terminal_state(with_return)
    ds['next_state'] = ds.next_state.astype('int')
    ds['reward'] = ds.reward.astype('float')
    return ds

In [4]:
# Seeds to evaluate; each one selects its own train/test CSVs and model files.
#seeds = range(10,15)
seeds = range(20)

# Shaping configurations to evaluate, keyed '<name>-<scalar>' and mapped to the
# shaping-name part of the policy file name. Only the unshaped baseline is active.
shaping_fname = {"none-0.0": "_none"}
# Disabled shaped variants (add to the dict above to evaluate them):
#     'avgpotential2-0.1': '_avgpotential2_0.1',
#     'avgpotential2-0.5': '_avgpotential2_0.5',
#     'avgpotential2-1.0': '_avgpotential2_1.0',
#     'avgpotential2-2.0': '_avgpotential2_2.0',
#     'avgpotential2-3.0': '_avgpotential2_3.0',
#     'avgpotential2-4.0': '_avgpotential2_4.0',
#     'avgpotential2-5.0': '_avgpotential2_5.0',
#     'avgpotential2-6.0': '_avgpotential2_6.0',
#     'avgpotential2-10.0': '_avgpotential2_10.0',

In [5]:
def try_get_results(*args, **kwargs):
    """Call ``get_results_seed`` and turn any failure into a printable result row.

    Intended for worker pools where one failing seed should not abort the
    others. All positional and keyword arguments are forwarded unchanged.

    Returns
    -------
    list
        Whatever ``get_results_seed`` returns on success. On failure, a nested
        list ``[[message]]`` whose single string names the arguments and holds
        the formatted traceback, so it can be joined/printed like a result row.

    Notes
    -----
    Only ``Exception`` subclasses are converted; ``KeyboardInterrupt`` and
    ``SystemExit`` propagate so a run can still be interrupted.
    """
    try:
        return get_results_seed(*args, **kwargs)
    except Exception:
        # Use the traceback text directly. Re-instantiating the exception class
        # with it fails for classes with multi-argument constructors (e.g.
        # UnicodeDecodeError) and mangles newlines for KeyError.
        failure = traceback.format_exc()
        return [["Exception for {}: {}".format((args, kwargs), failure)]]

def get_results_seed(seed, unsafety_prob):
    """Evaluate the learned and clinician policies for one seed with off-policy estimators.

    Loads the train/test trajectory CSVs and the stored policies for ``seed``,
    then, for every entry of ``shaping_fname``, scores each (dataset,
    evaluation policy, behavior policy) combination with the WIS, PHWIS, WDR
    and PHWDR estimators from ``ope``.

    Parameters
    ----------
    seed : int
        Fills the data/model file-name templates and reseeds NumPy before each
        shaping configuration.
    unsafety_prob : float
        Fills the last slot of the softmax policy file name. The
        safety-repaired softmax policy is only evaluated when this equals 1.0.

    Returns
    -------
    list[list[str]]
        One row per evaluation, every field converted with ``str``, in order:
        split, policy label, shaped, shaping_name, shaping_scalar, safety
        label, unsafety_prob, seed, wis_mean, phwis_mean, wdr_mean,
        phwdr_mean, ess, var, am, hcope5, len(train_set), len(test_set),
        distinct train icustay_ids, distinct test icustay_ids.

    Raises
    ------
    AssertionError
        If ``traj_reward`` contains NaN in the train or test set.
    """
    results = []
    test_set = postprocess(pd.read_csv(test_set_file.format(seed)))
    train_set = postprocess(pd.read_csv(train_set_file.format(seed), low_memory=False))
    train_test_set = pd.concat([train_set, test_set])

    behavior_policy = joblib.load(behavior_policy_file.format(seed, ''))
    behavior_train_policy = joblib.load(behavior_policy_train_file.format(seed, ''))
    behavior_test_policy = joblib.load(behavior_policy_test_file.format(seed, ''))
    behavior_safe_train = safety.repaired_safe(behavior_train_policy, behavior_train_policy)

    # The datasets do not depend on the shaping configuration, so validate them once.
    assert test_set.traj_reward.isna().sum() == 0, "Missing (NaN) trajectory rewards in test set"
    assert train_set.traj_reward.isna().sum() == 0, "Missing (NaN) trajectory rewards in train set"

    for shaping, shaping_name in shaping_fname.items():
        np.random.seed(seed)
        shaped = 'none' not in shaping
        # Kept as in the key: a string for shaped runs, the float 0.0 for the baseline.
        shaping_scalar = shaping.split('-')[-1] if shaped else 0.0

        def case(ds, evaluation_policy, sampling_policy, split, policy_label, safety_label):
            """Build one evaluation tuple: three inputs followed by the row's config fields."""
            return (ds, evaluation_policy, sampling_policy, split, policy_label,
                    shaped, shaping_name, shaping_scalar, safety_label, unsafety_prob, seed)

        # Greedy-policy evaluation is disabled; kept for reference:
#         greedy_unsafe = utils.repair_unsupported_greedy_policy(
#             joblib.load(greedy_policy_file.format(seed, shaping_fname[shaping])),
#             train_set
#         )
#         greedy_safe = safety.repaired_safe(greedy_unsafe, behavior_train_policy, greedy=True)
#             (train_set, greedy_unsafe, behavior_policy, 'train', 'greedy', shaped, shaping_scalar, 'unsafe', seed),
#             (test_set, greedy_unsafe, behavior_policy, 'test', 'greedy', shaped, shaping_scalar, 'unsafe', seed),
#             (train_set, greedy_safe, behavior_policy, 'train', 'greedy', shaped, shaping_scalar, 'safe', seed),
#             (test_set, greedy_safe, behavior_policy, 'test', 'greedy', shaped, shaping_scalar, 'safe', seed),
        sm_unsafe = joblib.load(sm_policy_file.format(seed, shaping_name, shaping_scalar, unsafety_prob))

        evaluations = [
            case(train_set, sm_unsafe, behavior_policy, 'train', 'softmax', 'unsafe'),
            case(test_set, sm_unsafe, behavior_policy, 'test', 'softmax', 'unsafe'),
        ]
        if unsafety_prob == 1.0:
            sm_safe = safety.repaired_safe(sm_unsafe, behavior_train_policy)
            evaluations += [
                case(train_set, sm_safe, behavior_policy, 'train', 'softmax', 'safe'),
                case(test_set, sm_safe, behavior_policy, 'test', 'softmax', 'safe'),
            ]

        if shaping_name == '_none':
            # Clinician baselines are independent of shaping; report them once, with the baseline.
            evaluations += [
                case(train_set, behavior_train_policy, behavior_train_policy, 'train', 'observed', 'unsafe'),
                case(test_set, behavior_test_policy, behavior_test_policy, 'test', 'observed', 'unsafe'),
                case(train_set, behavior_train_policy, behavior_policy, 'train', 'behavior', 'unsafe'),
                case(test_set, behavior_train_policy, behavior_policy, 'test', 'behavior', 'unsafe'),
                case(train_set, behavior_safe_train, behavior_policy, 'train', 'behavior', 'safe'),
                case(test_set, behavior_safe_train, behavior_policy, 'test', 'behavior', 'safe'),
            ]

        # The loop target is deliberately NOT called `behavior_policy`: a for-loop
        # target rebinds the function-level name, which would leak the last
        # tuple's policy into the next shaping configuration.
        for ds, evaluation_policy, sampling_policy, *config in evaluations:
            wis_mean, _wis_var, traj_weights = ope.wis_policy(ds, evaluation_policy, sampling_policy)
            # NOTE(review): the trailing 0.99 / 100 are passed positionally; presumably the
            # discount factor and an estimator setting defined in `ope` - confirm there.
            q_estimator, v_estimator = ope.infer_estimators_func(train_test_set, evaluation_policy, 0.99, 100)
            phwis_mean, phwis_var, _ = ope.phwis_policy(ds, evaluation_policy, sampling_policy)
            wdr_mean = ope.wdr_policy(ds, evaluation_policy, sampling_policy, q_estimator, v_estimator, 0.99)
            phwdr_mean = ope.phwdr_policy(ds, evaluation_policy, sampling_policy, q_estimator, v_estimator, 0.99)
#             am = ope.am(ds, evaluation_policy, behavior_policy, delta=0.05)
#             hcope5 = ope.hcope(ds, evaluation_policy, behavior_policy, delta=0.05, c=5)
            am, hcope5 = np.nan, np.nan
            ess = ope.ess(traj_weights)
            # NOTE(review): the reported variance is the PHWIS one (the WIS variance is
            # discarded), matching what the previous reuse of `var` produced - confirm intent.
            var = phwis_var
            results.append(list(map(str, (
                *config, wis_mean, phwis_mean, wdr_mean, phwdr_mean, ess, var, am, hcope5,
                len(train_set), len(test_set),
                train_set.icustay_id.nunique(), test_set.icustay_id.nunique(),
            ))))
    return results


# Disabled parallel driver, kept for reference:
# with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
#     futures = [executor.submit(try_get_results, seed, prob) for (seed, prob) in itertools.product(seeds, (1.0,))]

# for f in concurrent.futures.as_completed(futures):
#     print(','.join(map(str,itertools.chain(*f.result()))))
# results += joblib.Parallel(n_jobs=10)(joblib.delayed(get_results_seed)(seed, prob) for (seed, prob) in itertools.product(seeds, (1.0, 0.0)))
# results = [get_results_seed(seed, 0.0) for seed in seeds]

In [14]:
# Evaluate every seed sequentially (unsafety probability fixed at 1.0) and echo
# each seed's rows as they finish; `results` keeps one list of rows per seed.
results = []
for s, prob in itertools.product(seeds, (1.0,)):
    r = get_results_seed(s, prob)
    results.append(r)
    print(','.join(str(row) for row in r))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b


['train', 'softmax', 'False', '_none', '0.0', 'unsafe', '1.0', '0', '83.45139477836416', '85.36378801091014', '31.50971115083654', '31.690984684129447', '1.0048051293275613', '3.6398292943721344', 'nan', 'nan', '82798', '28261', '5239', '1778'],['test', 'softmax', 'False', '_none', '0.0', 'unsafe', '1.0', '0', '-85.86554841636735', '52.49182814668252', '20.0742664251042', '19.998712119849635', '1.5324306284442246', '13613.460585486413', 'nan', 'nan', '82798', '28261', '5239', '1778'],['train', 'softmax', 'False', '_none', '0.0', 'safe', '1.0', '0', '83.45137614847296', '84.67630701786115', '30.527901063526762', '30.70538787340073', '1.0082637197466948', '1.488157923250371', 'nan', 'nan', '82798', '28261', '5239', '1778'],['test', 'softmax', 'False', '_none', '0.0', 'safe', '1.0', '0', '19.853382803921264', '70.52932685987562', '18.853584255610976', '18.8684995108478', '2.2253615025868556', '2522.1457502409185', 'nan', 'nan', '82798', '28261', '5239', '1778'],['train', 'observed', 'Fals

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['sa_weights'] = sa_weights / sa_weights.sum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['term_b'] = ds.reward - ds.q_estimate + gamma * ds.next_v_estimate
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ds['weighted_sa'] = ds.gamma * ds.sa_weights * ds.term_b
A value is trying to be set o

KeyboardInterrupt: 

In [42]:
# Scratch check: render the softmax policy file path for the current seed and
# shaping variant, with the last template field (unsafety probability) set to 0.0.
safety_prob=0.0
# Bare expression: has no visible effect because it is not the cell's last expression.
shaping_scalar
sm_policy_file.format(seed, shaping_fname[shaping], shaping_scalar, safety_prob)


'models/mcp_softmax_policy_2_none_0.0.bin'

In [30]:
term_a 18.260115646300143
summed_weighted_traj -3.728005913133704
train,softmax,False,0.0,unsafe,0,83.35879165683204,64.26925030609227,14.53210973316644,1.0012529487886903,367.50011981531804,nan,nan,82798,28261,5239,1778
term_a 8.109507205046066
summed_weighted_traj -4.404790584427434
test,softmax,False,0.0,unsafe,0,83.45137524501385,64.42884816003073,3.7047166206186324,1.000075655085301,361.82919672053043,nan,nan,82798,28261,5239,1778
term_a 18.260115646300143
summed_weighted_traj -3.9867127035062797
train,softmax,False,0.0,safe,0,-81.74287777096369,-47.44191098643471,14.273402942793863,1.020684025412072,1272.0639073697655,nan,nan,82798,28261,5239,1778


SyntaxError: invalid syntax (<ipython-input-30-faa011ab386f>, line 1)

In [101]:
# Re-import the local `ope` module so edits to it take effect without restarting the kernel.
importlib.reload(ope)
# Disabled: weighted doubly-robust estimate using previously inferred estimators.
# wdr_mean = ope.wdr_policy(ds, evaluation_policy, behavior_policy, q_estimator, v_estimator, 0.99)
# wdr_mean

<module 'ope' from '/home/floris/Documents/vu/mimic-project/ventai/notebooks/ope.py'>

In [None]:
# 1
# Value range (min, max) of the Q estimator followed by that of the V estimator.
q_estimator.min(), q_estimator.max(), v_estimator.min(), v_estimator.max()

In [None]:
# 1
# Mean and maximum of the Q estimator.
q_estimator.mean(), q_estimator.max()

In [None]:
# Frequency of each `next_state` among test-set rows flagged `terminal`.
# Presumably only the terminal state ids (TERMINAL_MORT / TERMINAL_NONMORT) should appear — TODO confirm.
test_set[test_set.terminal].next_state.value_counts()

In [None]:
# Number of distinct `next_state` values among training rows flagged `terminal`.
train_set[train_set.terminal].next_state.nunique()

In [None]:
# Frequency of each `next_state` among training rows flagged `terminal`.
train_set[train_set.terminal].next_state.value_counts()

In [None]:
# Debug dump: print every (state, action_discrete) row of the test set, one row per line.
np.apply_along_axis(lambda x: print(x), 1, test_set[['state', 'action_discrete']].to_numpy())

In [None]:
%%time
# Sweep over every (seed, shaping variant, mixing level) combination: blend the
# learned softmax policy with the clinicians' train policy, derive its
# safety-repaired counterpart, and print one CSV row of WIS / PHWIS estimates
# per (dataset, policy) pair.
# NOTE(review): sm_policy_file, as defined in the config cell, has four '{}'
# fields but only two values are supplied below — confirm the intended arguments.
for seed, shaping, mp in itertools.product(
    seeds,
    shaping_fname.keys(),
    (.01, .05, .1, .5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0),
):
    mixing_prob = mp / 10.0
    shaped = shaping != 'unshaped'
    # The scalar is taken verbatim (as a string) from the shaping name's last '-' field.
    shaping_scalar = shaping.split('-')[-1] if shaped else 0.0
    np.random.seed(seed)
    test_set = postprocess(pd.read_csv(test_set_file.format(seed)))
    train_set = postprocess(pd.read_csv(train_set_file.format(seed)))

    behavior_policy = joblib.load(behavior_policy_file.format(seed, ''))
    behavior_train_policy = joblib.load(behavior_policy_train_file.format(seed, ''))

    sm_unsafe = joblib.load(sm_policy_file.format(seed, shaping_fname[shaping]))
    mixed_policy = mixing_prob * sm_unsafe + (1 - mixing_prob) * behavior_train_policy
    mixed_safe = safety.repaired_safe_soft(mixed_policy, behavior_train_policy)

    # Order: unsafe on train, unsafe on test, safe on train, safe on test.
    evaluations = [
        (dataset, policy, behavior_policy, split, 'softmax', shaped, shaping_scalar, safety_label, seed)
        for policy, safety_label in ((mixed_policy, 'unsafe'), (mixed_safe, 'safe'))
        for dataset, split in ((train_set, 'train'), (test_set, 'test'))
    ]
    for ds, evaluation_policy, behavior_policy, *config in evaluations:
        mean, var, traj_weights = ope.wis_policy(ds, evaluation_policy, behavior_policy)
        # `var` is rebound here, so the printed variance is the PHWIS one.
        phwis_mean, var, _ = ope.phwis_policy(ds, evaluation_policy, behavior_policy)
        # The AM and HCOPE estimates are disabled; NaN placeholders keep the column layout.
        am, hcope5 = np.nan, np.nan
        ess = ope.ess(traj_weights)
        row = (
            *config, mixing_prob, mean, phwis_mean, ess, var, am, hcope5,
            len(train_set), len(test_set),
            train_set.icustay_id.nunique(), test_set.icustay_id.nunique(),
        )
        print(','.join(str(field) for field in row))
        # TODO: write result to file with config

In [None]:
%%time
def get_result(mixing_prob, seed, shaping, train_set, test_set, train_test_set, behavior_policy, behavior_train_policy):
    """Evaluate one seed's softmax policy, raw and safety-repaired, on train and test data.

    Parameters
    ----------
    mixing_prob
        Mixing level as passed by the caller; it is divided by 10 (mirroring the
        mixing sweep cell) and only reported in the result rows. No policy
        mixing is applied in this function.
    seed
        Seed used for NumPy's global RNG and to locate the policy file.
    shaping
        Key of ``shaping_fname``; ``'unshaped'`` means no reward shaping,
        otherwise the text after the last ``'-'`` is reported as the scalar.
    train_set, test_set
        Datasets to evaluate on; both must expose an ``icustay_id`` column.
    train_test_set
        Dataset handed to ``ope.infer_estimators_func``.
    behavior_policy
        Behaviour policy passed to the off-policy estimators.
    behavior_train_policy
        Reference policy passed to ``safety.repaired_safe``.

    Returns
    -------
    list of list of str
        Four rows (unsafe/train, unsafe/test, safe/train, safe/test), each
        holding the configuration followed by mixing level, WIS mean, PHWIS
        mean, effective sample size, variance, two NaN placeholders and the
        dataset sizes, all converted to strings.
    """
    # Fix: use the argument instead of the notebook-global `mp`, so the function
    # no longer depends on kernel state (which parallel workers may not share).
    mixing_prob = mixing_prob / 10.0
    if shaping == 'unshaped':
        shaped = False
        shaping_scalar = 0.0
    else:
        shaped = True
        shaping_scalar = shaping.split('-')[-1]
    np.random.seed(seed)

    # NOTE(review): sm_policy_file, as defined in the config cell, has four '{}'
    # fields but only two values are supplied here — confirm the intended arguments.
    sm_unsafe = joblib.load(sm_policy_file.format(seed, shaping_fname[shaping]))
    sm_safe = safety.repaired_safe(sm_unsafe, behavior_train_policy)

    evaluations = [
        (train_set, sm_unsafe, behavior_policy, 'train', 'softmax', shaped, shaping_scalar, 'unsafe', seed),
        (test_set, sm_unsafe, behavior_policy, 'test', 'softmax', shaped, shaping_scalar, 'unsafe', seed),
        (train_set, sm_safe, behavior_policy, 'train', 'softmax', shaped, shaping_scalar, 'safe', seed),
        (test_set, sm_safe, behavior_policy, 'test', 'softmax', shaped, shaping_scalar, 'safe', seed),
    ]
    results = []
    for ds, evaluation_policy, behavior_policy, *config in evaluations:
        mean, var, traj_weights = ope.wis_policy(ds, evaluation_policy, behavior_policy)
        # NOTE(review): `var` is rebound here, so the reported variance is the PHWIS one.
        phwis_mean, var, _ = ope.phwis_policy(ds, evaluation_policy, behavior_policy)
        q_estimator, v_estimator = ope.infer_estimators_func(train_test_set, evaluation_policy, 0.99, 100)
        # NOTE(review): the WDR estimate is computed but not yet part of the returned rows.
        wdr_mean = ope.wdr_policy(ds, evaluation_policy, behavior_policy, q_estimator, v_estimator, 0.99)
        # The AM and HCOPE estimates are disabled; NaN placeholders keep the column layout.
        am, hcope5 = np.nan, np.nan
        ess = ope.ess(traj_weights)
        results.append(list(map(str, (*config, mixing_prob, mean, phwis_mean, ess, var, am, hcope5, len(train_set), len(test_set), train_set.icustay_id.nunique(), test_set.icustay_id.nunique()))))
    return results

# Evaluate every shaping variant for all seeds in parallel.
#
# The datasets and clinician policies are seed-specific, so they are loaded
# once per seed up front. Previously they were loaded with whatever `seed` an
# earlier cell had left in the kernel and then reused for every seed.
mp = 0.0  # mixing level forwarded to get_result

seed_inputs = {}
for seed in seeds:
    test_set = postprocess(pd.read_csv(test_set_file.format(seed)))
    train_set = postprocess(pd.read_csv(train_set_file.format(seed)))
    train_test_set = pd.concat([train_set, test_set])

    behavior_policy = joblib.load(behavior_policy_file.format(seed, ''))
    behavior_train_policy = joblib.load(behavior_policy_train_file.format(seed, ''))
    # Tuple order matches get_result's positional parameters after `shaping`.
    seed_inputs[seed] = (train_set, test_set, train_test_set, behavior_policy, behavior_train_policy)

all_results = []
for shaping in shaping_fname.keys():
    print(shaping)
    results = joblib.Parallel(n_jobs=10)(
        joblib.delayed(get_result)(mp, seed, shaping, *seed_inputs[seed])
        for seed in seeds
    )
    print(results)
    all_results += results

In [None]:
# Display the accumulated evaluation results.
all_results

In [None]:
%%time
# Build the list of (dataset, evaluation policy, behaviour policy, labels...)
# combinations for the unshaped policies of each seed.
# NOTE(review): `evaluations` is rebuilt per iteration and never consumed in this cell.
# seeds = range(10)
seeds = (0,)
for seed in seeds:
    for shaping in ('unshaped',):
        np.random.seed(seed)
        # Fix: the dataset path templates have a single field, the seed. Passing
        # (shaping, seed) put the shaping name in that field and dropped the seed.
        test_set = postprocess(pd.read_csv(test_set_file.format(seed)))
        train_set = postprocess(pd.read_csv(train_set_file.format(seed)))

        behavior_policy = utils.repair_policy_uniform(joblib.load(behavior_policy_file.format(seed,'')))
        behavior_train_policy = joblib.load(behavior_policy_train_file.format(seed,''))
        behavior_test_policy = joblib.load(behavior_policy_test_file.format(seed,''))
        behavior_safe_train = safety.repaired_safe(behavior_train_policy, behavior_train_policy)

        # NOTE(review): greedy_policy_file and sm_policy_file, as defined in the
        # config cell, have four '{}' fields but only two values are supplied
        # here — confirm the intended arguments.
        greedy_unsafe = utils.repair_unsupported_greedy_policy(
            joblib.load(greedy_policy_file.format(seed, '')),
            train_set
        )
        greedy_safe = safety.repaired_safe(greedy_unsafe, behavior_train_policy, greedy=True)
        sm_unsafe = joblib.load(sm_policy_file.format(seed, ''))
        sm_safe = safety.repaired_safe(sm_unsafe, behavior_train_policy)

        # Tuple layout: dataset, evaluation policy, behaviour policy, split,
        # policy name, shaping label, safety label, seed.
        evaluations = (
            (train_set, behavior_train_policy, behavior_train_policy, 'train', 'observed', 'unshaped', 'unsafe', seed),
            (test_set, behavior_test_policy, behavior_test_policy, 'test', 'observed', 'unshaped', 'unsafe', seed),
            (train_set, behavior_train_policy, behavior_policy, 'train', 'behavior', 'unshaped', 'unsafe', seed),
            (test_set, behavior_train_policy, behavior_policy, 'test', 'behavior', 'unshaped', 'unsafe', seed),
            (train_set, behavior_safe_train, behavior_policy, 'train', 'behavior', 'unshaped', 'safe', seed),
            (test_set, behavior_safe_train, behavior_policy, 'test', 'behavior', 'unshaped', 'safe', seed),
            (train_set, greedy_unsafe, behavior_policy, 'train', 'greedy', 'unshaped', 'unsafe', seed),
            (test_set, greedy_unsafe, behavior_policy, 'test', 'greedy', 'unshaped', 'unsafe', seed),
            (train_set, greedy_safe, behavior_policy, 'train', 'greedy', 'unshaped', 'safe', seed),
            (test_set, greedy_safe, behavior_policy, 'test', 'greedy', 'unshaped', 'safe', seed),
            (train_set, sm_unsafe, behavior_policy, 'train', 'softmax', 'unshaped', 'unsafe', seed),
            (test_set, sm_unsafe, behavior_policy, 'test', 'softmax', 'unshaped', 'unsafe', seed),
            (train_set, sm_safe, behavior_policy, 'train', 'softmax', 'unshaped', 'safe', seed),
            (test_set, sm_safe, behavior_policy, 'test', 'softmax', 'unshaped', 'safe', seed)
        )

In [None]:
# Number of columns of the behaviour policy (presumably one per discrete action — TODO confirm).
behavior_policy.shape[1]

# HCOPE hyperparameter optimization

In [None]:
# Sweep the HCOPE hyperparameter `c` for each policy on the test set.
cs = (.01, .1, 1, 2, 5, 10, 20, 50, 1e2, 1.2e2, 1.5e2, 1.75e2, 1e3, 1.5e3, 1e4, 1e5, 1e6)
us = True  # forwarded as `unscale`
delta = .05
n_post = train_set.icustay_id.nunique()


def _hcope_sweep(label, evaluation_policy):
    """Print `label` as a progress marker, then return one HCOPE prediction per value in `cs`."""
    print(label)
    return [
        ope.hcope_prediction(test_set, evaluation_policy, behavior_policy, n_post=n_post, c=c, delta=delta, unscale=us)
        for c in cs
    ]


sm_unsafe_results = _hcope_sweep('sm_unsafe', sm_unsafe)
sm_safe_results = _hcope_sweep('sm_safe', sm_safe)
observed_results = _hcope_sweep('observed', behavior_policy)
bh_unsafe_results = _hcope_sweep('bh', behavior_train_policy)
bh_safe_results = _hcope_sweep('bh_safe', behavior_safe_train)
# Fix: these two progress labels were copy-pasted as 'sm_unsafe' / 'sm_safe'.
greedy_unsafe_results = _hcope_sweep('greedy_unsafe', greedy_unsafe)
greedy_safe_results = _hcope_sweep('greedy_safe', greedy_safe)

In [None]:
# Plot the HCOPE lower bound against `c` for each policy (sorted by label).
algorithms = sorted((
    ('softmax-unsafe', sm_unsafe_results),
    ('softmax-safe', sm_safe_results),
    ('observed', observed_results),
    ('behavior-unsafe', bh_unsafe_results),
    ('behavior-safe', bh_safe_results),
#     ('greedy-unsafe', greedy_unsafe_results),
#     ('greedy-safe', greedy_safe_results),
))
colors = sns.color_palette(n_colors=len(algorithms))
fig, ax = plt.subplots()
for (label, results), color in zip(algorithms, colors):
    # `color=` is the documented way to give all points one RGB colour;
    # an RGB tuple passed via `c=` can be mistaken for per-point values.
    ax.scatter(x=cs, y=results, color=color, label=label)
ax.set_xscale('log')
ax.set_yscale('symlog')
ax.set_xlabel('c')
ax.set_ylabel('Lower Bound')
ax.set_title('Optimization of c parameter')
ax.legend(loc=2, bbox_to_anchor=(1.0,1.0))
# Reference lines at -100 and +100 (the trajectory reward magnitudes used in this notebook).
ax.axhline(-100, c='black', alpha=.2, linestyle='--')
ax.axhline(100, c='black', alpha=.2, linestyle='--')
plt.show()

# ANALYSIS

In [None]:
# Inspect one randomly sampled training patient with mort90day == 't': print the
# importance weight looked up for the patient's first row, then, for every step,
# the clinician's action next to the softmax policy's most probable action.
w_sm_unsafe_train = ope.ois_traj_weights(train_set, sm_unsafe, behavior_policy)
patient_id = train_set[train_set['mort90day'] == 't'].groupby('icustay_id').first().sample(1).index[0]
patient = train_set[train_set.icustay_id == patient_id]
# One row of action probabilities per visited state.
action_dists = np.array([sm_unsafe[s, :] for s in patient.state])
# Fix: this was `c_a_probs == np.array(c_a_probs)`, a comparison whose result
# was discarded; the probabilities are now actually stored as an array.
c_a_probs = np.array([pi_a[c_a] for c_a, pi_a in zip(patient.action_discrete, action_dists)])
alt_act = [pi_a.argmax() for pi_a in action_dists]
print("Importance weight:", w_sm_unsafe_train[patient.index[0]])
for i, (c_a, e_a) in enumerate(zip(patient.action_discrete, alt_act)):
    print(i, end='')
    print('\tclincn', utils.to_action_ranges(c_a))
    print('\tevaltn', utils.to_action_ranges(e_a))

In [None]:
# Inspect another randomly sampled training patient with mort90day == 't'.
# Relies on `w_sm_unsafe_train` having been computed by an earlier cell.
patient_id = train_set[train_set['mort90day'] == 't'].groupby('icustay_id').first().sample(1).index[0]
patient = train_set[train_set.icustay_id == patient_id]
# One row of action probabilities per visited state.
action_dists = np.array([sm_unsafe[s, :] for s in patient.state])
# Fix: this was `c_a_probs == np.array(c_a_probs)`, a comparison whose result
# was discarded; the probabilities are now actually stored as an array.
c_a_probs = np.array([pi_a[c_a] for c_a, pi_a in zip(patient.action_discrete, action_dists)])
alt_act = [pi_a.argmax() for pi_a in action_dists]
print("Importance weight:", w_sm_unsafe_train[patient.index[0]])
for i, (c_a, e_a) in enumerate(zip(patient.action_discrete, alt_act)):
    print(i, end='')
    print('\tclincn', utils.to_action_ranges(c_a))
    print('\tevaltn', utils.to_action_ranges(e_a))

In [None]:
# Inspect a randomly sampled training patient with mort90day == 't', additionally
# printing the probability the softmax policy assigns to each clinician action.
# Relies on `w_sm_unsafe_train` having been computed by an earlier cell.
patient_id = train_set[train_set['mort90day'] == 't'].groupby('icustay_id').first().sample(1).index[0]
patient = train_set[train_set.icustay_id == patient_id]
# One row of action probabilities per visited state.
action_dists = np.array([sm_unsafe[s, :] for s in patient.state])
# Fixes: a stray pasted tuple after `c_a_probs.append(...)` tried to call the
# `None` it returns, and `c_a_probs == np.array(c_a_probs)` was a discarded
# comparison rather than an assignment.
c_a_probs = np.array([pi_a[c_a] for c_a, pi_a in zip(patient.action_discrete, action_dists)])
alt_act = [pi_a.argmax() for pi_a in action_dists]
print("Weight:", w_sm_unsafe_train[patient.index[0]])
for i, (c_a, e_a) in enumerate(zip(patient.action_discrete, alt_act)):
    print(i, end='')
    print('\tclincn', utils.to_action_ranges(c_a), c_a_probs[i])
    print('\tevaltn', utils.to_action_ranges(e_a))

# Safety analysis

In [7]:
# Derive a guideline-compliant policy from the behaviour policy: actions flagged
# non-compliant by `safety.action_compliance_map`, and entries the behaviour
# policy scores exactly 0.0, are set to -inf so the row-wise softmax gives them
# zero probability.
n_actions = 7**3
minus_inf = float('-inf')

compliant_behavior_score = behavior_policy.copy()
noncompliant_actions = [a for a in range(n_actions) if not safety.action_compliance_map[a]]
compliant_behavior_score[:, noncompliant_actions] = minus_inf
compliant_behavior_score[compliant_behavior_score == 0.0] = minus_inf

compliant_behavior_policy = scipy.special.softmax(compliant_behavior_score, axis=1)

assert compliant_behavior_policy.shape == (650, n_actions)
# Sanity check: smallest and largest row sums of the resulting policy.
row_mass = compliant_behavior_policy.sum(axis=1)
print(row_mass.min(), row_mass.max())

NameError: name 'behavior_policy' is not defined

In [None]:
# Boolean mask of entries where the compliant policy is positive but the behaviour policy is exactly zero.
((compliant_behavior_policy > 0.0) & (behavior_policy ==0.0))

In [None]:
# True if the compliant policy puts positive probability anywhere the behaviour policy has exactly zero.
((behavior_policy == 0.0) & (compliant_behavior_policy > 0.0)).any()

In [None]:
# WIS evaluation of the compliant behaviour policy against the behaviour policy.
# Each report line shows (mean, variance, number of weights above 0.001).
print("compliant behavior, WIS")

train_mean, train_var, train_weights = ope.wis_policy(train_set, compliant_behavior_policy, behavior_policy)
train_support = (train_weights > 0.001).sum()
print("pi_b on train:", (train_mean, train_var, train_support))

test_mean, test_var, test_weights = ope.wis_policy(test_set, compliant_behavior_policy, behavior_policy)
test_support = (test_weights > 0.001).sum()
print("pi_b on test :", (test_mean, test_var, test_support))

In [None]:
# Mean of the values returned by safety.state_compliance_clinical for the training set.
safety.state_compliance_clinical(train_set, safety.avg_clinical_timestep).mean()

In [None]:
# Mean of the values returned by safety.action_compliance_clinical for the training set.
safety.action_compliance_clinical(train_set).mean()

In [None]:
# Individual compliance functions from the `safety` module, applied to the training set.
# By name they cover tidal volume, respiratory rate, SpO2, plateau pressure and pH — TODO confirm.
compliance_fs = [
        safety.tv_compliance_clinical,
        safety.rr_compliance_clinical,
        safety.spo2_compliance_clinical,
        safety.pplat_compliance_clinical,
        safety.ph_compliance_clinical,
]
# One result per function, in the same order as `compliance_fs`.
compliance_scores = [f(train_set) for f in compliance_fs]

In [None]:
# Print the mean of each compliance score, in the order of `compliance_fs`.
for s in compliance_scores:
    print(s.mean())

In [None]:
# Summary of the training set's `tv_derived` column: (min, max, mean, median).
train_set.tv_derived.min(), train_set.tv_derived.max(), train_set.tv_derived.mean(), train_set.tv_derived.median()

In [None]:
# Load the data and policies for the 'avgpotential-1.0' shaping variant (seed 0),
# then show the WIS mean and variance of the unrepaired softmax policy on the test set.
shaping = 'avgpotential-1.0'
seed = 0
np.random.seed(seed)
# Fix: the dataset path templates have a single field, the seed. Passing
# ('unshaped', seed) put 'unshaped' in that field and dropped the seed.
test_set = postprocess(pd.read_csv(test_set_file.format(seed)))
train_set = postprocess(pd.read_csv(train_set_file.format(seed)))

behavior_policy = utils.repair_policy_uniform(joblib.load(behavior_policy_file.format(seed,'')))
behavior_train_policy = joblib.load(behavior_policy_train_file.format(seed,''))
behavior_test_policy = joblib.load(behavior_policy_test_file.format(seed,''))
behavior_safe_train = safety.repaired_safe(behavior_train_policy, behavior_train_policy)

# NOTE(review): greedy_policy_file and sm_policy_file, as defined in the config
# cell, have four '{}' fields but only two values are supplied here — confirm
# the intended arguments.
greedy_unsafe = utils.repair_unsupported_greedy_policy(
    joblib.load(greedy_policy_file.format(seed, shaping_fname[shaping])),
    train_set
)
greedy_safe = safety.repaired_safe(greedy_unsafe, behavior_train_policy, greedy=True)
sm_unsafe = joblib.load(sm_policy_file.format(seed, shaping_fname[shaping]))
sm_safe = safety.repaired_safe(sm_unsafe, behavior_train_policy)

mean, var, _ = ope.wis_policy(test_set, sm_unsafe, behavior_policy)
mean,var

In [None]:
# WIS mean and variance of the unrepaired softmax policy on the training set.
mean, var, _ = ope.wis_policy(train_set, sm_unsafe, behavior_policy)
mean, var

In [None]:
# WIS mean and variance of the unrepaired softmax policy on the training set
# (same computation as the preceding cell).
mean, var, _ = ope.wis_policy(train_set, sm_unsafe, behavior_policy)
mean, var

In [None]:
# WIS mean and variance of the unrepaired greedy policy on the test set.
mean, var, _ = ope.wis_policy(test_set, greedy_unsafe, behavior_policy)
mean,var

In [None]:
# Load the data and policies for the 'unshaped' variant (seed 0), then show the
# WIS mean and variance of the unrepaired softmax policy on the test set.
shaping = 'unshaped'
seed = 0
np.random.seed(seed)
# Fix: the dataset path templates have a single field, the seed. Passing
# ('unshaped', seed) put 'unshaped' in that field and dropped the seed.
test_set = postprocess(pd.read_csv(test_set_file.format(seed)))
train_set = postprocess(pd.read_csv(train_set_file.format(seed)))

behavior_policy = utils.repair_policy_uniform(joblib.load(behavior_policy_file.format(seed,'')))
behavior_train_policy = joblib.load(behavior_policy_train_file.format(seed,''))
behavior_test_policy = joblib.load(behavior_policy_test_file.format(seed,''))
behavior_safe_train = safety.repaired_safe(behavior_train_policy, behavior_train_policy)

# NOTE(review): greedy_policy_file and sm_policy_file, as defined in the config
# cell, have four '{}' fields but only two values are supplied here — confirm
# the intended arguments.
greedy_unsafe = utils.repair_unsupported_greedy_policy(
    joblib.load(greedy_policy_file.format(seed, shaping_fname[shaping])),
    train_set
)
greedy_safe = safety.repaired_safe(greedy_unsafe, behavior_train_policy, greedy=True)
sm_unsafe = joblib.load(sm_policy_file.format(seed, shaping_fname[shaping]))
sm_safe = safety.repaired_safe(sm_unsafe, behavior_train_policy)

mean, var, _ = ope.wis_policy(test_set, sm_unsafe, behavior_policy)
mean,var

In [None]:
# WIS mean and variance of the unrepaired softmax policy on the training set.
mean, var, _ = ope.wis_policy(train_set, sm_unsafe, behavior_policy)
mean, var