In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd

import scipy
from scipy.optimize import Bounds
from scipy.optimize import LinearConstraint
from scipy.optimize import NonlinearConstraint
from scipy.optimize import SR1, BFGS
from scipy.optimize import minimize

import confound_mdp
import confound_ope
import confound_env

from core.sepsisSimDiabetes.State import State
from core.sepsisSimDiabetes.Action import Action
from core import generator_confounded_mdp as DGEN
from core import conf_wis as CWIS
from core import loss_minimization as LB
from utils.utils import *

In [None]:
# Registry of benchmark environments. Every entry is a list laid out as
#   [mdp, pi_b, pi_e, horizon, gamma, nStates, nActions, term]
envs = []

# --- toy227 environment ---
pi_b, P, R, x_dist, u_dist, gamma = confound_env.toy227(0.25, 0.35)
toy = confound_mdp.ConfoundMDP(P, R, x_dist, u_dist, gamma)
horizon = 5
nActions, nStates = P.shape[1], P.shape[2]

# Evaluation policy: the same action distribution in every state
# (the row is broadcast over the state axis).
pi_e = np.zeros((nStates, nActions))
pi_e[:] = [0.3, 0.7]

envs.append([toy, pi_b, pi_e, horizon, gamma, nStates, nActions, -1])

# --- graph environment ---
horizon = 4
graph_len = 4
pi_b, P, R, x_dist, u_dist, gamma = confound_env.graph_opetools(
    horizon=graph_len, slip=0.25, confound_weight=0.23
)
# The reward returned by the generator is sign-flipped for this environment.
R = -1 * R
graph = confound_mdp.ConfoundMDP(P, R, x_dist, u_dist, gamma)
nActions, nStates = P.shape[1], P.shape[2]

# Evaluation policy: the same action distribution in every state
# (the row is broadcast over the state axis).
pi_e = np.zeros((nStates, nActions))
pi_e[:] = [0.3, 0.7]

envs.append([graph, pi_b, pi_e, horizon, gamma, nStates, nActions, -1])

# --- toymc environment ---
horizon = 20
pi_b, P, R, x_dist, u_dist, gamma = confound_env.toymc_opetools(
    n_left=8, n_right=8, horizon=horizon, slip=0.15, confound_weight=0.6
)
# Rewards are used exactly as returned here (no sign flip).
toymc = confound_mdp.ConfoundMDP(P, R, x_dist, u_dist, gamma)
nActions, nStates = P.shape[1], P.shape[2]

# Evaluation policy: the same action distribution in every state
# (the row is broadcast over the state axis).
pi_e = np.zeros((nStates, nActions))
pi_e[:] = [0.15, 0.85]

envs.append([toymc, pi_b, pi_e, horizon, gamma, nStates, nActions, -1])

# --- gridworld environment ---
horizon = 8
pi_b, P, R, x_dist, u_dist, gamma = confound_env.gridworld_opetools(
    horizon=horizon, slip=0.04, confound_weight=0.6
)
# Rewards are used exactly as returned here (no sign flip).
gridworld = confound_mdp.ConfoundMDP(P, R, x_dist, u_dist, gamma)
nActions, nStates = P.shape[1], P.shape[2]

# Evaluation policy over four actions: the same distribution in every state
# (the row is broadcast over the state axis).
pi_e = np.zeros((nStates, nActions))
pi_e[:] = [0.4, 0.1, 0.4, 0.1]

envs.append([gridworld, pi_b, pi_e, horizon, gamma, nStates, nActions, -1])

### To work with the Namkoong et al. code, we need to add a time index t and remove u

In [None]:
def transform_dataset(dataset):
    """Re-order trajectory columns into the (t, a, x, x', r) layout.

    Parameters
    ----------
    dataset : array indexed as [trajectory, step, column], with the last
        axis laid out as (x, a, u, x', r).

    Returns
    -------
    A new float array of shape (n, horizon, 5) whose last axis is
    (t, a, x, x', r), where t is the step index 0..horizon-1. The u column
    of the input is not carried over.
    """
    n_traj, n_steps, _ = dataset.shape

    # (destination column, source column): a <- 1, x <- 0, x' <- 3, r <- 4
    column_map = ((1, 1), (2, 0), (3, 3), (4, 4))

    reordered = np.zeros((n_traj, n_steps, 5))
    # The step index is broadcast across all trajectories.
    reordered[:, :, 0] = np.arange(n_steps)
    for dst, src in column_map:
        reordered[:, :, dst] = dataset[:, :, src]
    return reordered

# Compare sensitivity models

In [None]:
# Experiment: run both sensitivity models using the worst-case odds ratios
# measured on the true behaviour policy of each environment.
#   true_gamma_mb_envs  : one (1, nPbs) array per run (model-based values)
#   true_gamma_nky_envs : one (1,) array per run (bootstrapped loss-minimization value)
true_gamma_mb_envs = []
true_gamma_nky_envs = []

Pbs = [10, 100000]
nPbs = len(Pbs)

hadds = [0]

for mdp , pi_b, pi_e, base_horizon, gamma, nStates, nActions, term in envs:

    # FIX (hidden state): the notebook-global `u_dist` is whatever the LAST
    # environment set up left behind, not the distribution belonging to the
    # `mdp` of this iteration. Prefer the copy held by the mdp itself.
    # NOTE(review): assumes ConfoundMDP stores its `u_dist` constructor argument
    # as an attribute (as it does for `x_dist`) - confirm. If it does not, this
    # falls back to the previous behaviour.
    env_u_dist = getattr(mdp, 'u_dist', u_dist)

    for h in hadds:
        horizon = base_horizon + h

        print("running env...")
        dataset = confound_mdp.collect_sample(200000, mdp, pi_b, horizon)
        # Flattened (n * horizon, 5) view of the samples; not used below in this cell.
        data = dataset.reshape((dataset.shape[0]*dataset.shape[1],5))
        Phat = confound_ope.estimate_P(dataset, mdp)
        pihat = confound_ope.estimate_pi(dataset, mdp)
        # Patch rows that are all zero: such (a, s) transitions are sent to
        # state `term`, and such policy rows are made uniform.
        for a in range(nActions):
            for s in range(nStates):
                if Phat[a,s].sum() == 0:
                    Phat[a,s,term] = 1
                if pihat[s].sum() == 0:
                    pihat[s,:] = 1/nActions
        # Behaviour policy averaged over the two confounder values.
        pi_avg = pi_b[0] * env_u_dist[0] + pi_b[1] * env_u_dist[1]

        print("value of pi_e with no confounding")
        Q0 = np.zeros((nStates, nActions))
        nom_q = Q0.copy()
        for t in range(horizon):
            nom_q = confound_ope.fitted_q_update(nom_q, pi_e, dataset, mdp)
        print(mdp.get_value(nom_q,pi_e)[1])

        nky_data = transform_dataset(dataset)
        returns = confound_mdp.calc_returns(dataset, gamma, horizon)

        nky_results = np.zeros((1))
        mb_results = np.zeros((1, nPbs))

        # Fitted-Q run with one update fewer than above (horizon-1); its value
        # V0 is what gets handed to fixed_u_gp_s_rect below.
        Q0 = np.zeros((nStates, nActions))
        q_reparam_samp = Q0.copy()
        for t in range(horizon-1):
            q_reparam_samp = confound_ope.fitted_q_update(q_reparam_samp, pi_e, dataset, mdp)
        V0 = mdp.get_value(q_reparam_samp,pi_e)[0]

        # Largest odds ratio between the two confounder-specific policies,
        # pi_b[u] versus pi_b[1-u], over all (x, a, u).
        worst_nky = 0
        for x in range(nStates):
            for a in range(nActions):
                for u in range(2):
                    ratio1 = pi_b[u, x, a] / (1-pi_b[u, x, a])
                    ratio2 = pi_b[1-u, x, a] / (1-pi_b[1-u, x, a])
                    ratio_ratio = ratio1/ratio2
                    if ratio_ratio > worst_nky:
                        worst_nky = ratio_ratio
        print(worst_nky)

        # Largest odds ratio between a confounder-specific policy pi_b[u] and
        # the averaged policy pi_avg, over all (x, a, u).
        worst_msm = 0
        for x in range(nStates):
            for a in range(nActions):
                for u in range(2):
                    ratio1 = pi_b[u, x, a] / (1-pi_b[u, x, a])
                    ratio2 = pi_avg[x, a] / (1-pi_avg[x,a])
                    ratio_ratio = ratio1/ratio2
                    if ratio_ratio > worst_msm:
                        worst_msm = ratio_ratio
        print(worst_msm)

        # Single result row per run in this experiment.
        i = 0

        # Loss-minimization run with bootstrap; the mean of its output is stored.
        config = {'Gamma': worst_nky, 'lr':5 * 1e-2, 'epoch':200, 'nS': nStates,
              'nA': nActions, 'bootstrap': True, 'n_bootstrap': 50}
        lb_data = {'samps': nky_data, 'returns': returns}
        eval_pol = (pi_e.T, pi_e.T)
        test_lb = LB.loss_minimization(config=config, data=lb_data,
                                           evaluation_policies=eval_pol, scope='toy_test')
        test_lb_ours, test_lb_naive, _ = test_lb.run(use_tqdm=True)
        print(test_lb_ours)
        nky_results[i] = test_lb_ours.mean()

        # Same run without bootstrap; printed for reference only, not stored.
        config = {'Gamma': worst_nky, 'lr':5 * 1e-2, 'epoch':200, 'nS': nStates,
              'nA': nActions, 'bootstrap': False, 'n_bootstrap': 0}
        lb_data = {'samps': nky_data, 'returns': returns}
        eval_pol = (pi_e.T, pi_e.T)
        test_lb = LB.loss_minimization(config=config, data=lb_data,
                                           evaluation_policies=eval_pol, scope='toy_test')
        test_lb_ours, test_lb_naive, _ = test_lb.run(use_tqdm=True)
        print("nky no bootstrap")
        print(test_lb_ours)

        # Model-based value for each P_bound, weighted by mdp.x_dist.
        for j,P_bound in enumerate(Pbs):
            fixed_u_v = confound_ope.fixed_u_gp_s_rect(V0, pi_e, 0.50, Phat, pihat, P_bound, worst_msm, mdp)
            mb_results[i,j] = fixed_u_v @ mdp.x_dist

        true_gamma_mb_envs.append(mb_results)
        true_gamma_nky_envs.append(nky_results)

In [None]:
# Model-based results from the cell above: one (1, nPbs) array per run.
true_gamma_mb_envs

In [None]:
# Bootstrapped loss-minimization results from the experiment above: one length-1 array per run.
true_gamma_nky_envs

# Force same $\Gamma$

In [None]:
# Experiment: run both sensitivity models with the same Gamma, sweeping `gams`.
#   mb_envs  : one (nGams, nPbs) array per run (model-based values)
#   nky_envs : one (nGams,) array per run (bootstrapped loss-minimization value)
mb_envs = []
nky_envs = []

gams = [1, 2, 4, 10]
nGams = len(gams)

Pbs = [10, 100000]
#Pbs = [100000]
nPbs = len(Pbs)

hadds = [0]

for mdp , pi_b, pi_e, base_horizon, gamma, nStates, nActions, term in envs:

    # FIX (hidden state): the notebook-global `u_dist` is whatever the LAST
    # environment set up left behind, not the distribution belonging to the
    # `mdp` of this iteration. Prefer the copy held by the mdp itself.
    # NOTE(review): assumes ConfoundMDP stores its `u_dist` constructor argument
    # as an attribute (as it does for `x_dist`) - confirm. If it does not, this
    # falls back to the previous behaviour.
    env_u_dist = getattr(mdp, 'u_dist', u_dist)

    for h in hadds:
        horizon = base_horizon + h

        print("running env...")
        dataset = confound_mdp.collect_sample(200000, mdp, pi_b, horizon)
        # Flattened (n * horizon, 5) view of the samples; not used below in this cell.
        data = dataset.reshape((dataset.shape[0]*dataset.shape[1],5))
        Phat = confound_ope.estimate_P(dataset, mdp)
        pihat = confound_ope.estimate_pi(dataset, mdp)
        # Patch rows that are all zero: such (a, s) transitions are sent to
        # state `term`, and such policy rows are made uniform.
        for a in range(nActions):
            for s in range(nStates):
                if Phat[a,s].sum() == 0:
                    Phat[a,s,term] = 1
                if pihat[s].sum() == 0:
                    pihat[s,:] = 1/nActions
        # Behaviour policy averaged over the two confounder values.
        # (Not consumed later in this cell; Gamma comes from `gams` here.)
        pi_avg = pi_b[0] * env_u_dist[0] + pi_b[1] * env_u_dist[1]

        print("value of pi_e with no confounding")
        Q0 = np.zeros((nStates, nActions))
        nom_q = Q0.copy()
        for t in range(horizon):
            nom_q = confound_ope.fitted_q_update(nom_q, pi_e, dataset, mdp)
        print(mdp.get_value(nom_q,pi_e)[1])

        nky_data = transform_dataset(dataset)
        returns = confound_mdp.calc_returns(dataset, gamma, horizon)

        nky_results = np.zeros((nGams))
        mb_results = np.zeros((nGams, nPbs))

        # Fitted-Q run with one update fewer than above (horizon-1); its value
        # V0 is what gets handed to fixed_u_gp_s_rect below.
        Q0 = np.zeros((nStates, nActions))
        q_reparam_samp = Q0.copy()
        for t in range(horizon-1):
            q_reparam_samp = confound_ope.fitted_q_update(q_reparam_samp, pi_e, dataset, mdp)
        V0 = mdp.get_value(q_reparam_samp,pi_e)[0]

        for i,gam in enumerate(gams):
            # Loss-minimization run with bootstrap; the mean of its output is stored.
            config = {'Gamma': gam, 'lr':5 * 1e-2, 'epoch':200, 'nS': nStates,
                  'nA': nActions, 'bootstrap': True, 'n_bootstrap': 50}
            lb_data = {'samps': nky_data, 'returns': returns}
            eval_pol = (pi_e.T, pi_e.T)
            test_lb = LB.loss_minimization(config=config, data=lb_data,
                                               evaluation_policies=eval_pol, scope='toy_test')
            test_lb_ours, test_lb_naive, _ = test_lb.run(use_tqdm=True)
            print(test_lb_ours)
            nky_results[i] = test_lb_ours.mean()

            # Same run without bootstrap; printed for reference only, not stored.
            config = {'Gamma': gam, 'lr':5 * 1e-2, 'epoch':200, 'nS': nStates,
                  'nA': nActions, 'bootstrap': False, 'n_bootstrap': 0}
            lb_data = {'samps': nky_data, 'returns': returns}
            eval_pol = (pi_e.T, pi_e.T)
            test_lb = LB.loss_minimization(config=config, data=lb_data,
                                               evaluation_policies=eval_pol, scope='toy_test')
            test_lb_ours, test_lb_naive, _ = test_lb.run(use_tqdm=True)
            print("nky no bootstrap")
            print(test_lb_ours)

            # Model-based value for each P_bound at this Gamma, weighted by mdp.x_dist.
            for j,P_bound in enumerate(Pbs):
                fixed_u_v = confound_ope.fixed_u_gp_s_rect(V0, pi_e, 0.50, Phat, pihat, P_bound, gam, mdp)
                mb_results[i,j] = fixed_u_v @ mdp.x_dist

        mb_envs.append(mb_results)
        nky_envs.append(nky_results)

In [None]:
# Model-based results of the Gamma sweep: one (nGams, nPbs) array per run.
mb_envs

In [None]:
# Bootstrapped loss-minimization results of the Gamma sweep: one length-nGams array per run.
nky_envs

In [None]:
# import pickle

# pickle.dump( mb_envs, open( "mb_namkoong_compare_experiments.p", "wb" ) )
# pickle.dump( nky_envs, open( "nky_namkoong_compare_experiments.p", "wb" ) )