# Varying Coverage

In [1]:
# Fetch the gumbel-max-scm repository; later cells add it to sys.path and read files under gumbel-max-scm/data
!git clone https://github.com/clinicalml/gumbel-max-scm.git

Cloning into 'gumbel-max-scm'...
remote: Enumerating objects: 113, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 113 (delta 0), reused 0 (delta 0), pack-reused 110[K
Receiving objects: 100% (113/113), 1.48 MiB | 16.61 MiB/s, done.
Resolving deltas: 100% (28/28), done.


In [2]:
# Make the cloned ./gumbel-max-scm checkout importable.
# Inserted at position 1 so that sys.path[0] keeps its precedence.
import os, sys
simulator_path = os.path.abspath('./gumbel-max-scm')
sys.path.insert(1, simulator_path)

In [3]:
!pip install pymdptoolbox

Collecting pymdptoolbox
  Downloading pymdptoolbox-4.0-b3.zip (29 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pymdptoolbox
  Building wheel for pymdptoolbox (setup.py) ... [?25l[?25hdone
  Created wheel for pymdptoolbox: filename=pymdptoolbox-4.0b3-py3-none-any.whl size=25656 sha256=bd14b51d6ea1b17c1930b9eaac3933196a5de79d7ea7032534ca7f2f725c4162
  Stored in directory: /root/.cache/pip/wheels/2b/e7/c7/d7abf9e309f3573a934fed2750c70bd75d9e9d901f7f16e183
Successfully built pymdptoolbox
Installing collected packages: pymdptoolbox
Successfully installed pymdptoolbox-4.0b3


**IMPORTANT NOTE:** At this stage, to reproduce our experiments, one must modify line 38 of `gumbel-max-scm/sepsisSimDiabetes/DataGenerator.py` so that it reads:

```
mdp = MDP(init_state_idx=%state%,
          policy_array=policy, policy_idx_type=policy_idx_type,
          p_diabetes=p_diabetes)

```

We have essentially set the initial state to a fixed value so that we may estimate the Q-function from that state. Here `%state%` is a placeholder: replace it with the integer index of the chosen initial state. Additionally, line 58 of the same file must be modified to:

```
mdp.state = mdp.get_new_state(state_idx = %state%)
```

In [4]:
import numpy as np
import cf.counterfactual as cf
import cf.utils as utils
import pandas as pd
import pickle
import itertools as it
from tqdm import tqdm_notebook as tqdm
from scipy.linalg import block_diag

# Sepsis Simulator code
from sepsisSimDiabetes.State import State
from sepsisSimDiabetes.Action import Action
from sepsisSimDiabetes.DataGenerator import DataGenerator
import sepsisSimDiabetes.MDP as simulator

import mdptoolboxSrc.mdp as mdptools

import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## Generate Data Set

In [67]:
import numpy as np
# Seed NumPy's global random number generator
SEED = 1
np.random.seed(SEED)
NSIMSAMPS = 100000  # Samples to draw from the simulator
NSTEPS = 20  # Max length of each trajectory
NCFSAMPS = 5  # Counterfactual Samples per observed sample
DISCOUNT_Pol = 0.99 # Used for computing optimal policies
DISCOUNT = 1 # Used for computing actual reward
PHYS_EPSILON = 0.05 # Used for sampling using physician pol as eps greedy

# Passed to the simulator as p_diabetes and used for the mixture weights
PROB_DIAB = 0.0  # commented-out alternative value: 1.0

# Option 1: Use bootstrapping w/replacement on the original NSIMSAMPS to estimate errors
# NOTE(review): the name is spelled USE_BOOSTRAP (sic); left as is because other cells may reference it
USE_BOOSTRAP=True
N_BOOTSTRAP = 100

# Option 2: Use repeated sampling (i.e., NSIMSAMPS fresh simulations each time) to get error bars;
# This is done in the appendix of the paper, but not in the main paper
N_REPEAT_SAMPLING = 1

# These are properties of the simulator, do not change
n_actions = Action.NUM_ACTIONS_TOTAL
n_components = 2

# These are added as absorbing states
# (the two extra indices appended after the State.NUM_OBS_STATES observed states)
n_states_abs = State.NUM_OBS_STATES + 2
discStateIdx = n_states_abs - 1
deadStateIdx = n_states_abs - 2

# Set up Variables for Policy and Data

In [68]:
import zipfile
# Unpack the archived transition/reward matrices next to the archive so the next cell can open the .pkl
with zipfile.ZipFile("gumbel-max-scm/data/diab_txr_mats-replication.zip", 'r') as zip_ref:
    zip_ref.extractall("gumbel-max-scm/data")

In [69]:
# Get the transition and reward matrix from file
# NOTE(review): pickle.load can execute arbitrary code; only load this file from a trusted checkout
with open("gumbel-max-scm/data/diab_txr_mats-replication.pkl", "rb") as f:
    mdict = pickle.load(f)

tx_mat = mdict["tx_mat"]
r_mat = mdict["r_mat"]
# Two-component mixture weights: [1 - PROB_DIAB, PROB_DIAB]
p_mixture = np.array([1 - PROB_DIAB, PROB_DIAB])

In [70]:
from scipy.linalg import block_diag

# Per-action square matrices over all State.NUM_FULL_STATES states
# (presumably indexed [action, state, next state] -- TODO confirm against cf.MatrixMDP)
tx_mat_full = np.zeros((n_actions, State.NUM_FULL_STATES, State.NUM_FULL_STATES))
r_mat_full = np.zeros((n_actions, State.NUM_FULL_STATES, State.NUM_FULL_STATES))

# For each action, put the two component matrices (first index 0 and 1) on the diagonal;
# block_diag leaves the off-diagonal blocks at zero
for a in range(n_actions):
    tx_mat_full[a, ...] = block_diag(tx_mat[0, a, ...], tx_mat[1, a,...])
    r_mat_full[a, ...] = block_diag(r_mat[0, a, ...], r_mat[1, a, ...])

In [71]:
# Solve the full-state MDP by policy iteration; the result is the base for every evaluation policy below
fullMDP = cf.MatrixMDP(tx_mat_full, r_mat_full)
fullPol = fullMDP.policyIteration(discount=DISCOUNT_Pol, eval_type=1)

#The behavior policy is the fully random policy
# (every entry equals 1 / fullPol.shape[1])
randPol = np.ones(fullPol.shape)/(fullPol.shape[1])

Generate Behaviour Policy Data

In [72]:
# Simulate NSIMSAMPS trajectories of at most NSTEPS steps under the uniformly random behaviour policy
dgen = DataGenerator()
states, actions, lengths, rewards, diab, emp_tx_totals, emp_r_totals = dgen.simulate(
    NSIMSAMPS, NSTEPS, policy=randPol, policy_idx_type='full',
    p_diabetes=PROB_DIAB, use_tqdm=False) # progress bar disabled

# NOTE(review): obs_samps is not read by the coverage cells visible in this section
obs_samps = utils.format_dgen_samps(
    states, actions, rewards, diab, NSTEPS, NSIMSAMPS)

Convert data into array format

In [73]:
# Step index 0..NSTEPS-1, repeated for every trajectory, with a trailing axis so it can be concatenated
time = np.arange(NSTEPS)
times = np.stack(axis=0, arrays=[time]*NSIMSAMPS)
times = times[..., np.newaxis]

# Last axis is concatenated in the order: time, state, action, reward, next state
nf_transitions_b = np.concatenate((times, states[:, 0:NSTEPS, :], actions, rewards, states[:, 1:, :]), axis=2)
nf_transitions_b.shape

(100000, 20, 5)

Convert data into factored format

In [74]:
def factorise_dataset(data):
  """Build four factored copies of an unfactored transition array.

  Args:
    data: array of shape (n_trajectories, n_steps, 5) whose last axis holds
      (time, state, action, reward, next_state). An action equal to -1 is
      not decoded and is stored as -1 in every factor.

  Returns:
    Tuple (f_transitions_anva_ve, f_transitions_anve_va,
    f_transitions_veva_an, f_transitions_an_va_ve). The first three have
    shape (n_trajectories, n_steps, 2, 5), the last has
    (n_trajectories, n_steps, 3, 5). Along the factor axis, time, state and
    next_state are copied unchanged, the reward is split equally between the
    factors, and the action is replaced by the factor's sub-action.

  The output shapes are taken from `data` itself rather than from the
  notebook-level NSIMSAMPS / NSTEPS, so subsets of the data can be factorised.
  """
  n_trajectories, n_steps = data.shape[0], data.shape[1]

  def _template(n_factors):
    # Everything except the action column is a broadcast of the unfactored data
    out = np.zeros((n_trajectories, n_steps, n_factors, 5))
    for col in (0, 1, 4):  # time, state (abstraction), next state (abstraction)
      out[..., col] = data[..., col, np.newaxis]
    out[..., 2] = -1  # default sub-action; overwritten below for real actions
    out[..., 3] = data[..., 3, np.newaxis] / float(n_factors)
    return out

  #With two action spaces: (ABX, VASO) and (VENT), we can perfectly satisfy theorem 1
  f_transitions_anva_ve = _template(2)
  #We also investigate the factorisations that would not satisfy theorem 1:
  # (ABX, VENT), (VASO)
  f_transitions_anve_va = _template(2)
  # (VENT, VASO), (ABX)
  f_transitions_veva_an = _template(2)
  #With three action spaces, we can reduce the variance but may increase bias
  f_transitions_an_va_ve = _template(3)

  # Decode each distinct action index once instead of once per transition.
  # NOTE(review): which treatment each row of get_action_vec() refers to is
  # defined by the Action class -- confirm it matches the labels above.
  actions = data[..., 2]
  for action in np.unique(actions):
    if action == -1:
      continue
    action_vec = Action(action_idx=action).get_action_vec()
    a0, a1, a2 = action_vec[0,0], action_vec[1,0], action_vec[2,0]
    taken = actions == action

    f_transitions_anva_ve[taken, 0, 2] = a1
    f_transitions_anva_ve[taken, 1, 2] = 2*a0 + a2

    f_transitions_anve_va[taken, 0, 2] = a2
    f_transitions_anve_va[taken, 1, 2] = 2*a0 + a1

    f_transitions_veva_an[taken, 0, 2] = a0
    f_transitions_veva_an[taken, 1, 2] = 2*a1 + a2

    f_transitions_an_va_ve[taken, 0, 2] = a0
    f_transitions_an_va_ve[taken, 1, 2] = a1
    f_transitions_an_va_ve[taken, 2, 2] = a2

  return f_transitions_anva_ve,  f_transitions_anve_va, f_transitions_veva_an, f_transitions_an_va_ve

In [75]:
# Four factored views of the behaviour transitions, in the order returned by factorise_dataset
tb = factorise_dataset(nf_transitions_b)

In [76]:
import numpy as np
import gc

def hashgen(arr):
  """Return a cheap bucketing key for a trajectory: the sum of column 4 of `arr`.

  The key is only used to narrow down candidates; trajectories with equal keys
  still have to be compared element by element by the caller.
  """
  column_four = arr[:, 4]
  return column_four.sum()

def evaluate_coverage(tb, te, nf_b, nf_e, NUM_EPISODES, EPISODE_LENGTH=20):
  """Measure how many evaluation trajectories also occur in the behaviour data.

  Only the first NUM_EPISODES trajectories of each data set, truncated to their
  first EPISODE_LENGTH steps, are considered. An evaluation trajectory counts
  as covered when some behaviour trajectory is element-wise np.isclose to it.

  Args:
    tb: tuple of four factored behaviour arrays, each indexed
      [trajectory, step, factor, feature].
    te: tuple of four factored evaluation arrays with the same layout as `tb`.
    nf_b: unfactored behaviour array indexed [trajectory, step, feature].
    nf_e: unfactored evaluation array with the same layout as `nf_b`.
    NUM_EPISODES: number of leading trajectories to use from each data set.
    EPISODE_LENGTH: number of leading steps of each trajectory to compare.

  Returns:
    Tuple (nf, f1, f2, f3, f4): `nf` is the covered fraction for the unfactored
    data (a float); each `f*` is an array with one covered fraction per factor
    of the corresponding entry of `tb` / `te`. The same five values are printed.

  Candidates are located through `hashgen`: behaviour keys are sorted once and
  each query inspects only the run of equal keys. Every data set is searched in
  its own sorted keys; the previous factored search narrowed its range using
  the unfactored keys, which was only correct while both key sets coincided.
  """
  def _build_index(trajs):
    # Sorted bucketing keys plus the trajectory number each key belongs to
    hashes = np.array([hashgen(trajs[n, :EPISODE_LENGTH, :]) for n in range(NUM_EPISODES)])
    order = hashes.argsort()
    return hashes[order], order

  def _is_covered(target, trajs, sorted_hashes, order):
    # [lo, hi) is exactly the run of behaviour trajectories whose key equals the target's
    key = hashgen(target)
    lo = np.searchsorted(sorted_hashes, key, side='left')
    hi = np.searchsorted(sorted_hashes, key, side='right')
    for n in order[lo:hi]:
      if ( np.isclose(trajs[n, :EPISODE_LENGTH, :], target) ).all():
        return True
    return False

  def _coverage(behaviour, evaluation):
    # Fraction of the first NUM_EPISODES evaluation trajectories found in `behaviour`
    sorted_hashes, order = _build_index(behaviour)
    covered = 0
    for n in range(NUM_EPISODES):
      if _is_covered(evaluation[n, :EPISODE_LENGTH, :], behaviour, sorted_hashes, order):
        covered += 1
    return covered/NUM_EPISODES

  def _factored_coverage(f_b, f_e):
    # One coverage value per factor (axis 2) of a factored data set
    return np.array([_coverage(f_b[:, :, d, :], f_e[:, :, d, :]) for d in range(f_b.shape[2])])

  nf_coverage = _coverage(nf_b, nf_e)
  f_coverages = [_factored_coverage(f_b, f_e) for f_b, f_e in zip(tb, te)]
  f_coverage1, f_coverage2, f_coverage3, f_coverage4 = f_coverages

  print(nf_coverage)
  print(f_coverage1)
  print(f_coverage2)
  print(f_coverage3)
  print(f_coverage4)

  return nf_coverage, f_coverage1, f_coverage2, f_coverage3, f_coverage4

# Evaluation policy for policy divergence 8.0

In [77]:
EVAL_EPSILON = 0.0

# Soften fullPol: entries equal to 1 become 1 - EVAL_EPSILON and entries equal to 0
# become EVAL_EPSILON / (n_actions - 1).
# Both masks are taken before any write, so values written by the first assignment
# can never be re-matched by the second (which happened when EVAL_EPSILON == 1).
evalPolSoft = np.copy(fullPol)
greedy_mask = evalPolSoft == 1
other_mask = evalPolSoft == 0
evalPolSoft[greedy_mask] = 1 - EVAL_EPSILON
evalPolSoft[other_mask] = EVAL_EPSILON / (n_actions - 1)

In [78]:
# Simulate NSIMSAMPS trajectories of at most NSTEPS steps under the evaluation policy evalPolSoft
dgen = DataGenerator()
states, actions, lengths, rewards, diab, emp_tx_totals, emp_r_totals = dgen.simulate(
    NSIMSAMPS, NSTEPS, policy=evalPolSoft, policy_idx_type='full',
    p_diabetes=PROB_DIAB, use_tqdm=False) # progress bar disabled

# NOTE(review): obs_samps is not read by the coverage cells visible in this section
obs_samps = utils.format_dgen_samps(
    states, actions, rewards, diab, NSTEPS, NSIMSAMPS)

In [79]:
# Step index 0..NSTEPS-1, repeated for every trajectory, with a trailing axis so it can be concatenated
time = np.arange(NSTEPS)
times = np.stack(axis=0, arrays=[time]*NSIMSAMPS)
times = times[..., np.newaxis]

# Last axis is concatenated in the order: time, state, action, reward, next state
nf_transitions_e = np.concatenate((times, states[:, 0:NSTEPS, :], actions, rewards, states[:, 1:, :]), axis=2)
nf_transitions_e.shape

(100000, 20, 5)

In [80]:
# Four factored views of the evaluation transitions, in the order returned by factorise_dataset
te = factorise_dataset(nf_transitions_e)

In [81]:
# Episode counts to sweep; the result arrays are sized from this list so the two cannot drift apart
EPISODES = [10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]

nfE = np.zeros(len(EPISODES))
f1E = np.zeros((len(EPISODES),2))
f2E = np.zeros((len(EPISODES),2))
f3E = np.zeros((len(EPISODES),2))
f4E = np.zeros((len(EPISODES),3))

# The loop index is `idx`, not `id`, so the builtin id() is not shadowed at notebook scope
for (idx, ep) in enumerate(EPISODES):
  nfE[idx], f1E[idx], f2E[idx], f3E[idx], f4E[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, ep)
  gc.collect()

for arr in [nfE, f1E, f2E, f3E, f4E]:
  print(list(arr))

0.0
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0. 0.]
0.06
[0.06 0.06]
[0.06 0.06]
[0.06 0.06]
[0.06 0.06 0.06]
0.04
[0.04 0.04]
[0.04 0.04]
[0.04 0.04]
[0.04 0.04 0.04]
0.026
[0.026 0.026]
[0.026 0.026]
[0.026 0.026]
[0.026 0.026 0.026]
0.022
[0.022 0.022]
[0.022 0.022]
[0.022 0.022]
[0.022 0.022 0.022]
0.0578
[0.0578 0.0578]
[0.0578 0.0578]
[0.0578 0.0578]
[0.0578 0.0578 0.0578]
0.095
[0.095 0.095]
[0.095 0.095]
[0.095 0.095]
[0.095 0.095 0.095]
0.09712
[0.09712 0.09712]
[0.09712 0.09712]
[0.09712 0.09712]
[0.09712 0.09712 0.09712]
0.13877
[0.13877 0.13877]
[0.13877 0.13877]
[0.13877 0.13877]
[0.13877 0.13877 0.13877]
[0.0, 0.06, 0.04, 0.026, 0.022, 0.0578, 0.095, 0.09712, 0.13877]
[array([0., 0.]), array([0.06, 0.06]), array([0.04, 0.04]), array([0.026, 0.026]), array([0.022, 0.022]), array([0.0578, 0.0578]), array([0.095, 0.095]), array([0.09712, 0.09712]), array([0.13877, 0.13877])]
[array([0., 0.]), array([0.06, 0.06]), array([0.04, 0.04]), array([0.026, 0.026]), array([0.022, 0.022]), array([0

In [82]:
# Trajectory lengths to sweep; the result arrays are sized from this list so the two cannot drift apart
STEPS = [1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

nfS = np.zeros(len(STEPS))
f1S = np.zeros((len(STEPS),2))
f2S = np.zeros((len(STEPS),2))
f3S = np.zeros((len(STEPS),2))
f4S = np.zeros((len(STEPS),3))

# The loop index is `idx`, not `id`, so the builtin id() is not shadowed at notebook scope
for (idx, step) in enumerate(STEPS):
  nfS[idx], f1S[idx], f2S[idx], f3S[idx], f4S[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, NUM_EPISODES=1000, EPISODE_LENGTH=step)
  gc.collect()

for arr in [nfS, f1S, f2S, f3S, f4S]:
  print(list(arr))

1.0
[1. 1.]
[1. 1.]
[1. 1.]
[1. 1. 1.]
0.929
[0.929 0.929]
[0.929 0.929]
[0.929 0.929]
[0.929 0.929 0.929]
0.029
[0.029 0.029]
[0.029 0.029]
[0.029 0.029]
[0.029 0.029 0.029]
0.022
[0.022 0.022]
[0.022 0.022]
[0.022 0.022]
[0.022 0.022 0.022]
0.022
[0.022 0.022]
[0.022 0.022]
[0.022 0.022]
[0.022 0.022 0.022]
0.022
[0.022 0.022]
[0.022 0.022]
[0.022 0.022]
[0.022 0.022 0.022]
0.022
[0.022 0.022]
[0.022 0.022]
[0.022 0.022]
[0.022 0.022 0.022]
0.022
[0.022 0.022]
[0.022 0.022]
[0.022 0.022]
[0.022 0.022 0.022]
0.022
[0.022 0.022]
[0.022 0.022]
[0.022 0.022]
[0.022 0.022 0.022]
0.022
[0.022 0.022]
[0.022 0.022]
[0.022 0.022]
[0.022 0.022 0.022]
0.022
[0.022 0.022]
[0.022 0.022]
[0.022 0.022]
[0.022 0.022 0.022]
[1.0, 0.929, 0.029, 0.022, 0.022, 0.022, 0.022, 0.022, 0.022, 0.022, 0.022]
[array([1., 1.]), array([0.929, 0.929]), array([0.029, 0.029]), array([0.022, 0.022]), array([0.022, 0.022]), array([0.022, 0.022]), array([0.022, 0.022]), array([0.022, 0.022]), array([0.022, 0.022]), arr

# Evaluation policy for policy divergence 6.4

In [83]:
EVAL_EPSILON = 0.2

# Soften fullPol: entries equal to 1 become 1 - EVAL_EPSILON and entries equal to 0
# become EVAL_EPSILON / (n_actions - 1).
# Both masks are taken before any write, so values written by the first assignment
# can never be re-matched by the second (which happened when EVAL_EPSILON == 1).
evalPolSoft = np.copy(fullPol)
greedy_mask = evalPolSoft == 1
other_mask = evalPolSoft == 0
evalPolSoft[greedy_mask] = 1 - EVAL_EPSILON
evalPolSoft[other_mask] = EVAL_EPSILON / (n_actions - 1)

In [84]:
# Simulate NSIMSAMPS trajectories of at most NSTEPS steps under the evaluation policy evalPolSoft
dgen = DataGenerator()
states, actions, lengths, rewards, diab, emp_tx_totals, emp_r_totals = dgen.simulate(
    NSIMSAMPS, NSTEPS, policy=evalPolSoft, policy_idx_type='full',
    p_diabetes=PROB_DIAB, use_tqdm=False) # progress bar disabled

# NOTE(review): obs_samps is not read by the coverage cells visible in this section
obs_samps = utils.format_dgen_samps(
    states, actions, rewards, diab, NSTEPS, NSIMSAMPS)

In [85]:
# Step index 0..NSTEPS-1, repeated for every trajectory, with a trailing axis so it can be concatenated
time = np.arange(NSTEPS)
times = np.stack(axis=0, arrays=[time]*NSIMSAMPS)
times = times[..., np.newaxis]

# Last axis is concatenated in the order: time, state, action, reward, next state
nf_transitions_e = np.concatenate((times, states[:, 0:NSTEPS, :], actions, rewards, states[:, 1:, :]), axis=2)
nf_transitions_e.shape

(100000, 20, 5)

In [86]:
# Four factored views of the evaluation transitions, in the order returned by factorise_dataset
te = factorise_dataset(nf_transitions_e)

In [87]:
# Episode counts to sweep; the result arrays are sized from this list so the two cannot drift apart
EPISODES = [10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]

nfE = np.zeros(len(EPISODES))
f1E = np.zeros((len(EPISODES),2))
f2E = np.zeros((len(EPISODES),2))
f3E = np.zeros((len(EPISODES),2))
f4E = np.zeros((len(EPISODES),3))

# The loop index is `idx`, not `id`, so the builtin id() is not shadowed at notebook scope
for (idx, ep) in enumerate(EPISODES):
  nfE[idx], f1E[idx], f2E[idx], f3E[idx], f4E[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, ep)
  gc.collect()

for arr in [nfE, f1E, f2E, f3E, f4E]:
  print(list(arr))

0.0
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0. 0.]
0.0
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0. 0.]
0.02
[0.02 0.02]
[0.02 0.02]
[0.02 0.02]
[0.02 0.02 0.02]
0.026
[0.026 0.026]
[0.026 0.026]
[0.026 0.026]
[0.026 0.026 0.026]
0.027
[0.027 0.027]
[0.027 0.027]
[0.027 0.027]
[0.027 0.027 0.027]
0.0768
[0.0768 0.0768]
[0.0768 0.0768]
[0.0768 0.0768]
[0.0768 0.0768 0.0768]
0.1036
[0.1036 0.1036]
[0.1036 0.1036]
[0.1036 0.1036]
[0.1036 0.1036 0.1036]
0.11306
[0.11306 0.11306]
[0.11306 0.11306]
[0.11306 0.11306]
[0.11306 0.11306 0.11306]
0.13812
[0.13812 0.13812]
[0.13812 0.13812]
[0.13812 0.13812]
[0.13812 0.13812 0.13812]
[0.0, 0.0, 0.02, 0.026, 0.027, 0.0768, 0.1036, 0.11306, 0.13812]
[array([0., 0.]), array([0., 0.]), array([0.02, 0.02]), array([0.026, 0.026]), array([0.027, 0.027]), array([0.0768, 0.0768]), array([0.1036, 0.1036]), array([0.11306, 0.11306]), array([0.13812, 0.13812])]
[array([0., 0.]), array([0., 0.]), array([0.02, 0.02]), array([0.026, 0.026]), array([0.027, 0.027]), array([0.0768, 0.0768])

In [88]:
# Trajectory lengths to sweep; the result arrays are sized from this list so the two cannot drift apart
STEPS = [1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

nfS = np.zeros(len(STEPS))
f1S = np.zeros((len(STEPS),2))
f2S = np.zeros((len(STEPS),2))
f3S = np.zeros((len(STEPS),2))
f4S = np.zeros((len(STEPS),3))

# The loop index is `idx`, not `id`, so the builtin id() is not shadowed at notebook scope
for (idx, step) in enumerate(STEPS):
  nfS[idx], f1S[idx], f2S[idx], f3S[idx], f4S[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, NUM_EPISODES=1000, EPISODE_LENGTH=step)
  gc.collect()

for arr in [nfS, f1S, f2S, f3S, f4S]:
  print(list(arr))

0.999
[0.999 0.999]
[0.999 0.999]
[0.999 0.999]
[0.999 0.999 0.999]
0.839
[0.839 0.839]
[0.839 0.839]
[0.839 0.839]
[0.839 0.839 0.839]
0.038
[0.038 0.038]
[0.038 0.038]
[0.038 0.038]
[0.038 0.038 0.038]
0.03
[0.03 0.03]
[0.03 0.03]
[0.03 0.03]
[0.03 0.03 0.03]
0.027
[0.027 0.027]
[0.027 0.027]
[0.027 0.027]
[0.027 0.027 0.027]
0.027
[0.027 0.027]
[0.027 0.027]
[0.027 0.027]
[0.027 0.027 0.027]
0.027
[0.027 0.027]
[0.027 0.027]
[0.027 0.027]
[0.027 0.027 0.027]
0.027
[0.027 0.027]
[0.027 0.027]
[0.027 0.027]
[0.027 0.027 0.027]
0.027
[0.027 0.027]
[0.027 0.027]
[0.027 0.027]
[0.027 0.027 0.027]
0.027
[0.027 0.027]
[0.027 0.027]
[0.027 0.027]
[0.027 0.027 0.027]
0.027
[0.027 0.027]
[0.027 0.027]
[0.027 0.027]
[0.027 0.027 0.027]
[0.999, 0.839, 0.038, 0.03, 0.027, 0.027, 0.027, 0.027, 0.027, 0.027, 0.027]
[array([0.999, 0.999]), array([0.839, 0.839]), array([0.038, 0.038]), array([0.03, 0.03]), array([0.027, 0.027]), array([0.027, 0.027]), array([0.027, 0.027]), array([0.027, 0.027]), ar

# Evaluation policy for policy divergence 4.8

In [89]:
EVAL_EPSILON = 0.4

# Soften fullPol: entries equal to 1 become 1 - EVAL_EPSILON and entries equal to 0
# become EVAL_EPSILON / (n_actions - 1).
# Both masks are taken before any write, so values written by the first assignment
# can never be re-matched by the second (which happened when EVAL_EPSILON == 1).
evalPolSoft = np.copy(fullPol)
greedy_mask = evalPolSoft == 1
other_mask = evalPolSoft == 0
evalPolSoft[greedy_mask] = 1 - EVAL_EPSILON
evalPolSoft[other_mask] = EVAL_EPSILON / (n_actions - 1)

In [90]:
# Simulate NSIMSAMPS trajectories of at most NSTEPS steps under the evaluation policy evalPolSoft
dgen = DataGenerator()
states, actions, lengths, rewards, diab, emp_tx_totals, emp_r_totals = dgen.simulate(
    NSIMSAMPS, NSTEPS, policy=evalPolSoft, policy_idx_type='full',
    p_diabetes=PROB_DIAB, use_tqdm=False) # progress bar disabled

# NOTE(review): obs_samps is not read by the coverage cells visible in this section
obs_samps = utils.format_dgen_samps(
    states, actions, rewards, diab, NSTEPS, NSIMSAMPS)

In [91]:
# Step index 0..NSTEPS-1, repeated for every trajectory, with a trailing axis so it can be concatenated
time = np.arange(NSTEPS)
times = np.stack(axis=0, arrays=[time]*NSIMSAMPS)
times = times[..., np.newaxis]

# Last axis is concatenated in the order: time, state, action, reward, next state
nf_transitions_e = np.concatenate((times, states[:, 0:NSTEPS, :], actions, rewards, states[:, 1:, :]), axis=2)
nf_transitions_e.shape

(100000, 20, 5)

In [92]:
# Four factored views of the evaluation transitions, in the order returned by factorise_dataset
te = factorise_dataset(nf_transitions_e)

In [93]:
# Episode counts to sweep; the result arrays are sized from this list so the two cannot drift apart
EPISODES = [10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]

nfE = np.zeros(len(EPISODES))
f1E = np.zeros((len(EPISODES),2))
f2E = np.zeros((len(EPISODES),2))
f3E = np.zeros((len(EPISODES),2))
f4E = np.zeros((len(EPISODES),3))

# The loop index is `idx`, not `id`, so the builtin id() is not shadowed at notebook scope
for (idx, ep) in enumerate(EPISODES):
  nfE[idx], f1E[idx], f2E[idx], f3E[idx], f4E[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, ep)
  gc.collect()

for arr in [nfE, f1E, f2E, f3E, f4E]:
  print(list(arr))

0.0
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0. 0.]
0.04
[0.04 0.04]
[0.04 0.04]
[0.04 0.04]
[0.04 0.04 0.04]
0.06
[0.06 0.06]
[0.06 0.06]
[0.06 0.06]
[0.06 0.06 0.06]
0.034
[0.034 0.034]
[0.034 0.034]
[0.034 0.034]
[0.034 0.034 0.034]
0.059
[0.059 0.059]
[0.059 0.059]
[0.059 0.059]
[0.059 0.059 0.059]
0.0922
[0.0922 0.0922]
[0.0922 0.0922]
[0.0922 0.0922]
[0.0922 0.0922 0.0922]
0.1118
[0.1118 0.1118]
[0.1118 0.1118]
[0.1118 0.1118]
[0.1118 0.1118 0.1118]
0.12958
[0.12958 0.12958]
[0.12958 0.12958]
[0.12958 0.12958]
[0.12958 0.12958 0.12958]
0.14748
[0.14748 0.14748]
[0.14748 0.14748]
[0.14748 0.14748]
[0.14748 0.14748 0.14748]
[0.0, 0.04, 0.06, 0.034, 0.059, 0.0922, 0.1118, 0.12958, 0.14748]
[array([0., 0.]), array([0.04, 0.04]), array([0.06, 0.06]), array([0.034, 0.034]), array([0.059, 0.059]), array([0.0922, 0.0922]), array([0.1118, 0.1118]), array([0.12958, 0.12958]), array([0.14748, 0.14748])]
[array([0., 0.]), array([0.04, 0.04]), array([0.06, 0.06]), array([0.034, 0.034]), array([0.059, 0.05

In [94]:
# Trajectory lengths to sweep; the result arrays are sized from this list so the two cannot drift apart
STEPS = [1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

nfS = np.zeros(len(STEPS))
f1S = np.zeros((len(STEPS),2))
f2S = np.zeros((len(STEPS),2))
f3S = np.zeros((len(STEPS),2))
f4S = np.zeros((len(STEPS),3))

# The loop index is `idx`, not `id`, so the builtin id() is not shadowed at notebook scope
for (idx, step) in enumerate(STEPS):
  nfS[idx], f1S[idx], f2S[idx], f3S[idx], f4S[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, NUM_EPISODES=1000, EPISODE_LENGTH=step)
  gc.collect()

for arr in [nfS, f1S, f2S, f3S, f4S]:
  print(list(arr))

0.997
[0.997 0.997]
[0.997 0.997]
[0.997 0.997]
[0.997 0.997 0.997]
0.827
[0.827 0.827]
[0.827 0.827]
[0.827 0.827]
[0.827 0.827 0.827]
0.079
[0.079 0.079]
[0.079 0.079]
[0.079 0.079]
[0.079 0.079 0.079]
0.061
[0.061 0.061]
[0.061 0.061]
[0.061 0.061]
[0.061 0.061 0.061]
0.059
[0.059 0.059]
[0.059 0.059]
[0.059 0.059]
[0.059 0.059 0.059]
0.059
[0.059 0.059]
[0.059 0.059]
[0.059 0.059]
[0.059 0.059 0.059]
0.059
[0.059 0.059]
[0.059 0.059]
[0.059 0.059]
[0.059 0.059 0.059]
0.059
[0.059 0.059]
[0.059 0.059]
[0.059 0.059]
[0.059 0.059 0.059]
0.059
[0.059 0.059]
[0.059 0.059]
[0.059 0.059]
[0.059 0.059 0.059]
0.059
[0.059 0.059]
[0.059 0.059]
[0.059 0.059]
[0.059 0.059 0.059]
0.059
[0.059 0.059]
[0.059 0.059]
[0.059 0.059]
[0.059 0.059 0.059]
[0.997, 0.827, 0.079, 0.061, 0.059, 0.059, 0.059, 0.059, 0.059, 0.059, 0.059]
[array([0.997, 0.997]), array([0.827, 0.827]), array([0.079, 0.079]), array([0.061, 0.061]), array([0.059, 0.059]), array([0.059, 0.059]), array([0.059, 0.059]), array([0.059

# Evaluation policy for policy divergence 3.2

In [95]:
gc.collect()
EVAL_EPSILON = 0.6

# Soften fullPol: entries equal to 1 become 1 - EVAL_EPSILON and entries equal to 0
# become EVAL_EPSILON / (n_actions - 1).
# Both masks are taken before any write, so values written by the first assignment
# can never be re-matched by the second (which happened when EVAL_EPSILON == 1).
evalPolSoft = np.copy(fullPol)
greedy_mask = evalPolSoft == 1
other_mask = evalPolSoft == 0
evalPolSoft[greedy_mask] = 1 - EVAL_EPSILON
evalPolSoft[other_mask] = EVAL_EPSILON / (n_actions - 1)

In [96]:
# Simulate NSIMSAMPS trajectories of at most NSTEPS steps under the evaluation policy evalPolSoft
dgen = DataGenerator()
states, actions, lengths, rewards, diab, emp_tx_totals, emp_r_totals = dgen.simulate(
    NSIMSAMPS, NSTEPS, policy=evalPolSoft, policy_idx_type='full',
    p_diabetes=PROB_DIAB, use_tqdm=False) # progress bar disabled

# NOTE(review): obs_samps is not read by the coverage cells visible in this section
obs_samps = utils.format_dgen_samps(
    states, actions, rewards, diab, NSTEPS, NSIMSAMPS)

In [97]:
# Step index 0..NSTEPS-1, repeated for every trajectory, with a trailing axis so it can be concatenated
time = np.arange(NSTEPS)
times = np.stack(axis=0, arrays=[time]*NSIMSAMPS)
times = times[..., np.newaxis]

# Last axis is concatenated in the order: time, state, action, reward, next state
nf_transitions_e = np.concatenate((times, states[:, 0:NSTEPS, :], actions, rewards, states[:, 1:, :]), axis=2)
nf_transitions_e.shape

(100000, 20, 5)

In [98]:
# Four factored views of the evaluation transitions, in the order returned by factorise_dataset
te = factorise_dataset(nf_transitions_e)

In [99]:
# Episode counts to sweep; the result arrays are sized from this list so the two cannot drift apart
EPISODES = [10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]

nfE = np.zeros(len(EPISODES))
f1E = np.zeros((len(EPISODES),2))
f2E = np.zeros((len(EPISODES),2))
f3E = np.zeros((len(EPISODES),2))
f4E = np.zeros((len(EPISODES),3))

# The loop index is `idx`, not `id`, so the builtin id() is not shadowed at notebook scope
for (idx, ep) in enumerate(EPISODES):
  nfE[idx], f1E[idx], f2E[idx], f3E[idx], f4E[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, ep)
  gc.collect()

for arr in [nfE, f1E, f2E, f3E, f4E]:
  print(list(arr))

0.0
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0. 0.]
0.02
[0.02 0.02]
[0.02 0.02]
[0.02 0.02]
[0.02 0.02 0.02]
0.04
[0.04 0.04]
[0.04 0.04]
[0.04 0.04]
[0.04 0.04 0.04]
0.062
[0.062 0.062]
[0.062 0.062]
[0.062 0.062]
[0.062 0.062 0.062]
0.077
[0.077 0.077]
[0.077 0.077]
[0.077 0.077]
[0.077 0.077 0.077]
0.1108
[0.1108 0.1108]
[0.1108 0.1108]
[0.1108 0.1108]
[0.1108 0.1108 0.1108]
0.1306
[0.1306 0.1306]
[0.1306 0.1306]
[0.1306 0.1306]
[0.1306 0.1306 0.1306]
0.15344
[0.15344 0.15344]
[0.15344 0.15344]
[0.15344 0.15344]
[0.15344 0.15344 0.15344]
0.16511
[0.16511 0.16511]
[0.16511 0.16511]
[0.16511 0.16511]
[0.16511 0.16511 0.16511]
[0.0, 0.02, 0.04, 0.062, 0.077, 0.1108, 0.1306, 0.15344, 0.16511]
[array([0., 0.]), array([0.02, 0.02]), array([0.04, 0.04]), array([0.062, 0.062]), array([0.077, 0.077]), array([0.1108, 0.1108]), array([0.1306, 0.1306]), array([0.15344, 0.15344]), array([0.16511, 0.16511])]
[array([0., 0.]), array([0.02, 0.02]), array([0.04, 0.04]), array([0.062, 0.062]), array([0.077, 0.07

In [100]:
# Trajectory lengths to sweep; the result arrays are sized from this list so the two cannot drift apart
STEPS = [1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

nfS = np.zeros(len(STEPS))
f1S = np.zeros((len(STEPS),2))
f2S = np.zeros((len(STEPS),2))
f3S = np.zeros((len(STEPS),2))
f4S = np.zeros((len(STEPS),3))

# The loop index is `idx`, not `id`, so the builtin id() is not shadowed at notebook scope
for (idx, step) in enumerate(STEPS):
  nfS[idx], f1S[idx], f2S[idx], f3S[idx], f4S[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, NUM_EPISODES=1000, EPISODE_LENGTH=step)
  gc.collect()

for arr in [nfS, f1S, f2S, f3S, f4S]:
  print(list(arr))

0.995
[0.995 0.995]
[0.995 0.995]
[0.995 0.995]
[0.995 0.995 0.995]
0.792
[0.792 0.792]
[0.792 0.792]
[0.792 0.792]
[0.792 0.792 0.792]
0.085
[0.085 0.085]
[0.085 0.085]
[0.085 0.085]
[0.085 0.085 0.085]
0.077
[0.077 0.077]
[0.077 0.077]
[0.077 0.077]
[0.077 0.077 0.077]
0.077
[0.077 0.077]
[0.077 0.077]
[0.077 0.077]
[0.077 0.077 0.077]
0.077
[0.077 0.077]
[0.077 0.077]
[0.077 0.077]
[0.077 0.077 0.077]
0.077
[0.077 0.077]
[0.077 0.077]
[0.077 0.077]
[0.077 0.077 0.077]
0.077
[0.077 0.077]
[0.077 0.077]
[0.077 0.077]
[0.077 0.077 0.077]
0.077
[0.077 0.077]
[0.077 0.077]
[0.077 0.077]
[0.077 0.077 0.077]
0.077
[0.077 0.077]
[0.077 0.077]
[0.077 0.077]
[0.077 0.077 0.077]
0.077
[0.077 0.077]
[0.077 0.077]
[0.077 0.077]
[0.077 0.077 0.077]
[0.995, 0.792, 0.085, 0.077, 0.077, 0.077, 0.077, 0.077, 0.077, 0.077, 0.077]
[array([0.995, 0.995]), array([0.792, 0.792]), array([0.085, 0.085]), array([0.077, 0.077]), array([0.077, 0.077]), array([0.077, 0.077]), array([0.077, 0.077]), array([0.077

# Evaluation policy for policy divergence 1.6

In [101]:
gc.collect()
EVAL_EPSILON = 0.8

# Soften fullPol: entries equal to 1 become 1 - EVAL_EPSILON and entries equal to 0
# become EVAL_EPSILON / (n_actions - 1).
# Both masks are taken before any write, so values written by the first assignment
# can never be re-matched by the second (which happened when EVAL_EPSILON == 1).
evalPolSoft = np.copy(fullPol)
greedy_mask = evalPolSoft == 1
other_mask = evalPolSoft == 0
evalPolSoft[greedy_mask] = 1 - EVAL_EPSILON
evalPolSoft[other_mask] = EVAL_EPSILON / (n_actions - 1)

In [102]:
# Simulate NSIMSAMPS trajectories of at most NSTEPS steps under the evaluation policy evalPolSoft
dgen = DataGenerator()
states, actions, lengths, rewards, diab, emp_tx_totals, emp_r_totals = dgen.simulate(
    NSIMSAMPS, NSTEPS, policy=evalPolSoft, policy_idx_type='full',
    p_diabetes=PROB_DIAB, use_tqdm=False) # progress bar disabled

# NOTE(review): obs_samps is not read by the coverage cells visible in this section
obs_samps = utils.format_dgen_samps(
    states, actions, rewards, diab, NSTEPS, NSIMSAMPS)

In [103]:
# Step index 0..NSTEPS-1, repeated for every trajectory, with a trailing axis so it can be concatenated
time = np.arange(NSTEPS)
times = np.stack(axis=0, arrays=[time]*NSIMSAMPS)
times = times[..., np.newaxis]

# Last axis is concatenated in the order: time, state, action, reward, next state
nf_transitions_e = np.concatenate((times, states[:, 0:NSTEPS, :], actions, rewards, states[:, 1:, :]), axis=2)
nf_transitions_e.shape

(100000, 20, 5)

In [104]:
# Four factored views of the evaluation transitions, in the order returned by factorise_dataset
te = factorise_dataset(nf_transitions_e)

In [105]:
# Episode counts to sweep; the result arrays are sized from this list so the two cannot drift apart
EPISODES = [10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]

nfE = np.zeros(len(EPISODES))
f1E = np.zeros((len(EPISODES),2))
f2E = np.zeros((len(EPISODES),2))
f3E = np.zeros((len(EPISODES),2))
f4E = np.zeros((len(EPISODES),3))

# The loop index is `idx`, not `id`, so the builtin id() is not shadowed at notebook scope
for (idx, ep) in enumerate(EPISODES):
  nfE[idx], f1E[idx], f2E[idx], f3E[idx], f4E[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, ep)
  gc.collect()

for arr in [nfE, f1E, f2E, f3E, f4E]:
  print(list(arr))

0.0
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0. 0.]
0.0
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0. 0.]
0.05
[0.05 0.05]
[0.05 0.05]
[0.05 0.05]
[0.05 0.05 0.05]
0.092
[0.092 0.092]
[0.092 0.092]
[0.092 0.092]
[0.092 0.092 0.092]
0.121
[0.121 0.121]
[0.121 0.121]
[0.121 0.121]
[0.121 0.121 0.121]
0.1384
[0.1384 0.1384]
[0.1384 0.1384]
[0.1384 0.1384]
[0.1384 0.1384 0.1384]
0.1498
[0.1498 0.1498]
[0.1498 0.1498]
[0.1498 0.1498]
[0.1498 0.1498 0.1498]
0.17454
[0.17454 0.17454]
[0.17454 0.17454]
[0.17454 0.17454]
[0.17454 0.17454 0.17454]
0.18554
[0.18554 0.18554]
[0.18554 0.18554]
[0.18554 0.18554]
[0.18554 0.18554 0.18554]
[0.0, 0.0, 0.05, 0.092, 0.121, 0.1384, 0.1498, 0.17454, 0.18554]
[array([0., 0.]), array([0., 0.]), array([0.05, 0.05]), array([0.092, 0.092]), array([0.121, 0.121]), array([0.1384, 0.1384]), array([0.1498, 0.1498]), array([0.17454, 0.17454]), array([0.18554, 0.18554])]
[array([0., 0.]), array([0., 0.]), array([0.05, 0.05]), array([0.092, 0.092]), array([0.121, 0.121]), array([0.1384, 0.1384])

In [106]:
# Trajectory lengths to sweep; the result arrays are sized from this list so the two cannot drift apart
STEPS = [1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

nfS = np.zeros(len(STEPS))
f1S = np.zeros((len(STEPS),2))
f2S = np.zeros((len(STEPS),2))
f3S = np.zeros((len(STEPS),2))
f4S = np.zeros((len(STEPS),3))

# The loop index is `idx`, not `id`, so the builtin id() is not shadowed at notebook scope
for (idx, step) in enumerate(STEPS):
  nfS[idx], f1S[idx], f2S[idx], f3S[idx], f4S[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, NUM_EPISODES=1000, EPISODE_LENGTH=step)
  gc.collect()

for arr in [nfS, f1S, f2S, f3S, f4S]:
  print(list(arr))

0.996
[0.996 0.996]
[0.996 0.996]
[0.996 0.996]
[0.996 0.996 0.996]
0.736
[0.736 0.736]
[0.736 0.736]
[0.736 0.736]
[0.736 0.736 0.736]
0.129
[0.129 0.129]
[0.129 0.129]
[0.129 0.129]
[0.129 0.129 0.129]
0.121
[0.121 0.121]
[0.121 0.121]
[0.121 0.121]
[0.121 0.121 0.121]
0.121
[0.121 0.121]
[0.121 0.121]
[0.121 0.121]
[0.121 0.121 0.121]
0.121
[0.121 0.121]
[0.121 0.121]
[0.121 0.121]
[0.121 0.121 0.121]
0.121
[0.121 0.121]
[0.121 0.121]
[0.121 0.121]
[0.121 0.121 0.121]
0.121
[0.121 0.121]
[0.121 0.121]
[0.121 0.121]
[0.121 0.121 0.121]
0.121
[0.121 0.121]
[0.121 0.121]
[0.121 0.121]
[0.121 0.121 0.121]
0.121
[0.121 0.121]
[0.121 0.121]
[0.121 0.121]
[0.121 0.121 0.121]
0.121
[0.121 0.121]
[0.121 0.121]
[0.121 0.121]
[0.121 0.121 0.121]
[0.996, 0.736, 0.129, 0.121, 0.121, 0.121, 0.121, 0.121, 0.121, 0.121, 0.121]
[array([0.996, 0.996]), array([0.736, 0.736]), array([0.129, 0.129]), array([0.121, 0.121]), array([0.121, 0.121]), array([0.121, 0.121]), array([0.121, 0.121]), array([0.121

# Evaluation policy for policy divergence 1.2

In [107]:
gc.collect()
# Softening parameter for the evaluation policy in this section.
EVAL_EPSILON = 0.85

# Epsilon-soft copy of fullPol: entries equal to 1 become 1 - EVAL_EPSILON and
# entries equal to 0 become EVAL_EPSILON / (n_actions - 1). The second mask is
# evaluated after the first assignment, which is safe here because
# 1 - EVAL_EPSILON is non-zero.
# NOTE(review): assuming n_actions == 8 and a uniform behaviour policy, the
# ratio is 0.15 / 0.125 = 1.2, matching the section heading. TODO confirm.
evalPolSoft = np.copy(fullPol)
evalPolSoft[evalPolSoft == 1] = 1 - EVAL_EPSILON
evalPolSoft[evalPolSoft == 0] = EVAL_EPSILON / (n_actions - 1)

In [108]:
# Roll out the epsilon-soft evaluation policy (evalPolSoft) with a fresh
# DataGenerator; the returned arrays rebind the names used by earlier runs.
dgen = DataGenerator()
states, actions, lengths, rewards, diab, emp_tx_totals, emp_r_totals = dgen.simulate(
    NSIMSAMPS, NSTEPS, policy=evalPolSoft, policy_idx_type='full',
    p_diabetes=PROB_DIAB, use_tqdm=False) # progress bar off; to enable: use_tqdm=True, tqdm_desc='Evaluation Policy Simulation'

obs_samps = utils.format_dgen_samps(
    states, actions, rewards, diab, NSTEPS, NSIMSAMPS)

In [109]:
# Step index 0..NSTEPS-1, repeated once per simulated episode and given a
# trailing singleton axis so it lines up with the other arrays on axis 2.
time = np.arange(NSTEPS)
times = np.tile(time, (NSIMSAMPS, 1))[:, :, np.newaxis]

# Feature order along axis 2: time, state at step t, action, reward,
# state at step t+1.
nf_transitions_e = np.concatenate(
    [times, states[:, :NSTEPS, :], actions, rewards, states[:, 1:, :]],
    axis=2)
nf_transitions_e.shape

(100000, 20, 5)

In [110]:
# Factorise the transitions simulated under the current evaluation policy;
# `te` is handed to evaluate_coverage (alongside `tb`) in the cells below.
te = factorise_dataset(nf_transitions_e)

In [111]:
# Sweep over the number of episodes passed to evaluate_coverage and store the
# five values it returns for each setting.
EPISODES = [10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]

# Result buffers are sized from EPISODES so the sweep grid and the arrays
# cannot drift apart.
nfE = np.zeros(len(EPISODES))
f1E = np.zeros((len(EPISODES), 2))
f2E = np.zeros((len(EPISODES), 2))
f3E = np.zeros((len(EPISODES), 2))
f4E = np.zeros((len(EPISODES), 3))

# The index is called `idx`, not `id`: at notebook top level a loop variable
# named `id` would replace the builtin `id` for every later cell.
for idx, ep in enumerate(EPISODES):
  nfE[idx], f1E[idx], f2E[idx], f3E[idx], f4E[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, ep)
  gc.collect()  # force a collection between runs

for arr in [nfE, f1E, f2E, f3E, f4E]:
  print(list(arr))

0.0
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0. 0.]
0.04
[0.04 0.04]
[0.04 0.04]
[0.04 0.04]
[0.04 0.04 0.04]
0.09
[0.09 0.09]
[0.09 0.09]
[0.09 0.09]
[0.09 0.09 0.09]
0.088
[0.088 0.088]
[0.088 0.088]
[0.088 0.088]
[0.088 0.088 0.088]
0.106
[0.106 0.106]
[0.106 0.106]
[0.106 0.106]
[0.106 0.106 0.106]
0.1376
[0.1376 0.1376]
[0.1376 0.1376]
[0.1376 0.1376]
[0.1376 0.1376 0.1376]
0.1517
[0.1517 0.1517]
[0.1517 0.1517]
[0.1517 0.1517]
[0.1517 0.1517 0.1517]
0.1788
[0.1788 0.1788]
[0.1788 0.1788]
[0.1788 0.1788]
[0.1788 0.1788 0.1788]
0.18993
[0.18993 0.18993]
[0.18993 0.18993]
[0.18993 0.18993]
[0.18993 0.18993 0.18993]
[0.0, 0.04, 0.09, 0.088, 0.106, 0.1376, 0.1517, 0.1788, 0.18993]
[array([0., 0.]), array([0.04, 0.04]), array([0.09, 0.09]), array([0.088, 0.088]), array([0.106, 0.106]), array([0.1376, 0.1376]), array([0.1517, 0.1517]), array([0.1788, 0.1788]), array([0.18993, 0.18993])]
[array([0., 0.]), array([0.04, 0.04]), array([0.09, 0.09]), array([0.088, 0.088]), array([0.106, 0.106]), array([0

In [112]:
# Sweep over the episode length (EPISODE_LENGTH) with NUM_EPISODES fixed at
# 1000 and store the five values evaluate_coverage returns for each setting.
STEPS = [1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

# Result buffers are sized from STEPS so the sweep grid and the arrays cannot
# drift apart.
nfS = np.zeros(len(STEPS))
f1S = np.zeros((len(STEPS), 2))
f2S = np.zeros((len(STEPS), 2))
f3S = np.zeros((len(STEPS), 2))
f4S = np.zeros((len(STEPS), 3))

# The index is called `idx`, not `id`: at notebook top level a loop variable
# named `id` would replace the builtin `id` for every later cell.
for idx, step in enumerate(STEPS):
  nfS[idx], f1S[idx], f2S[idx], f3S[idx], f4S[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, NUM_EPISODES=1000, EPISODE_LENGTH=step)
  gc.collect()  # force a collection between runs

for arr in [nfS, f1S, f2S, f3S, f4S]:
  print(list(arr))

0.997
[0.997 0.997]
[0.997 0.997]
[0.997 0.997]
[0.997 0.997 0.997]
0.759
[0.759 0.759]
[0.759 0.759]
[0.759 0.759]
[0.759 0.759 0.759]
0.112
[0.112 0.112]
[0.112 0.112]
[0.112 0.112]
[0.112 0.112 0.112]
0.106
[0.106 0.106]
[0.106 0.106]
[0.106 0.106]
[0.106 0.106 0.106]
0.106
[0.106 0.106]
[0.106 0.106]
[0.106 0.106]
[0.106 0.106 0.106]
0.106
[0.106 0.106]
[0.106 0.106]
[0.106 0.106]
[0.106 0.106 0.106]
0.106
[0.106 0.106]
[0.106 0.106]
[0.106 0.106]
[0.106 0.106 0.106]
0.106
[0.106 0.106]
[0.106 0.106]
[0.106 0.106]
[0.106 0.106 0.106]
0.106
[0.106 0.106]
[0.106 0.106]
[0.106 0.106]
[0.106 0.106 0.106]
0.106
[0.106 0.106]
[0.106 0.106]
[0.106 0.106]
[0.106 0.106 0.106]
0.106
[0.106 0.106]
[0.106 0.106]
[0.106 0.106]
[0.106 0.106 0.106]
[0.997, 0.759, 0.112, 0.106, 0.106, 0.106, 0.106, 0.106, 0.106, 0.106, 0.106]
[array([0.997, 0.997]), array([0.759, 0.759]), array([0.112, 0.112]), array([0.106, 0.106]), array([0.106, 0.106]), array([0.106, 0.106]), array([0.106, 0.106]), array([0.106

# Evaluation policy for policy divergence 1.04

In [113]:
gc.collect()
# Softening parameter for the evaluation policy in this section.
EVAL_EPSILON = 0.87

# Epsilon-soft copy of fullPol: entries equal to 1 become 1 - EVAL_EPSILON and
# entries equal to 0 become EVAL_EPSILON / (n_actions - 1). The second mask is
# evaluated after the first assignment, which is safe here because
# 1 - EVAL_EPSILON is non-zero.
# NOTE(review): assuming n_actions == 8 and a uniform behaviour policy, the
# ratio is 0.13 / 0.125 = 1.04, matching the section heading. TODO confirm.
evalPolSoft = np.copy(fullPol)
evalPolSoft[evalPolSoft == 1] = 1 - EVAL_EPSILON
evalPolSoft[evalPolSoft == 0] = EVAL_EPSILON / (n_actions - 1)

In [114]:
# Roll out the epsilon-soft evaluation policy (evalPolSoft) with a fresh
# DataGenerator; the returned arrays rebind the names used by earlier runs.
dgen = DataGenerator()
states, actions, lengths, rewards, diab, emp_tx_totals, emp_r_totals = dgen.simulate(
    NSIMSAMPS, NSTEPS, policy=evalPolSoft, policy_idx_type='full',
    p_diabetes=PROB_DIAB, use_tqdm=False) # progress bar off; to enable: use_tqdm=True, tqdm_desc='Evaluation Policy Simulation'

obs_samps = utils.format_dgen_samps(
    states, actions, rewards, diab, NSTEPS, NSIMSAMPS)

In [115]:
# Step index 0..NSTEPS-1, repeated once per simulated episode and given a
# trailing singleton axis so it lines up with the other arrays on axis 2.
time = np.arange(NSTEPS)
times = np.tile(time, (NSIMSAMPS, 1))[:, :, np.newaxis]

# Feature order along axis 2: time, state at step t, action, reward,
# state at step t+1.
nf_transitions_e = np.concatenate(
    [times, states[:, :NSTEPS, :], actions, rewards, states[:, 1:, :]],
    axis=2)
nf_transitions_e.shape

(100000, 20, 5)

In [116]:
# Factorise the transitions simulated under the current evaluation policy;
# `te` is handed to evaluate_coverage (alongside `tb`) in the cells below.
te = factorise_dataset(nf_transitions_e)

In [117]:
# Sweep over the number of episodes passed to evaluate_coverage and store the
# five values it returns for each setting.
EPISODES = [10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]

# Result buffers are sized from EPISODES so the sweep grid and the arrays
# cannot drift apart.
nfE = np.zeros(len(EPISODES))
f1E = np.zeros((len(EPISODES), 2))
f2E = np.zeros((len(EPISODES), 2))
f3E = np.zeros((len(EPISODES), 2))
f4E = np.zeros((len(EPISODES), 3))

# The index is called `idx`, not `id`: at notebook top level a loop variable
# named `id` would replace the builtin `id` for every later cell.
for idx, ep in enumerate(EPISODES):
  nfE[idx], f1E[idx], f2E[idx], f3E[idx], f4E[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, ep)
  gc.collect()  # force a collection between runs

for arr in [nfE, f1E, f2E, f3E, f4E]:
  print(list(arr))

0.0
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0. 0.]
0.0
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0. 0.]
0.05
[0.05 0.05]
[0.05 0.05]
[0.05 0.05]
[0.05 0.05 0.05]
0.086
[0.086 0.086]
[0.086 0.086]
[0.086 0.086]
[0.086 0.086 0.086]
0.116
[0.116 0.116]
[0.116 0.116]
[0.116 0.116]
[0.116 0.116 0.116]
0.135
[0.135 0.135]
[0.135 0.135]
[0.135 0.135]
[0.135 0.135 0.135]
0.1588
[0.1588 0.1588]
[0.1588 0.1588]
[0.1588 0.1588]
[0.1588 0.1588 0.1588]
0.1842
[0.1842 0.1842]
[0.1842 0.1842]
[0.1842 0.1842]
[0.1842 0.1842 0.1842]
0.19347
[0.19347 0.19347]
[0.19347 0.19347]
[0.19347 0.19347]
[0.19347 0.19347 0.19347]
[0.0, 0.0, 0.05, 0.086, 0.116, 0.135, 0.1588, 0.1842, 0.19347]
[array([0., 0.]), array([0., 0.]), array([0.05, 0.05]), array([0.086, 0.086]), array([0.116, 0.116]), array([0.135, 0.135]), array([0.1588, 0.1588]), array([0.1842, 0.1842]), array([0.19347, 0.19347])]
[array([0., 0.]), array([0., 0.]), array([0.05, 0.05]), array([0.086, 0.086]), array([0.116, 0.116]), array([0.135, 0.135]), array([0.1588, 0.1588]), a

In [118]:
# Sweep over the episode length (EPISODE_LENGTH) with NUM_EPISODES fixed at
# 1000 and store the five values evaluate_coverage returns for each setting.
STEPS = [1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

# Result buffers are sized from STEPS so the sweep grid and the arrays cannot
# drift apart.
nfS = np.zeros(len(STEPS))
f1S = np.zeros((len(STEPS), 2))
f2S = np.zeros((len(STEPS), 2))
f3S = np.zeros((len(STEPS), 2))
f4S = np.zeros((len(STEPS), 3))

# The index is called `idx`, not `id`: at notebook top level a loop variable
# named `id` would replace the builtin `id` for every later cell.
for idx, step in enumerate(STEPS):
  nfS[idx], f1S[idx], f2S[idx], f3S[idx], f4S[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, NUM_EPISODES=1000, EPISODE_LENGTH=step)
  gc.collect()  # force a collection between runs

for arr in [nfS, f1S, f2S, f3S, f4S]:
  print(list(arr))

0.995
[0.995 0.995]
[0.995 0.995]
[0.995 0.995]
[0.995 0.995 0.995]
0.742
[0.742 0.742]
[0.742 0.742]
[0.742 0.742]
[0.742 0.742 0.742]
0.128
[0.128 0.128]
[0.128 0.128]
[0.128 0.128]
[0.128 0.128 0.128]
0.116
[0.116 0.116]
[0.116 0.116]
[0.116 0.116]
[0.116 0.116 0.116]
0.116
[0.116 0.116]
[0.116 0.116]
[0.116 0.116]
[0.116 0.116 0.116]
0.116
[0.116 0.116]
[0.116 0.116]
[0.116 0.116]
[0.116 0.116 0.116]
0.116
[0.116 0.116]
[0.116 0.116]
[0.116 0.116]
[0.116 0.116 0.116]
0.116
[0.116 0.116]
[0.116 0.116]
[0.116 0.116]
[0.116 0.116 0.116]
0.116
[0.116 0.116]
[0.116 0.116]
[0.116 0.116]
[0.116 0.116 0.116]
0.116
[0.116 0.116]
[0.116 0.116]
[0.116 0.116]
[0.116 0.116 0.116]
0.116
[0.116 0.116]
[0.116 0.116]
[0.116 0.116]
[0.116 0.116 0.116]
[0.995, 0.742, 0.128, 0.116, 0.116, 0.116, 0.116, 0.116, 0.116, 0.116, 0.116]
[array([0.995, 0.995]), array([0.742, 0.742]), array([0.128, 0.128]), array([0.116, 0.116]), array([0.116, 0.116]), array([0.116, 0.116]), array([0.116, 0.116]), array([0.116

# Evaluation policy for policy divergence 1.0

In [119]:
gc.collect()
# Softening parameter for the evaluation policy in this section.
EVAL_EPSILON = 0.875

# Epsilon-soft copy of fullPol: entries equal to 1 become 1 - EVAL_EPSILON and
# entries equal to 0 become EVAL_EPSILON / (n_actions - 1). The second mask is
# evaluated after the first assignment, which is safe here because
# 1 - EVAL_EPSILON is non-zero.
# NOTE(review): assuming n_actions == 8, both values equal 0.125, i.e. the
# policy is uniform, matching the "divergence 1.0" heading. TODO confirm.
evalPolSoft = np.copy(fullPol)
evalPolSoft[evalPolSoft == 1] = 1 - EVAL_EPSILON
evalPolSoft[evalPolSoft == 0] = EVAL_EPSILON / (n_actions - 1)

In [120]:
# Roll out the epsilon-soft evaluation policy (evalPolSoft) with a fresh
# DataGenerator; the returned arrays rebind the names used by earlier runs.
dgen = DataGenerator()
states, actions, lengths, rewards, diab, emp_tx_totals, emp_r_totals = dgen.simulate(
    NSIMSAMPS, NSTEPS, policy=evalPolSoft, policy_idx_type='full',
    p_diabetes=PROB_DIAB, use_tqdm=False) # progress bar off; to enable: use_tqdm=True, tqdm_desc='Evaluation Policy Simulation'

obs_samps = utils.format_dgen_samps(
    states, actions, rewards, diab, NSTEPS, NSIMSAMPS)

In [121]:
# Step index 0..NSTEPS-1, repeated once per simulated episode and given a
# trailing singleton axis so it lines up with the other arrays on axis 2.
time = np.arange(NSTEPS)
times = np.tile(time, (NSIMSAMPS, 1))[:, :, np.newaxis]

# Feature order along axis 2: time, state at step t, action, reward,
# state at step t+1.
nf_transitions_e = np.concatenate(
    [times, states[:, :NSTEPS, :], actions, rewards, states[:, 1:, :]],
    axis=2)
nf_transitions_e.shape

(100000, 20, 5)

In [122]:
# Factorise the transitions simulated under the current evaluation policy;
# `te` is handed to evaluate_coverage (alongside `tb`) in the cells below.
te = factorise_dataset(nf_transitions_e)

In [123]:
# Sweep over the number of episodes passed to evaluate_coverage and store the
# five values it returns for each setting.
EPISODES = [10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]

# Result buffers are sized from EPISODES so the sweep grid and the arrays
# cannot drift apart.
nfE = np.zeros(len(EPISODES))
f1E = np.zeros((len(EPISODES), 2))
f2E = np.zeros((len(EPISODES), 2))
f3E = np.zeros((len(EPISODES), 2))
f4E = np.zeros((len(EPISODES), 3))

# The index is called `idx`, not `id`: at notebook top level a loop variable
# named `id` would replace the builtin `id` for every later cell.
for idx, ep in enumerate(EPISODES):
  nfE[idx], f1E[idx], f2E[idx], f3E[idx], f4E[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, ep)
  gc.collect()  # force a collection between runs

for arr in [nfE, f1E, f2E, f3E, f4E]:
  print(list(arr))

0.0
[0. 0.]
[0. 0.]
[0. 0.]
[0. 0. 0.]
0.06
[0.06 0.06]
[0.06 0.06]
[0.06 0.06]
[0.06 0.06 0.06]
0.09
[0.09 0.09]
[0.09 0.09]
[0.09 0.09]
[0.09 0.09 0.09]
0.106
[0.106 0.106]
[0.106 0.106]
[0.106 0.106]
[0.106 0.106 0.106]
0.117
[0.117 0.117]
[0.117 0.117]
[0.117 0.117]
[0.117 0.117 0.117]
0.1426
[0.1426 0.1426]
[0.1426 0.1426]
[0.1426 0.1426]
[0.1426 0.1426 0.1426]
0.1593
[0.1593 0.1593]
[0.1593 0.1593]
[0.1593 0.1593]
[0.1593 0.1593 0.1593]
0.18384
[0.18384 0.18384]
[0.18384 0.18384]
[0.18384 0.18384]
[0.18384 0.18384 0.18384]
0.1959
[0.1959 0.1959]
[0.1959 0.1959]
[0.1959 0.1959]
[0.1959 0.1959 0.1959]
[0.0, 0.06, 0.09, 0.106, 0.117, 0.1426, 0.1593, 0.18384, 0.1959]
[array([0., 0.]), array([0.06, 0.06]), array([0.09, 0.09]), array([0.106, 0.106]), array([0.117, 0.117]), array([0.1426, 0.1426]), array([0.1593, 0.1593]), array([0.18384, 0.18384]), array([0.1959, 0.1959])]
[array([0., 0.]), array([0.06, 0.06]), array([0.09, 0.09]), array([0.106, 0.106]), array([0.117, 0.117]), array([0

In [124]:
# Sweep over the episode length (EPISODE_LENGTH) with NUM_EPISODES fixed at
# 1000 and store the five values evaluate_coverage returns for each setting.
STEPS = [1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

# Result buffers are sized from STEPS so the sweep grid and the arrays cannot
# drift apart.
nfS = np.zeros(len(STEPS))
f1S = np.zeros((len(STEPS), 2))
f2S = np.zeros((len(STEPS), 2))
f3S = np.zeros((len(STEPS), 2))
f4S = np.zeros((len(STEPS), 3))

# The index is called `idx`, not `id`: at notebook top level a loop variable
# named `id` would replace the builtin `id` for every later cell.
for idx, step in enumerate(STEPS):
  nfS[idx], f1S[idx], f2S[idx], f3S[idx], f4S[idx] = evaluate_coverage(tb, te, nf_transitions_b, nf_transitions_e, NUM_EPISODES=1000, EPISODE_LENGTH=step)
  gc.collect()  # force a collection between runs

for arr in [nfS, f1S, f2S, f3S, f4S]:
  print(list(arr))

0.994
[0.994 0.994]
[0.994 0.994]
[0.994 0.994]
[0.994 0.994 0.994]
0.726
[0.726 0.726]
[0.726 0.726]
[0.726 0.726]
[0.726 0.726 0.726]
0.127
[0.127 0.127]
[0.127 0.127]
[0.127 0.127]
[0.127 0.127 0.127]
0.117
[0.117 0.117]
[0.117 0.117]
[0.117 0.117]
[0.117 0.117 0.117]
0.117
[0.117 0.117]
[0.117 0.117]
[0.117 0.117]
[0.117 0.117 0.117]
0.117
[0.117 0.117]
[0.117 0.117]
[0.117 0.117]
[0.117 0.117 0.117]
0.117
[0.117 0.117]
[0.117 0.117]
[0.117 0.117]
[0.117 0.117 0.117]
0.117
[0.117 0.117]
[0.117 0.117]
[0.117 0.117]
[0.117 0.117 0.117]
0.117
[0.117 0.117]
[0.117 0.117]
[0.117 0.117]
[0.117 0.117 0.117]
0.117
[0.117 0.117]
[0.117 0.117]
[0.117 0.117]
[0.117 0.117 0.117]
0.117
[0.117 0.117]
[0.117 0.117]
[0.117 0.117]
[0.117 0.117 0.117]
[0.994, 0.726, 0.127, 0.117, 0.117, 0.117, 0.117, 0.117, 0.117, 0.117, 0.117]
[array([0.994, 0.994]), array([0.726, 0.726]), array([0.127, 0.127]), array([0.117, 0.117]), array([0.117, 0.117]), array([0.117, 0.117]), array([0.117, 0.117]), array([0.117

# Collect Data and Plot Graphs

In [None]:
# Sweep grids for the hard-coded result tables in the cells below: each
# vs_ep_* table has 9 columns (len(EPISODES)), each vs_step_* table has 11
# columns (len(STEPS)), and every table has 8 rows (len(POLICY_DIVERGENCES)).
EPISODES = [10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000]
STEPS = [1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
POLICY_DIVERGENCES = [8.0, 6.4, 4.8, 3.2, 1.6, 1.2, 1.04, 1.0]

## Patient State 136

In [None]:
# Recorded coverage values for state 136, `_d` variant (presumably the
# diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
# NOTE(review): in BOTH tables row index 6 (divergence 1.04, if rows follow
# POLICY_DIVERGENCES) is identical to row index 3 (divergence 3.2). This looks
# like a copy-paste slip; re-run the 1.04 experiment and replace the row.
vs_ep_136_d = [[0.0, 0.0, 0.01, 0.1, 0.102, 0.1452, 0.1777, 0.22804, 0.24358],
             [0.0, 0.06, 0.05, 0.15, 0.179, 0.236, 0.2745, 0.33936, 0.35979],
             [0.0, 0.1, 0.12, 0.186, 0.188, 0.2348, 0.2672, 0.3343, 0.35765],
             [0.0, 0.18, 0.27, 0.354, 0.371, 0.4218, 0.4626, 0.5458, 0.57164],
             [0.0, 0.24, 0.26, 0.362, 0.432, 0.5158, 0.5471, 0.63368, 0.65994],
             [0.0, 0.26, 0.27, 0.368, 0.429, 0.5374, 0.575, 0.65666, 0.68332],
             [0.0, 0.18, 0.27, 0.354, 0.371, 0.4218, 0.4626, 0.5458, 0.57164],
             [0.1, 0.3, 0.31, 0.422, 0.459, 0.5412, 0.5977, 0.6693, 0.69411]]

vs_step_136_d = [[0.977, 0.475, 0.102, 0.102, 0.102, 0.102, 0.102, 0.102, 0.102, 0.102, 0.102],
                [0.977, 0.524, 0.179, 0.179, 0.179, 0.179, 0.179, 0.179, 0.179, 0.179, 0.179],
                [0.99, 0.547, 0.188, 0.188, 0.188, 0.188, 0.188, 0.188, 0.188, 0.188, 0.188],
                [0.985, 0.653, 0.371, 0.371, 0.371, 0.371, 0.371, 0.371, 0.371, 0.371, 0.371],
                [0.989, 0.709, 0.432, 0.432, 0.432, 0.432, 0.432, 0.432, 0.432, 0.432, 0.432],
                [0.99, 0.712, 0.429, 0.429, 0.429, 0.429, 0.429, 0.429, 0.429, 0.429, 0.429],
                [0.985, 0.653, 0.371, 0.371, 0.371, 0.371, 0.371, 0.371, 0.371, 0.371, 0.371],
                [0.988, 0.717, 0.459, 0.459, 0.459, 0.459, 0.459, 0.459, 0.459, 0.459, 0.459]]

In [None]:
# Recorded coverage values for state 136, `_nd` variant (presumably the
# non-diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
vs_ep_136_nd = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0161, 0.0163, 0.0218],
                [0.0, 0.0, 0.01, 0.012, 0.017, 0.0346, 0.0529, 0.06504, 0.07183],
                [0.0, 0.0, 0.01, 0.04, 0.051, 0.0644, 0.0854, 0.1135, 0.12806],
                [0.0, 0.0, 0.01, 0.058, 0.066, 0.1072, 0.1256, 0.16618, 0.18428],
                [0.0, 0.0, 0.0, 0.048, 0.073, 0.136, 0.1627, 0.21646, 0.23987],
                [0.0, 0.0, 0.01, 0.07, 0.098, 0.151, 0.1714, 0.2331, 0.25378],
                [0.0, 0.0, 0.0, 0.07, 0.11, 0.1532, 0.181, 0.23674, 0.25931],
                [0.0, 0.0, 0.02, 0.074, 0.097, 0.1534, 0.1854, 0.23484, 0.26035]]

vs_step_136_nd = [[1.0, 0.759, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                 [0.999, 0.777, 0.032, 0.017, 0.017, 0.017, 0.017, 0.017, 0.017, 0.017, 0.017],
                 [0.996, 0.792, 0.065, 0.051, 0.051, 0.051, 0.051, 0.051, 0.051, 0.051, 0.051],
                 [0.994, 0.809, 0.084, 0.066, 0.066, 0.066, 0.066, 0.066, 0.066, 0.066, 0.066],
                 [0.993, 0.78, 0.091, 0.073, 0.073, 0.073, 0.073, 0.073, 0.073, 0.073, 0.073],
                 [0.996, 0.811, 0.125, 0.098, 0.098, 0.098, 0.098, 0.098, 0.098, 0.098, 0.098],
                 [0.996, 0.798, 0.13, 0.11, 0.11, 0.11, 0.11, 0.11, 0.11, 0.11, 0.11],
                 [0.991, 0.82, 0.112, 0.097, 0.097, 0.097, 0.097, 0.097, 0.097, 0.097, 0.097]]

In [None]:
# Recorded coverage values for state 143, `_d` variant (presumably the
# diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
vs_ep_143_d = [[0.0, 0.0, 0.01, 0.094, 0.092, 0.1544, 0.1838, 0.22492, 0.23496],
               [0.0, 0.12, 0.1, 0.176, 0.199, 0.2578, 0.2973, 0.35514, 0.37215],
               [0.1, 0.12, 0.16, 0.256, 0.278, 0.3592, 0.4, 0.47056, 0.49],
               [0.0, 0.18, 0.18, 0.306, 0.343, 0.4436, 0.4919, 0.5777, 0.60112],
               [0.1, 0.22, 0.23, 0.366, 0.416, 0.5294, 0.5803, 0.66574, 0.69095],
               [0.1, 0.3, 0.26, 0.396, 0.452, 0.5458, 0.6001, 0.68674, 0.71309],
               [0.1, 0.24, 0.26, 0.42, 0.462, 0.5676, 0.6135, 0.69726, 0.72104],
               [0.1, 0.24, 0.26, 0.432, 0.468, 0.5716, 0.6147, 0.69774, 0.7226]]

vs_step_143_d = [[1.0, 0.616, 0.094, 0.092, 0.092, 0.092, 0.092, 0.092, 0.092, 0.092, 0.092],
                 [0.999, 0.662, 0.199, 0.199, 0.199, 0.199, 0.199, 0.199, 0.199, 0.199, 0.199],
                 [0.998, 0.665, 0.278, 0.278, 0.278, 0.278, 0.278, 0.278, 0.278, 0.278, 0.278],
                 [0.99, 0.683, 0.343, 0.343, 0.343, 0.343, 0.343, 0.343, 0.343, 0.343, 0.343],
                 [0.991, 0.738, 0.418, 0.416, 0.416, 0.416, 0.416, 0.416, 0.416, 0.416, 0.416],
                 [0.991, 0.757, 0.452, 0.452, 0.452, 0.452, 0.452, 0.452, 0.452, 0.452, 0.452],
                 [0.99, 0.741, 0.462, 0.462, 0.462, 0.462, 0.462, 0.462, 0.462, 0.462, 0.462],
                 [0.992, 0.743, 0.468, 0.468, 0.468, 0.468, 0.468, 0.468, 0.468, 0.468, 0.468]]

In [None]:
# Recorded coverage values for state 63, `_d` variant (presumably the
# diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
vs_ep_63_d = [[0.1, 0.08, 0.08, 0.188, 0.219, 0.2812, 0.3154, 0.3494, 0.36016],
              [0.5, 0.3, 0.24, 0.336, 0.36, 0.3972, 0.4372, 0.4761, 0.49462],
              [0.6, 0.32, 0.41, 0.426, 0.453, 0.5094, 0.5346, 0.58422, 0.60445],
              [0.3, 0.34, 0.41, 0.434, 0.474, 0.5736, 0.6066, 0.67274, 0.69435],
              [0.3, 0.32, 0.35, 0.488, 0.561, 0.6546, 0.684, 0.75228, 0.7702],
              [0.1, 0.3, 0.4, 0.474, 0.559, 0.663, 0.698, 0.76882, 0.78706],
              [0.1, 0.28, 0.4, 0.522, 0.582, 0.6744, 0.7037, 0.77408, 0.79309],
              [0.1, 0.32, 0.4, 0.512, 0.593, 0.6806, 0.7032, 0.77546, 0.79306]
              ]

vs_step_63_d = [[1.0, 0.72, 0.219, 0.219, 0.219, 0.219, 0.219, 0.219, 0.219, 0.219, 0.219],
                [1.0, 0.732, 0.36, 0.36, 0.36, 0.36, 0.36, 0.36, 0.36, 0.36, 0.36],
                [1.0, 0.735, 0.453, 0.453, 0.453, 0.453, 0.453, 0.453, 0.453, 0.453, 0.453],
                [1.0, 0.742, 0.474, 0.474, 0.474, 0.474, 0.474, 0.474, 0.474, 0.474, 0.474],
                [1.0, 0.746, 0.561, 0.561, 0.561, 0.561, 0.561, 0.561, 0.561, 0.561, 0.561],
                [0.999, 0.768, 0.559, 0.559, 0.559, 0.559, 0.559, 0.559, 0.559, 0.559, 0.559],
                [0.997, 0.792, 0.582, 0.582, 0.582, 0.582, 0.582, 0.582, 0.582, 0.582, 0.582],
                [0.998, 0.783, 0.593, 0.593, 0.593, 0.593, 0.593, 0.593, 0.593, 0.593, 0.593]
                ]

In [None]:
# Recorded coverage values for state 56, `_nd` variant (presumably the
# non-diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
vs_ep_56_nd = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0422, 0.0554, 0.06038, 0.07315],
               [0.0, 0.04, 0.03, 0.034, 0.032, 0.0746, 0.0857, 0.09884, 0.1095],
               [0.0, 0.02, 0.04, 0.08, 0.081, 0.1096, 0.1179, 0.14338, 0.15669],
               [0.0, 0.02, 0.04, 0.074, 0.108, 0.145, 0.1571, 0.19524, 0.21136],
               [0.0, 0.06, 0.06, 0.13, 0.155, 0.1966, 0.2151, 0.25386, 0.27128],
               [0.0, 0.04, 0.11, 0.148, 0.168, 0.2036, 0.2265, 0.27234, 0.28871],
               [0.0, 0.02, 0.08, 0.152, 0.166, 0.2142, 0.2311, 0.27874, 0.29361],
               [0.0, 0.0, 0.05, 0.152, 0.151, 0.2142, 0.2355, 0.2786, 0.29571]
              ]

vs_step_56_nd = [[1.0, 0.846, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                 [0.996, 0.861, 0.036, 0.032, 0.032, 0.032, 0.032, 0.032, 0.032, 0.032, 0.032],
                 [0.998, 0.857, 0.09, 0.081, 0.081, 0.081, 0.081, 0.081, 0.081, 0.081, 0.081],
                 [0.994, 0.853, 0.122, 0.108, 0.108, 0.108, 0.108, 0.108, 0.108, 0.108, 0.108],
                 [0.995, 0.867, 0.175, 0.156, 0.155, 0.155, 0.155, 0.155, 0.155, 0.155, 0.155],
                 [0.996, 0.865, 0.176, 0.169, 0.168, 0.168, 0.168, 0.168, 0.168, 0.168, 0.168],
                 [0.995, 0.851, 0.182, 0.166, 0.166, 0.166, 0.166, 0.166, 0.166, 0.166, 0.166],
                 [0.995, 0.845, 0.167, 0.151, 0.151, 0.151, 0.151, 0.151, 0.151, 0.151, 0.151]
                ]

In [None]:
# Recorded coverage values for state 56, `_d` variant (presumably the
# diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
vs_ep_56_d = [[0.1, 0.22, 0.15, 0.22, 0.229, 0.237, 0.2926, 0.30908, 0.3149],
              [0.2, 0.18, 0.21, 0.318, 0.307, 0.347, 0.3898, 0.42558, 0.44236],
              [0.2, 0.2, 0.23, 0.326, 0.371, 0.4432, 0.4777, 0.53092, 0.55169],
              [0.2, 0.28, 0.35, 0.418, 0.449, 0.5362, 0.564, 0.62686, 0.64644],
              [0.1, 0.24, 0.42, 0.468, 0.502, 0.6138, 0.6408, 0.7021, 0.72338],
              [0.2, 0.4, 0.45, 0.5, 0.557, 0.6306, 0.6592, 0.72108, 0.74329],
              [0.1, 0.38, 0.52, 0.522, 0.556, 0.6278, 0.655, 0.7286, 0.74662],
              [0.2, 0.3, 0.39, 0.49, 0.534, 0.6318, 0.6585, 0.72918, 0.74921]
              ]

vs_step_56_d = [[0.999, 0.475, 0.229, 0.229, 0.229, 0.229, 0.229, 0.229, 0.229, 0.229, 0.229],
                [0.991, 0.529, 0.307, 0.307, 0.307, 0.307, 0.307, 0.307, 0.307, 0.307, 0.307],
                [0.991, 0.574, 0.371, 0.371, 0.371, 0.371, 0.371, 0.371, 0.371, 0.371, 0.371],
                [0.984, 0.659, 0.449, 0.449, 0.449, 0.449, 0.449, 0.449, 0.449, 0.449, 0.449],
                [0.985, 0.69, 0.502, 0.502, 0.502, 0.502, 0.502, 0.502, 0.502, 0.502, 0.502],
                [0.98, 0.721, 0.557, 0.557, 0.557, 0.557, 0.557, 0.557, 0.557, 0.557, 0.557],
                [0.994, 0.74, 0.556, 0.556, 0.556, 0.556, 0.556, 0.556, 0.556, 0.556, 0.556],
                [0.979, 0.719, 0.535, 0.534, 0.534, 0.534, 0.534, 0.534, 0.534, 0.534, 0.534]
                ]

In [None]:
# Recorded coverage values for state 377, `_nd` variant (presumably the
# non-diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
vs_ep_377_nd = [[0.0, 0.46, 0.46, 0.436, 0.47, 0.4706, 0.471, 0.49964, 0.51482],
                [0.0, 0.36, 0.38, 0.404, 0.402, 0.4278, 0.4382, 0.4557, 0.46724],
                [0.1, 0.36, 0.33, 0.328, 0.344, 0.3618, 0.3669, 0.39382, 0.40493],
                [0.0, 0.12, 0.18, 0.262, 0.259, 0.2772, 0.2816, 0.30668, 0.31779],
                [0.1, 0.08, 0.13, 0.104, 0.127, 0.1546, 0.1671, 0.19354, 0.20452],
                [0.0, 0.06, 0.08, 0.102, 0.117, 0.1254, 0.1364, 0.16136, 0.17276],
                [0.0, 0.06, 0.06, 0.084, 0.104, 0.1106, 0.1207, 0.14402, 0.15647],
                [0.0, 0.04, 0.07, 0.072, 0.092, 0.1082, 0.1212, 0.1395, 0.15212]
              ]

vs_step_377_nd = [[0.947, 0.597, 0.47, 0.47, 0.47, 0.47, 0.47, 0.47, 0.47, 0.47, 0.47],
                  [0.961, 0.642, 0.407, 0.402, 0.402, 0.402, 0.402, 0.402, 0.402, 0.402, 0.402],
                  [0.968, 0.668, 0.354, 0.344, 0.344, 0.344, 0.344, 0.344, 0.344, 0.344, 0.344],
                  [0.981, 0.698, 0.269, 0.259, 0.259, 0.259, 0.259, 0.259, 0.259, 0.259, 0.259],
                  [0.985, 0.757, 0.143, 0.127, 0.127, 0.127, 0.127, 0.127, 0.127, 0.127, 0.127],
                  [0.993, 0.755, 0.13, 0.117, 0.117, 0.117, 0.117, 0.117, 0.117, 0.117, 0.117],
                  [0.988, 0.739, 0.115, 0.104, 0.104, 0.104, 0.104, 0.104, 0.104, 0.104, 0.104],
                  [0.988, 0.755, 0.103, 0.092, 0.092, 0.092, 0.092, 0.092, 0.092, 0.092, 0.092]
                ]

In [None]:
# Recorded coverage values for state 377, `_d` variant (presumably the
# diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
vs_ep_377_d = [[0.0, 0.0, 0.0, 0.122, 0.124, 0.2494, 0.2451, 0.26128, 0.28516],
               [0.0, 0.0, 0.02, 0.116, 0.112, 0.1934, 0.1961, 0.2216, 0.23753],
               [0.0, 0.0, 0.0, 0.076, 0.081, 0.1536, 0.162, 0.19084, 0.20346],
               [0.0, 0.0, 0.01, 0.05, 0.075, 0.1298, 0.1465, 0.1734, 0.18826],
               [0.0, 0.0, 0.02, 0.084, 0.097, 0.1192, 0.1371, 0.17482, 0.19457],
               [0.0, 0.0, 0.03, 0.084, 0.08, 0.1292, 0.148, 0.1806, 0.20022],
               [0.0, 0.0, 0.01, 0.088, 0.079, 0.1278, 0.1512, 0.18606, 0.20546],
               [0.0, 0.0, 0.02, 0.078, 0.078, 0.122, 0.1466, 0.18436, 0.20485]
              ]

vs_step_377_d = [[1.0, 0.631, 0.124, 0.124, 0.124, 0.124, 0.124, 0.124, 0.124, 0.124, 0.124],
                 [0.998, 0.629, 0.113, 0.112, 0.112, 0.112, 0.112, 0.112, 0.112, 0.112, 0.112],
                 [0.995, 0.612, 0.084, 0.081, 0.081, 0.081, 0.081, 0.081, 0.081, 0.081, 0.081],
                 [0.987, 0.629, 0.077, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075],
                 [0.986, 0.631, 0.1, 0.097, 0.097, 0.097, 0.097, 0.097, 0.097, 0.097, 0.097],
                 [0.983, 0.597, 0.081, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08],
                 [0.99, 0.629, 0.079, 0.079, 0.079, 0.079, 0.079, 0.079, 0.079, 0.079, 0.079],
                 [0.987, 0.583, 0.079, 0.078, 0.078, 0.078, 0.078, 0.078, 0.078, 0.078, 0.078]
                ]

In [None]:
# Recorded coverage values for state 696, `_nd` variant (presumably the
# non-diabetic patient; TODO confirm). 8 rows; vs_ep_* has one column per
# EPISODES entry, vs_step_* one column per STEPS entry.
# Row indices 5, 6 and 7 reproduce the first printed list of the sweep cells
# under the "policy divergence 1.2", "1.04" and "1.0" headings above, so the
# rows follow POLICY_DIVERGENCES order at least for those entries.
vs_ep_696_nd = [[0.0, 0.06, 0.04, 0.026, 0.022, 0.0578, 0.095, 0.09712, 0.13877],
                [0.0, 0.0, 0.02, 0.026, 0.027, 0.0768, 0.1036, 0.11306, 0.13812],
                [0.0, 0.04, 0.06, 0.034, 0.059, 0.0922, 0.1118, 0.12958, 0.14748],
                [0.0, 0.02, 0.04, 0.062, 0.077, 0.1108, 0.1306, 0.15344, 0.16511],
                [0.0, 0.0, 0.05, 0.092, 0.121, 0.1384, 0.1498, 0.17454, 0.18554],
                [0.0, 0.04, 0.09, 0.088, 0.106, 0.1376, 0.1517, 0.1788, 0.18993],
                [0.0, 0.0, 0.05, 0.086, 0.116, 0.135, 0.1588, 0.1842, 0.19347],
                [0.0, 0.06, 0.09, 0.106, 0.117, 0.1426, 0.1593, 0.18384, 0.1959]
              ]

vs_step_696_nd = [[1.0, 0.929, 0.029, 0.022, 0.022, 0.022, 0.022, 0.022, 0.022, 0.022, 0.022],
                  [0.999, 0.839, 0.038, 0.03, 0.027, 0.027, 0.027, 0.027, 0.027, 0.027, 0.027],
                  [0.997, 0.827, 0.079, 0.061, 0.059, 0.059, 0.059, 0.059, 0.059, 0.059, 0.059],
                  [0.995, 0.792, 0.085, 0.077, 0.077, 0.077, 0.077, 0.077, 0.077, 0.077, 0.077],
                  [0.996, 0.736, 0.129, 0.121, 0.121, 0.121, 0.121, 0.121, 0.121, 0.121, 0.121],
                  [0.997, 0.759, 0.112, 0.106, 0.106, 0.106, 0.106, 0.106, 0.106, 0.106, 0.106],
                  [0.995, 0.742, 0.128, 0.116, 0.116, 0.116, 0.116, 0.116, 0.116, 0.116, 0.116],
                  [0.994, 0.726, 0.127, 0.117, 0.117, 0.117, 0.117, 0.117, 0.117, 0.117, 0.117]
                ]

In [None]:
# Recorded coverage values for state 696, `_d` variant (presumably the
# diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
vs_ep_696_d = [[0.0, 0.08, 0.06, 0.144, 0.147, 0.2198, 0.2393, 0.2396, 0.24763],
               [0.1, 0.16, 0.2, 0.23, 0.215, 0.266, 0.2867, 0.2973, 0.31123],
               [0.0, 0.12, 0.18, 0.258, 0.277, 0.3128, 0.3393, 0.35718, 0.37144],
               [0.1, 0.26, 0.28, 0.358, 0.352, 0.3854, 0.404, 0.42336, 0.43603],
               [0.2, 0.2, 0.27, 0.366, 0.41, 0.4476, 0.4586, 0.49156, 0.5086],
               [0.1, 0.16, 0.22, 0.354, 0.417, 0.4536, 0.4827, 0.51286, 0.52706],
               [0.2, 0.38, 0.38, 0.382, 0.401, 0.4634, 0.4886, 0.5193, 0.5321],
               [0.0, 0.24, 0.31, 0.434, 0.436, 0.484, 0.5018, 0.52592, 0.53687]
              ]

vs_step_696_d = [[1.0, 0.266, 0.147, 0.147, 0.147, 0.147, 0.147, 0.147, 0.147, 0.147, 0.147],
                 [0.999, 0.358, 0.215, 0.215, 0.215, 0.215, 0.215, 0.215, 0.215, 0.215, 0.215],
                 [0.998, 0.43, 0.277, 0.277, 0.277, 0.277, 0.277, 0.277, 0.277, 0.277, 0.277],
                 [0.992, 0.518, 0.352, 0.352, 0.352, 0.352, 0.352, 0.352, 0.352, 0.352, 0.352],
                 [0.993, 0.616, 0.41, 0.41, 0.41, 0.41, 0.41, 0.41, 0.41, 0.41, 0.41],
                 [0.991, 0.638, 0.417, 0.417, 0.417, 0.417, 0.417, 0.417, 0.417, 0.417, 0.417],
                 [0.989, 0.635, 0.402, 0.401, 0.401, 0.401, 0.401, 0.401, 0.401, 0.401, 0.401],
                 [0.986, 0.635, 0.437, 0.436, 0.436, 0.436, 0.436, 0.436, 0.436, 0.436, 0.436]
                ]

In [None]:
# Recorded coverage values for state 703, `_nd` variant (presumably the
# non-diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
vs_ep_703_nd = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0374, 0.0689, 0.097, 0.13331],
                [0.0, 0.0, 0.0, 0.02, 0.025, 0.0636, 0.0893, 0.12022, 0.13903],
                [0.0, 0.0, 0.0, 0.058, 0.065, 0.0996, 0.111, 0.13708, 0.1504],
                [0.0, 0.04, 0.05, 0.06, 0.068, 0.1146, 0.1293, 0.1565, 0.16897],
                [0.0, 0.04, 0.06, 0.084, 0.091, 0.1404, 0.1526, 0.18218, 0.19359],
                [0.1, 0.02, 0.03, 0.096, 0.117, 0.145, 0.1597, 0.18812, 0.20053],
                [0.0, 0.02, 0.02, 0.102, 0.111, 0.1478, 0.1636, 0.19088, 0.20289],
                [0.0, 0.06, 0.05, 0.12, 0.115, 0.155, 0.1665, 0.19318, 0.20463]
              ]

# The comma after the first row is required: without it Python parses
# `[row0][row1...]` as subscripting a list with a tuple and raises TypeError.
vs_step_703_nd = [[1.0, 0.823, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                  [0.998, 0.814, 0.028, 0.025, 0.025, 0.025, 0.025, 0.025, 0.025, 0.025, 0.025],
                  [0.998, 0.825, 0.065, 0.065, 0.065, 0.065, 0.065, 0.065, 0.065, 0.065, 0.065],
                  [0.994, 0.781, 0.075, 0.068, 0.068, 0.068, 0.068, 0.068, 0.068, 0.068, 0.068],
                  [0.989, 0.751, 0.097, 0.091, 0.091, 0.091, 0.091, 0.091, 0.091, 0.091, 0.091],
                  [0.995, 0.779, 0.129, 0.117, 0.117, 0.117, 0.117, 0.117, 0.117, 0.117, 0.117],
                  [0.996, 0.768, 0.122, 0.111, 0.111, 0.111, 0.111, 0.111, 0.111, 0.111, 0.111],
                  [0.99, 0.755, 0.124, 0.115, 0.115, 0.115, 0.115, 0.115, 0.115, 0.115, 0.115]
                ]

In [None]:
# Recorded coverage values for state 703, `_d` variant (presumably the
# diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
vs_ep_703_d = [[0.0, 0.0, 0.1, 0.164, 0.17, 0.224, 0.2307, 0.24306, 0.25042],
               [0.0, 0.06, 0.15, 0.216, 0.238, 0.284, 0.2896, 0.30524, 0.31641],
               [0.0, 0.1, 0.18, 0.268, 0.293, 0.336, 0.3437, 0.37524, 0.38776],
               [0.1, 0.1, 0.19, 0.322, 0.341, 0.4024, 0.4168, 0.45206, 0.46408],
               [0.2, 0.34, 0.35, 0.384, 0.431, 0.4776, 0.4948, 0.52832, 0.5431],
               [0.3, 0.26, 0.28, 0.432, 0.461, 0.5074, 0.5198, 0.54994, 0.56395],
               [0.2, 0.16, 0.28, 0.394, 0.472, 0.503, 0.5215, 0.55644, 0.56915],
               [0.4, 0.16, 0.26, 0.404, 0.445, 0.5004, 0.5217, 0.55864, 0.57218]
              ]

vs_step_703_d = [[0.987, 0.308, 0.17, 0.17, 0.17, 0.17, 0.17, 0.17, 0.17, 0.17, 0.17],
                 [0.99, 0.384, 0.238, 0.238, 0.238, 0.238, 0.238, 0.238, 0.238, 0.238, 0.238],
                 [0.988, 0.474, 0.293, 0.293, 0.293, 0.293, 0.293, 0.293, 0.293, 0.293, 0.293],
                 [0.978, 0.558, 0.341, 0.341, 0.341, 0.341, 0.341, 0.341, 0.341, 0.341, 0.341],
                 [0.982, 0.659, 0.431, 0.431, 0.431, 0.431, 0.431, 0.431, 0.431, 0.431, 0.431],
                 [0.983, 0.661, 0.461, 0.461, 0.461, 0.461, 0.461, 0.461, 0.461, 0.461, 0.461],
                 [0.982, 0.705, 0.472, 0.472, 0.472, 0.472, 0.472, 0.472, 0.472, 0.472, 0.472],
                 [0.981, 0.67, 0.445, 0.445, 0.445, 0.445, 0.445, 0.445, 0.445, 0.445, 0.445]
                ]

In [None]:
# Recorded coverage values for state 439, `_nd` variant (presumably the
# non-diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
vs_ep_439_nd = [[0.0, 0.0, 0.13, 0.14, 0.14, 0.1548, 0.1564, 0.15406, 0.15399],
                [0.0, 0.04, 0.13, 0.228, 0.257, 0.2638, 0.2616, 0.2651, 0.27057],
                [0.4, 0.18, 0.31, 0.322, 0.34, 0.3622, 0.3588, 0.37572, 0.38465],
                [0.2, 0.26, 0.44, 0.414, 0.441, 0.4532, 0.4647, 0.4887, 0.4945],
                [0.4, 0.28, 0.39, 0.528, 0.555, 0.567, 0.5769, 0.59934, 0.60668],
                [0.4, 0.4, 0.46, 0.546, 0.576, 0.5796, 0.6007, 0.62502, 0.63356],
                [0.1, 0.38, 0.49, 0.544, 0.573, 0.6052, 0.6106, 0.63124, 0.64333],
                [0.1, 0.48, 0.56, 0.574, 0.566, 0.5956, 0.6073, 0.64006, 0.64719]
              ]

vs_step_439_nd = [[0.991, 0.618, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14, 0.14],
                  [0.993, 0.681, 0.261, 0.257, 0.257, 0.257, 0.257, 0.257, 0.257, 0.257, 0.257],
                  [0.991, 0.758, 0.345, 0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.34],
                  [0.989, 0.791, 0.447, 0.441, 0.441, 0.441, 0.441, 0.441, 0.441, 0.441, 0.441],
                  [0.99, 0.831, 0.558, 0.555, 0.555, 0.555, 0.555, 0.555, 0.555, 0.555, 0.555],
                  [0.993, 0.843, 0.578, 0.576, 0.576, 0.576, 0.576, 0.576, 0.576, 0.576, 0.576],
                  [0.987, 0.843, 0.577, 0.573, 0.573, 0.573, 0.573, 0.573, 0.573, 0.573, 0.573],
                  [0.99, 0.848, 0.57, 0.566, 0.566, 0.566, 0.566, 0.566, 0.566, 0.566, 0.566]
                ]

In [None]:
# Recorded coverage values for state 439, `_d` variant (presumably the
# diabetic patient; TODO confirm). 8 rows, presumably one per entry of
# POLICY_DIVERGENCES in that order; vs_ep_* has one column per EPISODES entry,
# vs_step_* one column per STEPS entry.
vs_ep_439_d = [[0.0, 0.04, 0.05, 0.142, 0.155, 0.1788, 0.1834, 0.2109, 0.21891],
               [0.1, 0.2, 0.21, 0.28, 0.274, 0.2886, 0.2892, 0.31152, 0.31632],
               [0.0, 0.2, 0.23, 0.354, 0.344, 0.3812, 0.3925, 0.40944, 0.41833],
               [0.3, 0.44, 0.46, 0.47, 0.466, 0.4848, 0.4962, 0.51774, 0.52558],
               [0.4, 0.44, 0.51, 0.536, 0.552, 0.5842, 0.6, 0.62364, 0.63192],
               [0.4, 0.58, 0.54, 0.574, 0.586, 0.6052, 0.6253, 0.65064, 0.66252],
               [0.3, 0.5, 0.48, 0.552, 0.578, 0.622, 0.6357, 0.65996, 0.67024],
               [0.5, 0.56, 0.62, 0.588, 0.609, 0.6314, 0.6395, 0.66358, 0.67169]
              ]

vs_step_439_d = [[0.997, 0.483, 0.155, 0.155, 0.155, 0.155, 0.155, 0.155, 0.155, 0.155, 0.155],
                 [0.997, 0.608, 0.276, 0.274, 0.274, 0.274, 0.274, 0.274, 0.274, 0.274, 0.274],
                 [1.0, 0.664, 0.344, 0.344, 0.344, 0.344, 0.344, 0.344, 0.344, 0.344, 0.344],
                 [0.996, 0.755, 0.468, 0.466, 0.466, 0.466, 0.466, 0.466, 0.466, 0.466, 0.466],
                 [0.995, 0.841, 0.559, 0.552, 0.552, 0.552, 0.552, 0.552, 0.552, 0.552, 0.552],
                 [0.996, 0.852, 0.594, 0.586, 0.586, 0.586, 0.586, 0.586, 0.586, 0.586, 0.586],
                 [0.995, 0.869, 0.583, 0.578, 0.578, 0.578, 0.578, 0.578, 0.578, 0.578, 0.578],
                 [0.998, 0.869, 0.615, 0.609, 0.609, 0.609, 0.609, 0.609, 0.609, 0.609, 0.609]
                ]