In [1]:
import os
from adaptive.inference import analyze, aw_scores
from adaptive.experiment import *
from adaptive.ridge import *
from adaptive.datagen import *
from adaptive.saving import *
import random
# Pin both random number generators (the stdlib one and NumPy's legacy global
# one) to the same fixed value so the notebook reproduces on Restart & Run All.
random.seed(a=60637)
np.random.seed(seed=60637)
# Draw the integer seed (0 <= seed < 99999) that is handed to the data
# generator below; it is deterministic because `random` was just seeded.
seed = random.randrange(0, 99999)

In [2]:
# --- Problem dimensions -----------------------------------------------------
K = 4      # number of arms
p = 3      # number of features
T = 7000   # sample size
# One initial batch of 200 followed by 68 batches of 100 (200 + 6800 = T).
batch_sizes = [200, *([100] * 68)]
signal_strength = 0.5

# --- Experiment configuration passed to run_experiment ----------------------
# Key order is kept identical to the keyword order used previously.
config = {
    'T': T,
    'K': K,
    'p': p,
    'noise_form': 'normal',
    'noise_std': 1,
    'noise_scale': 0.5,
    'floor_start': 1 / K,
    'bandit_model': 'RegionModel',
    'floor_decay': 0.8,
    'dgp': 'synthetic_signal',
}

# --- Generate the synthetic environment, then run the experiment on it ------
data_exp, mus = simple_tree_data(
    T=T,
    K=K,
    p=p,
    noise_std=1,
    split=0.5,
    signal_strength=signal_strength,
    noise_form='normal',
    seed=seed,
)
xs = data_exp['xs']
ys = data_exp['ys']

data = run_experiment(xs, ys, config, batch_sizes=batch_sizes)
yobs = data['yobs']
ws = data['ws']
probs = data['probs']

In [3]:
# Step 1: estimate muhat from the logged data with the ridge helper.
muhat = ridge_muhat_lfo_pai(xs, ws, yobs, K, batch_sizes)

# Step 2: build the scores (gammahat) from the observed outcomes, the chosen
# arms, the balancing weights and the muhat estimates.
# NOTE(review): balwts is 1 / collect(collect3(probs), ws) -- presumably the
# inverse of the assignment probability of the arm actually pulled; confirm
# against the definitions of `collect` / `collect3`.
gammahat = aw_scores(
    yobs=yobs,
    ws=ws,
    balwts=1 / collect(collect3(probs), ws),
    K=K,
    muhat=collect3(muhat),
)

Best arm

In [4]:
# (T, K) policy matrix that puts weight 1 on a single column -- the index
# returned by np.argmax(mus) -- in every row, and 0 everywhere else.
# Built by taking the matching one-hot row of the K x K identity and
# repeating it T times.
best_mtx = np.tile(np.eye(K)[np.argmax(mus)], (T, 1))

# Evaluate this fixed policy; the returned dict is the cell's displayed output.
# NOTE(review): policy_value=0 is passed through unchanged -- confirm how
# `analyze` uses it before interpreting the numbers.
analyze(
    probs=probs,
    gammahat=gammahat,
    policy=best_mtx,
    policy_value=0,
)

{'uniform': array([-0.05295674,  0.09204418]),
 'propscore_expected': array([0.18769294, 0.00156756]),
 'propscore_X': array([0.19502784, 0.00068292]),
 'lvdl_expected': array([0.13694664, 0.0038961 ]),
 'lvdl_X': array([0.16564352, 0.00134816])}

Optimal policy

In [5]:
# Policy matrix for the per-row best arm: for each row of data_exp['muxs'],
# np.argmax(..., axis=1) picks the column with the largest value.
# NOTE(review): `expand` presumably scatters the length-T vector of ones into
# a (T, K) matrix at those column indices -- confirm against its definition.
optimal_mtx = expand(
    np.ones(T),
    np.argmax(data_exp['muxs'], axis=1),
    K,
)

# Evaluate the policy; the returned dict is the cell's displayed output.
analyze(
    probs=probs,
    gammahat=gammahat,
    policy=optimal_mtx,
    policy_value=0,
)

{'uniform': array([0.46466481, 0.0032442 ]),
 'propscore_expected': array([0.40170754, 0.00317846]),
 'propscore_X': array([5.14734972e-01, 3.21614330e-04]),
 'lvdl_expected': array([0.44504472, 0.00333831]),
 'lvdl_X': array([5.01188618e-01, 4.91718301e-04])}

Contrasts

In [6]:
# Element-wise difference between the two policy matrices built above
# (optimal policy minus best-arm policy).
contrast_mtx = optimal_mtx - best_mtx

# Run the same evaluation on the difference matrix; the returned dict is the
# cell's displayed output.
analyze(
    probs=probs,
    gammahat=gammahat,
    policy=contrast_mtx,
    policy_value=0,
)

{'uniform': array([0.51762156, 0.09164901]),
 'propscore_expected': array([0.31753274, 0.00630647]),
 'propscore_X': array([0.28699802, 0.00297868]),
 'lvdl_expected': array([0.37517445, 0.01329507]),
 'lvdl_X': array([0.34038577, 0.00827233])}

In [7]:
# Persist data_exp['muxs'] and the gammahat scores as comma-separated text.
# np.savetxt does not create missing parent directories, so make sure
# "results/" exists first; otherwise a fresh checkout fails here with
# FileNotFoundError. exist_ok=True keeps the cell safe to re-run.
os.makedirs("results", exist_ok=True)
np.savetxt("results/muxs.csv", data_exp['muxs'], delimiter=",")
np.savetxt("results/gammahat.csv", gammahat, delimiter=",")

In [8]:
# Persist the assignment probabilities in NumPy's binary format. np.save
# appends the ".npy" extension, so the file written is "results/probs.npy".
# Create the output directory first so this cell also works when run on its
# own in a fresh checkout (np.save does not create missing directories).
os.makedirs("results", exist_ok=True)
np.save("results/probs", probs)