### Test IHDP data

In [1]:
# Notebook setup: plotting backend, figure defaults and imports.
# `%pylab inline` populates the interactive namespace from numpy and matplotlib
# (see the recorded output of this cell); `pylab` on the next line comes from it.
%pylab inline
# Default figure size used by every plot in this notebook.
pylab.rcParams['figure.figsize'] = (6, 6)

# Project-local bandit algorithms and the IHDP data generator/parameters.
from lib.bandits import LinUCB, ThresholdBandit, GreedyBandit, ThresholdConsBandit
from lib.generator import IHDPGenerator, IHDPParams
import numpy as np 
# NOTE(review): `pandas` and `shuffle` are not referenced by any visible cell.
import pandas
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import seaborn as sns
# Seaborn 'ticks' style for all figures.
sns.set_style('ticks')

def smooth(y, box_pts=100):
    """Smooth a signal with a moving-average (box) filter.

    Parameters
    ----------
    y : array_like
        Either a 1-D signal, or a 2-D array whose rows are smoothed
        independently of each other. The input is never modified.
    box_pts : int, optional
        Width of the averaging window in samples (default 100).

    Returns
    -------
    numpy.ndarray
        The smoothed values in a floating-point (promoted) dtype. For 2-D
        input the result has the same shape as ``y``.

    Notes
    -----
    Each row is filtered with ``np.convolve(..., mode='same')``, so samples
    near either end are averaged over a window that is partly outside the
    signal and are therefore pulled towards zero.
    """
    y = np.asarray(y)
    box = np.ones(box_pts)/box_pts
    if y.ndim == 2:
        # Allocate the result in the promoted dtype. Starting from y.copy()
        # would keep an integer dtype and silently truncate the averaged
        # values when they are written back row by row.
        y_smooth = np.empty(y.shape, dtype=np.result_type(y, box))
        for i in range(y.shape[0]):
            y_smooth[i,:] = np.convolve(y[i,:], box, mode='same')
    else:
        y_smooth = np.convolve(y, box, mode='same')
    return y_smooth

Populating the interactive namespace from numpy and matplotlib


The minimum supported version is 2.4.6



In [2]:
# Experiment configuration read by the cells below.
# NOTE(review): `intercept` and `save` are not read by any visible cell — confirm they are still needed.
intercept = True #constant term in linear model
M = 100            #number of runs
# Path handed to IHDPParams to build the dataset parameters.
fn_in = 'data/ihdp_npci_1-100.all.npz'
save = False

In [3]:
# Build the IHDP parameter object from fn_in; later cells read its .n, .d and .ctx attributes.
params = IHDPParams(fn_in)

In [4]:
# Problem dimensions and confidence level; N and d come from the loaded parameters.
# NOTE(review): `k` and `d` are not referenced by any visible cell.
N = params.n       #number of timesteps
k = 2           #number of arms
d = params.d           #dimension of context (includes one dim for intercept term if present)
delta = 1./N          #confidence level, adjusted for horizon

## Look at the dataset

In [5]:
# Dimensions of the context array; the recorded output of this cell is (747, 25, 100).
# NOTE(review): the meaning of each axis is not shown in this notebook — confirm against IHDPParams.
params.ctx.shape

(747, 25, 100)

In [None]:
# Preview the first 10 rows of the context array, taking index 0 along the last axis.
# `params` was already built from fn_in in an earlier cell and has not been
# touched since, so it is reused here instead of being rebuilt.
params.ctx[0:10,:,0]

array([[ 0.98668842,  0.99634625, -1.10562395,  2.24431956,  1.12129247,
        -0.02970882,  1.        ,  0.        ,  1.        ,  1.        ,
         0.        ,  0.        ,  1.        ,  1.        ,  0.        ,
         1.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  1.        ],
       [ 1.41901117,  1.79587438, -1.47798693,  0.16170253, -1.37939562,
        -0.52655561,  0.        ,  0.        ,  0.        ,  1.        ,
         0.        ,  0.        ,  1.        ,  1.        ,  0.        ,
         1.        ,  0.        ,  1.        ,  0.        ,  1.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-1.06684465, -1.8020022 ,  1.12855393,  1.20301104,  0.68367206,
         1.29521594,  0.        ,  0.        ,  1.        ,  0.        ,
         0.        ,  1.        ,  1.        ,  1.        ,  0.        ,
         1.        ,  1.        ,  1.        ,  0

## Run online learning algs

In [None]:
# LinUCB: M runs of N rounds each. regret_ucb[run, t] stores the regret value
# that step() returned at round t of that run.
regret_ucb = np.zeros(shape=(M, N))
for run in range(M):
    print("Run %d/%d"%(run+1,M))
    # Parameters and generator are rebuilt at the start of every run.
    params = IHDPParams(fn_in)
    generator = IHDPGenerator(params)
    linucb = LinUCB(generator, delta, n_pulls=N)
    for t in range(N):
        # step() yields a 4-tuple; only its last element (the regret) is recorded.
        ctx, arm_idx, obs, reg = linucb.step()
        regret_ucb[run, t] = reg

Run 1/100


  self.beta = lambda v: np.sqrt(self.lamd)*L + np.sqrt(np.log(np.linalg.det(v))-self.d*np.log(self.lamd)-2*np.log(delta))


In [None]:
# ThresholdBandit: M runs of N rounds each. regret_thr[run, t] stores the regret
# value that step() returned at round t of that run.
regret_thr = np.zeros(shape=(M, N))
for run in range(M):
    print("Run %d/%d"%(run+1,M))
    # Parameters and generator are rebuilt at the start of every run.
    params = IHDPParams(fn_in)
    generator = IHDPGenerator(params)
    thresholdbandit = ThresholdBandit(generator, delta, n_pulls=N)
    for t in range(N):
        # step() yields a 4-tuple; only its last element (the regret) is recorded.
        ctx, arm_idx, obs, reg = thresholdbandit.step()
        regret_thr[run, t] = reg

In [None]:
# ThresholdConsBandit: M runs of N rounds each. regret_conthr[j, i] stores the
# regret value that step() returned at round i of run j.
regret_conthr = np.zeros((M,N))
for j in range(M):
    print("Run %d/%d"%(j+1,M))
    # Parameters and generator are rebuilt at the start of every run.
    params = IHDPParams(fn_in)
    generator = IHDPGenerator(params)
    # Named after the algorithm it holds; the previous name `thresholdbandit`
    # suggested this was a plain ThresholdBandit.
    consbandit = ThresholdConsBandit(generator, delta, n_pulls = N)
    for i in range(N):
        # step() yields a 4-tuple; only its last element (the regret) is recorded.
        (ctx, arm_idx, obs, reg) = consbandit.step()
        regret_conthr[j,i] = reg

In [None]:
# GreedyBandit: M runs of N rounds each. regret_gre[j, i] stores the regret
# value that step() returned at round i of run j.
regret_gre = np.zeros((M,N))
for j in range(M):
    print("Run %d/%d"%(j+1,M))
    # Parameters and generator are rebuilt at the start of every run.
    params = IHDPParams(fn_in)
    generator = IHDPGenerator(params)
    # Named after the algorithm it holds; the previous name `thresholdbandit`
    # suggested this was a ThresholdBandit rather than the greedy baseline.
    greedybandit = GreedyBandit(generator, delta, n_pulls = N)
    for i in range(N):
        # step() yields a 4-tuple; only its last element (the regret) is recorded.
        (ctx, arm_idx, obs, reg) = greedybandit.step()
        regret_gre[j,i] = reg

## The oracle

In [None]:
# TODO: fit least squares with all the data

# TODO: predict with the fitted model

## The regret

In [None]:
# Draw one regret curve per algorithm on a shared axis.
# NOTE(review): sns.tsplot is deprecated in newer seaborn releases — confirm
# the installed version still provides it before re-running.
regret_curves = (
    (regret_ucb, 'red'),
    (regret_thr, 'green'),
    (regret_conthr, 'blue'),
    (regret_gre, 'purple'),
)
for regret_runs, colour in regret_curves:
    sns.tsplot(regret_runs, color = colour)

# Legend entries are listed in the same order the curves were drawn above.
plt.legend(['LinUCB', 'Threshold greedy', 'Threshold conservative', 'Greedy'])
plt.xlabel('round')
plt.ylabel('regret')
sns.despine(trim = True)