### Test IHDP data

In [1]:
# NOTE(review): %pylab populates the interactive namespace from numpy and
# matplotlib (see the message it prints below the cell); the explicit imports
# further down are what the rest of the notebook actually references.
%pylab inline
# Default figure size for plots that do not pass their own figsize.
pylab.rcParams['figure.figsize'] = (6, 6)

# Bandit algorithms compared in this notebook (project-local module).
from lib.bandits import LinUCB, ThresholdBandit, GreedyBandit, ThresholdConsBandit, RarelySwitchingLinUCB, RCTBandit,\
    ThresholdMaxConsBandit, ThresholdMaxConsGreedyBandit
# Parameter container and sample generator for the IHDP data (project-local module).
from lib.generator import IHDPGenerator, IHDPParams
import numpy as np 
import pandas
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import seaborn as sns
# Use seaborn's 'ticks' style for every figure in the notebook.
sns.set_style('ticks')

def smooth(y, box_pts=100):
    """Smooth ``y`` with a moving-average (box) filter.

    Parameters
    ----------
    y : array-like
        Either a 1-D signal or a 2-D array whose rows are independent signals.
        Any other rank is passed straight to ``np.convolve`` (which only
        accepts 1-D input).
    box_pts : int, optional
        Width of the box filter in samples (default 100).

    Returns
    -------
    numpy.ndarray
        The smoothed signal(s). For 2-D input the result has the same shape as
        ``y``; inexact (float/complex) inputs keep their dtype, every other
        dtype is promoted to float so the averages are not truncated.

    Notes
    -----
    ``np.convolve(..., mode='same')`` is used, so samples near both ends are
    averaged against implicit zeros outside the signal.
    """
    y = np.asarray(y)
    box = np.ones(box_pts) / box_pts
    if y.ndim == 2:
        # Writing the averaged rows back into a copy of an integer array would
        # silently cast them to int, so allocate a floating-point output unless
        # the input is already inexact.
        out_dtype = y.dtype if np.issubdtype(y.dtype, np.inexact) else float
        y_smooth = np.empty(y.shape, dtype=out_dtype)
        for i in range(y.shape[0]):
            y_smooth[i, :] = np.convolve(y[i, :], box, mode='same')
    else:
        y_smooth = np.convolve(y, box, mode='same')
    return y_smooth

Populating the interactive namespace from numpy and matplotlib


SyntaxError: Missing parentheses in call to 'print'. Did you mean print("Updating theta_tilde")? (bandits.py, line 447)

In [None]:
# Experiment configuration shared by the cells below.
fn_in = 'data/ihdp_npci_1-100.all.npz'   # path of the .npz input file
M = 100                                  # number of runs
intercept = True                         # constant term in linear model
save = False

In [None]:
# Build the IHDP parameter object from the input file; the cells below read
# its .n, .d and .ctx attributes.
params = IHDPParams(fn_in)

In [None]:
# Problem dimensions taken from the loaded parameters, plus derived settings.
N = params.n        # number of timesteps
d = params.d        # dimension of context (includes one dim for intercept term if present)
k = 2               # number of arms
delta = 1.0 / N     # confidence level, adjusted for horizon

## Look at the dataset

In [None]:
# Display the dimensions of the context array (indexed with three axes below).
params.ctx.shape

In [None]:
# Reload the parameters and show the first 10 entries along axis 0, everything
# along axis 1, at index 0 of axis 2 of the context array.
params = IHDPParams(fn_in)
params.ctx[:10, :, 0]

## Run online learning algs

In [None]:
# Run RCTBandit for M independent runs of N steps each.
#   regret_rct[j, i]   : regret value returned by bandit.step() at step i of run j
#   updatepol_rct[j, :]: first N entries of bandit.update_theta, if the bandit
#                        exposes that attribute (otherwise the row stays zero)
regret_rct = np.zeros((M,N))
# Allocate the update record up front: the original cell wrote into
# updatepol_rct without ever creating it.
updatepol_rct = np.zeros((M,N))
for j in range(M):
    print("Run %d/%d"%(j+1,M))
    # Fresh parameters and generator for every run.
    params = IHDPParams(fn_in)
    generator = IHDPGenerator(params)
    bandit = RCTBandit(generator, delta, n_pulls=N)
    for i in range(N):
        (ctx, arm_idx, obs, reg) = bandit.step()
        regret_rct[j,i] = reg
    if hasattr(bandit, 'update_theta'):
        # Index by the run counter j (the original used an undefined name `m`).
        updatepol_rct[j,:] = bandit.update_theta[0:N]

In [None]:
# Run LinUCB for M independent runs of N steps each.
#   regret_ucb[j, i]   : regret value returned by linucb.step() at step i of run j
#   updatepol_ucb[j, :]: first N entries of linucb.update_theta, if the bandit
#                        exposes that attribute (otherwise the row stays zero)
regret_ucb = np.zeros((M,N))
updatepol_ucb = np.zeros((M,N))
for j in range(M):
    print("Run %d/%d"%(j+1,M))
    # Fresh parameters and generator for every run.
    params = IHDPParams(fn_in)
    generator = IHDPGenerator(params)
    linucb = LinUCB(generator, delta, n_pulls=N)
    for i in range(N):
        (ctx, arm_idx, obs, reg) = linucb.step()
        regret_ucb[j,i] = reg
    if hasattr(linucb, 'update_theta'):
        # Index by the run counter j (the original used an undefined name `m`).
        updatepol_ucb[j,:] = linucb.update_theta[0:N]

In [None]:
# ThresholdBandit: M independent runs of N steps each. Each row of regret_thr
# holds the regret values returned by step(); each row of updatepol_thr holds
# the first N entries of the bandit's update_theta, when it has one.
regret_thr = np.zeros((M, N))
updatepol_thr = np.zeros((M, N))
for run in range(M):
    print("Run %d/%d" % (run + 1, M))
    params = IHDPParams(fn_in)
    generator = IHDPGenerator(params)
    thresholdbandit = ThresholdBandit(generator, delta, n_pulls=N)
    for t in range(N):
        context, arm, observation, step_regret = thresholdbandit.step()
        regret_thr[run, t] = step_regret
    if hasattr(thresholdbandit, 'update_theta'):
        updatepol_thr[run, :] = thresholdbandit.update_theta[:N]

In [None]:
# ThresholdConsBandit: M independent runs of N steps each. Each row of
# regret_conthr holds the regret values returned by step(); each row of
# updatepol_thrcons holds the first N entries of update_theta, when present.
regret_conthr = np.zeros((M, N))
updatepol_thrcons = np.zeros((M, N))
for run in range(M):
    print("Run %d/%d" % (run + 1, M))
    params = IHDPParams(fn_in)
    generator = IHDPGenerator(params)
    thresholdbandit = ThresholdConsBandit(generator, delta, n_pulls=N)
    for t in range(N):
        context, arm, observation, step_regret = thresholdbandit.step()
        regret_conthr[run, t] = step_regret
    if hasattr(thresholdbandit, 'update_theta'):
        updatepol_thrcons[run, :] = thresholdbandit.update_theta[:N]

In [None]:
# GreedyBandit: M independent runs of N steps each. Only the regret values
# returned by step() are recorded for this bandit.
regret_gre = np.zeros((M, N))
for run in range(M):
    print("Run %d/%d" % (run + 1, M))
    params = IHDPParams(fn_in)
    generator = IHDPGenerator(params)
    thresholdbandit = GreedyBandit(generator, delta, n_pulls=N)
    for t in range(N):
        context, arm, observation, step_regret = thresholdbandit.step()
        regret_gre[run, t] = step_regret

In [None]:
# RarelySwitchingLinUCB: M independent runs of N steps each. Each row of
# regret_rs holds the regret values returned by step(); each row of
# updatepol_rs holds the first N entries of update_theta, when present.
regret_rs = np.zeros((M, N))
updatepol_rs = np.zeros((M, N))
for run in range(M):
    print("Run %d/%d" % (run + 1, M))
    params = IHDPParams(fn_in)
    generator = IHDPGenerator(params)
    thresholdbandit = RarelySwitchingLinUCB(generator, delta, n_pulls=N)
    for t in range(N):
        context, arm, observation, step_regret = thresholdbandit.step()
        regret_rs[run, t] = step_regret
    if hasattr(thresholdbandit, 'update_theta'):
        updatepol_rs[run, :] = thresholdbandit.update_theta[:N]

In [None]:
# ThresholdMaxConsGreedyBandit: M independent runs of N steps each. Each row
# of regret_mcg holds the regret values returned by step(); each row of
# updatepol_mcg holds the first N entries of update_theta, when present.
regret_mcg = np.zeros((M, N))
updatepol_mcg = np.zeros((M, N))
for run in range(M):
    print("Run %d/%d" % (run + 1, M))
    params = IHDPParams(fn_in)
    generator = IHDPGenerator(params)
    thresholdbandit = ThresholdMaxConsGreedyBandit(generator, delta, n_pulls=N)
    for t in range(N):
        context, arm, observation, step_regret = thresholdbandit.step()
        regret_mcg[run, t] = step_regret
    if hasattr(thresholdbandit, 'update_theta'):
        updatepol_mcg[run, :] = thresholdbandit.update_theta[:N]

In [None]:
# ThresholdMaxConsBandit: M independent runs of N steps each. Each row of
# regret_mc holds the regret values returned by step(); each row of
# updatepol_mc holds the first N entries of update_theta, when present.
regret_mc = np.zeros((M, N))
updatepol_mc = np.zeros((M, N))
for run in range(M):
    print("Run %d/%d" % (run + 1, M))
    params = IHDPParams(fn_in)
    generator = IHDPGenerator(params)
    thresholdbandit = ThresholdMaxConsBandit(generator, delta, n_pulls=N)
    for t in range(N):
        context, arm, observation, step_regret = thresholdbandit.step()
        regret_mc[run, t] = step_regret
    if hasattr(thresholdbandit, 'update_theta'):
        updatepol_mc[run, :] = thresholdbandit.update_theta[:N]

## The regret

In [None]:
# Two-panel summary figure:
#   left  - running average of the per-step regret of each bandit
#   right - cumulative sum of the recorded update_theta values
# NOTE(review): sns.tsplot is deprecated in newer seaborn releases; this cell
# needs a seaborn version that still provides it.
fig, axes = plt.subplots(1,2, figsize = (8,3))

# Rounds are numbered 1..N so the running sum is divided by the number of
# rounds played so far. Dividing by np.arange(N) starts at 0, which gives a
# division by zero in the first column and an off-by-one divisor elsewhere.
rounds = np.arange(1, N + 1)

# (legend label, per-step regret array, colour) for every curve, in plotting
# order. The legend is built from this list so labels cannot drift out of sync
# with the curves that are actually drawn when an entry is (un)commented.
regret_curves = [
    ('LinUCB', regret_ucb, 'C0'),
    ('Greedy', regret_gre, 'C1'),
    #('RS greedy', regret_thr, 'C2'),
    #('RS conservative', regret_conthr, 'C3'),
    ('RS LinUCB', regret_rs, 'C4'),
    ('RCT', regret_rct, 'black'),
    ('RS MaxCon', regret_mc, 'C5'),
    ('RS MaxConGreedy', regret_mcg, 'C6'),
]
for curve_label, curve_regret, curve_color in regret_curves:
    sns.tsplot(np.cumsum(curve_regret, axis=1) / rounds, color = curve_color, ax = axes[0])

axes[0].legend([curve[0] for curve in regret_curves])
axes[0].set_xlabel('round')
axes[0].set_ylabel('cumulative per step regret')

#sns.tsplot(np.cumsum(updatepol_thr, axis=1), color = 'C2', ax = axes[1])
#sns.tsplot(np.cumsum(updatepol_thrcons, axis=1), color = 'C3', ax = axes[1])
sns.tsplot(np.cumsum(updatepol_rs, axis=1), color = 'C4', ax = axes[1])
sns.tsplot(np.cumsum(updatepol_mc, axis=1), color = 'C5', ax = axes[1])
sns.tsplot(np.cumsum(updatepol_mcg, axis=1), color = 'C6', ax = axes[1])

#axes[1].plot(np.cumsum(updatepol_thr, axis = 1).T, color = 'grey', linewidth = 0.5)
#axes[1].plot(np.mean(np.cumsum(updatepol_thr, axis = 1), axis = 0), color = 'green', linewidth = 2)
#axes[1].plot(np.cumsum(updatepol_thrcons, axis = 1).T, color = 'grey', linewidth = 0.5)
#axes[1].plot(np.mean(np.cumsum(updatepol_thrcons, axis = 1), axis = 0), color = 'blue', linewidth = 2)
#axes[1].plot(np.cumsum(updatepol_rs, axis = 1).T, color = 'grey', linewidth = 0.5)
#axes[1].plot(np.mean(np.cumsum(updatepol_rs, axis = 1), axis = 0), color = 'orange', linewidth = 2)

axes[1].set_xlabel('round')
axes[1].set_ylabel('cumulative policy changes')

sns.despine(trim = True)
#plt.savefig('./figs/ihdp.pdf')

In [None]:
# Number of policy changes: for each bandit, take the final value of the
# per-run cumulative sum of its update record and average it over the runs.
for message, updates in (
    ('Mean changes for mcg: %f', updatepol_mcg),
    ('Mean changes for mc: %f', updatepol_mc),
    ('Mean changes for rs: %f', updatepol_rs),
    ('Mean changes for thrcons: %f', updatepol_thrcons),
    ('Mean changes for thrgre: %f', updatepol_thr),
):
    final_counts = np.cumsum(updates, axis=1)[:, -1]
    print(message % np.mean(final_counts))