### Test LinUCB, ThresholdBandit, GreedyBandit and ConsLinUCB

In [1]:
%pylab inline
pylab.rcParams['figure.figsize'] = (6, 6)

from lib.bandits import LinUCB, ThresholdBandit,GreedyBandit, ConsLinUCB, expected_regret
from lib.generator import LinearGeneratorParams, LinearGenerator
import numpy as np 

import matplotlib.pyplot as plt
from scipy.stats import truncnorm
import seaborn as sns
sns.set_style('ticks')

Populating the interactive namespace from numpy and matplotlib


The minimum supported version is 2.4.6



In [3]:
# Experiment configuration: choose the bandit algorithm and the problem size.
# Valid values for `alg`: 'greedy', 'linucb', 'threshold', 'thresholdcons', 'conslinucb'
alg = 'conslinucb'

M = 3    #number of runs
N = 10000 #number of timesteps
save = False
max_alpha = 2
max_beta = 2
k = 4    #Number of arms
d = 5   #Dimension of context (includes one dim for intercept term)
intercept = True
evaluate_every = 100

# Map the chosen name to its bandit class. The chain is kept (instead of a
# dict) so that each class name is only looked up when it is actually selected.
if alg == 'greedy':
	BanditAlg = GreedyBandit
elif alg == 'linucb':
	BanditAlg = LinUCB
elif alg == 'threshold':
	BanditAlg = ThresholdBandit
elif alg == 'thresholdcons':
	# NOTE(review): ThresholdConsBandit is not among the names imported from
	# lib.bandits in the import cell -- confirm it exists before selecting this.
	BanditAlg = ThresholdConsBandit
elif alg == 'conslinucb':
	BanditAlg = ConsLinUCB
else:
	# Fail loudly: merely printing would leave BanditAlg unbound (or stale from
	# a previous execution) and the error would only surface in a later cell.
	raise ValueError("Select a valid algorithm (got %r)" % (alg,))

## Run CLUCB 

In [None]:
# Seed NumPy's global RNG so the sampled problem instances are the same after
# every Restart & Run All (truncnorm.rvs draws from this global state when no
# random_state is passed).
SEED = 0
np.random.seed(SEED)

#Generate slopes and intercepts, one set per run (first axis) and per arm (second axis)
alphas = truncnorm.rvs(-max_alpha, max_alpha, scale = 1, size=(M,k,d-1))
betas = truncnorm.rvs(-max_beta, max_beta, scale = 1, size=(M,k))+4

# Parameters of a single extra "baseline" arm per run; its intercepts are
# shifted by +2 while the regular arms above are shifted by +4.
baseline_alphas = truncnorm.rvs(-max_alpha, max_alpha, scale = 1, size=(M,1,d-1))
baseline_betas = truncnorm.rvs(-max_beta, max_beta, scale = 1, size=(M,1))+2

# Per-run, per-timestep result buffers. arm_pulls has k+1 columns: one per arm
# plus a final column for steps that report a negative arm index.
regret = np.zeros((M, N))
expt_regret = np.zeros((M, N))
arm_pulls = np.zeros((M, N, k+1))
n_changes = np.zeros((M, N))
update_pol = np.zeros((M, N))

Running conslinucb algorithm


In [None]:
# Run the selected algorithm M times for N steps each, logging the per-step
# regret and a one-hot record of which arm was pulled.
print("Running %s algorithm"%alg)
for m in range(M):
	# Problem instance for this run, built from the m-th set of sampled parameters.
	params = LinearGeneratorParams(np.atleast_2d(alphas[m,:,:]), betas[m,:], d = d, k = k, intercept = intercept)
	generator = LinearGenerator(params)
	# (slopes, intercept) pair handed to the bandit as its baseline description.
	baseline = (np.squeeze(baseline_alphas[m,:]), baseline_betas[m,0])
	bandit = BanditAlg(generator, baseline, alpha = 0.1)
	print("Run: %d/%d"%(m+1,M))
	for i in range(N):
		(ctx, arm_idx, obs, r) = bandit.step()
		regret[m,i] = r
		# A negative arm index is recorded in the extra last column (index k)
		# -- presumably the baseline action; confirm against ConsLinUCB.
		pulled_col = arm_idx if arm_idx >= 0 else k
		arm_pulls[m,i,pulled_col] = 1

Run: 1/3
Run: 2/3


In [None]:
# Inspect the `arms_idx` attribute of the bandit left over from the last run (m = M-1).
bandit.arms_idx

## LinUCB for comparison

In [None]:
# Separate result buffers for the LinUCB comparison, with the same shapes as
# the buffers used for the first algorithm.
regret_linucb = np.zeros(shape = (M, N))
arm_pulls_linucb = np.zeros(shape = (M, N, k+1))
# Rebind the algorithm class; the loop in the next cell instantiates BanditAlg.
BanditAlg = LinUCB

In [None]:
# Repeat the experiment on the same sampled problem instances, this time with
# whatever class BanditAlg currently refers to and no baseline argument.
for m in range(M):
	params = LinearGeneratorParams(np.atleast_2d(alphas[m,:,:]), betas[m,:], d = d, k = k, intercept = intercept)
	generator = LinearGenerator(params)
	bandit = BanditAlg(generator)
	print("Run: %d/%d"%(m+1,M))
	for i in range(N):
		(ctx, arm_idx, obs, r) = bandit.step()
		regret_linucb[m,i] = r
		# Non-negative indices mark that arm's column; anything negative
		# goes into the spare last column (index k).
		pulled_col = k if arm_idx < 0 else arm_idx
		arm_pulls_linucb[m,i,pulled_col] = 1

## Comparison of the two methods

Trade-off between convergence rate, risk, and confidence level

In [None]:
# Plot cumulative regret per round for both methods; each tsplot call receives
# an (M, N) array of running sums, one row per run.
curves = [(regret, 'red'), (regret_linucb, 'blue')]
for per_round, colour in curves:
	sns.tsplot(np.cumsum(per_round, axis = 1), color = colour)
plt.xlabel('round')
plt.ylabel('cumulative regret')
# Legend entries follow the plotting order above.
plt.legend(['CLUCB', 'LinUCB'])
sns.despine(trim = True)