<center>
<h2>
Part 2. Bernoulli Bandits
</h2>
</center>

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from bandits import BernoulliBandit
from algorithms import FTL, UCB, UCB_V, kl_UCB
from visualization import *

Question 1a

see `report.pdf`

Question 1b

see `algorithms.py`

Question 1c

In [None]:
# Settings for the FTL experiment: K is handed to BernoulliBandit together with
# the success probabilities, T is the horizon passed to run(), and n is the
# number of independent repetitions.
K, T, n = 2, 1000, 1000
probs = [0.5, 0.6]

# Bandit instance and the Follow-The-Leader algorithm playing on it.
bern_bandit = BernoulliBandit(K, probs)
ftl = FTL(bern_bandit)

In [None]:
# Repeat the FTL experiment n times, keeping `ftl.regret` and the length-T
# `ftl.cum_regret` of every run (row run_idx of the matrix).
ftl_cum_regrets = np.zeros((n, T))
ftl_regrets = []
for run_idx in range(n):
    ftl.reset()  # start every repetition from a reset algorithm state
    ftl.run(T, n_run=run_idx, ntot_run=n)
    ftl_regrets.append(ftl.regret)
    ftl_cum_regrets[run_idx] = ftl.cum_regret

run 1000/1000 | running step 1000/1000

In [None]:
# Pass the per-run FTL regrets to the histogram helper, with save_fig enabled.
plot_regret_hist(
    ftl_regrets,
    save_fig=True,
    algo='FTL',
)

FTL either suffers essentially no regret, because the first pull of each arm gave 1 for the best arm and 0 for the worse one,<br>
or suffers a high regret (about $0.1 \times 1000 = 100$), because the first pull of each arm gave 0 for the best arm and 1 for the worse one: it then keeps pulling the worse arm, whose $\mu$ estimate always stays above 0.

Question 1d

In [None]:
# Plot the FTL cumulative regret averaged over the n runs (alpha=0.05).
plot_mean_regret(
    ftl_cum_regrets,
    alpha=0.05,
    save_fig=True,
    title='Average cumulative regret over time',
    algo='FTL',
)

We see a linear dependence on time, as the mean of the two edge cases discussed in the previous question (the new slope is $\frac{0 + \alpha}{2}$, where $\alpha$ is the slope in the high-regret case).

FTL is **not** a good algorithm for stochastic bandits because of the previously highlighted linear dependence on $T$.

Question 2a to 2e

see `report.pdf`

Question 2f

see `algorithms.py`

Question 2g

In [None]:
# Settings for the UCB experiment (same bandit as for FTL).
K, T, n = 2, 1000, 1000
probs = [0.5, 0.6]
sigma_square = 0.25  # value handed to UCB as its second argument

# Bandit instance and the UCB algorithm playing on it.
bern_bandit = BernoulliBandit(K, probs)
ucb = UCB(bern_bandit, sigma_square)

In [None]:
# Repeat the UCB experiment n times, keeping `ucb.regret` and the length-T
# `ucb.cum_regret` of every run (row run_idx of the matrix).
ucb_cum_regrets = np.zeros((n, T))
ucb_regrets = []
for run_idx in range(n):
    ucb.reset()  # start every repetition from a reset algorithm state
    ucb.run(T, n_run=run_idx, ntot_run=n)
    ucb_regrets.append(ucb.regret)
    ucb_cum_regrets[run_idx] = ucb.cum_regret

run 1000/1000 | running step 1000/1000

In [None]:
# Plot the UCB cumulative regret averaged over the n runs (alpha=0.01).
plot_mean_regret(
    ucb_cum_regrets,
    alpha=0.01,
    save_fig=True,
    save_name='p2_2g',
    title='Average cumulative regret over time',
    algo='UCB',
)

In [None]:
# Map each algorithm label to [per-run cumulative regrets, hex colour].
regret_dict = {}
regret_dict['FTL'] = [ftl_cum_regrets, '#53B3CD']
regret_dict['UCB'] = [ucb_cum_regrets, '#3A6FB6']

In [None]:
# Compare the average cumulative regret of FTL and UCB on one figure.
title = (
    'Average cumulative regret over time '
    '($p_1 = 0.5, p_2 = 0.6$)'
)
plot_multiple_avg_regrets(
    regret_dict,
    save_fig=True,
    title=title,
)

Question 2h

In [None]:
# Shared settings for the sigma^2 sweep.
K, T, n = 2, 1000, 1000
# Candidate sigma^2 values, each handed to UCB in turn.
sigma_squares = [0, 1 / 32, 1 / 16, 1 / 4, 1.0]

In [None]:
# First bandit of the sigma^2 sweep: the first arm has the higher probability.
probs_1 = [
    0.6,
    0.5,
]
bern_bandit_1 = BernoulliBandit(K, probs_1)

In [None]:
# Sweep UCB over every sigma^2 candidate on the (0.6, 0.5) bandit and store the
# `regret` of run i for candidate j in ucb1_sigma_regrets[i, j].
m = len(sigma_squares)
ucb1_sigma_regrets = np.zeros((n, m))
for j, sigma_square in enumerate(sigma_squares):
    print(f'sigma: {j+1}/{m}')
    # Play on bern_bandit_1 (p = [0.6, 0.5]), the bandit this sweep is about,
    # rather than the earlier `bern_bandit` (p = [0.5, 0.6]).
    ucb1 = UCB(bern_bandit_1, sigma_square)
    for i in range(n):
        ucb1.reset()  # start every repetition from a reset algorithm state
        ucb1.run(T, n_run=i, ntot_run=n)
        ucb1_sigma_regrets[i, j] = ucb1.regret
    print()  # line break after the in-place progress output

sigma: 1/5
run 1000/1000 | running step 1000/1000
sigma: 2/5
run 1000/1000 | running step 1000/1000
sigma: 3/5
run 1000/1000 | running step 1000/1000
sigma: 4/5
run 1000/1000 | running step 1000/1000
sigma: 5/5
run 1000/1000 | running step 1000/1000


In [None]:
# Raw string so the LaTeX backslashes are not parsed as Python escape sequences.
title = r'Average regret of UCB($\sigma^2$) as a function of $\sigma$ ($p_1 = 0.6, p_2 = 0.5$)'
# Plot the (n, m) sweep matrix; `ucb1_regrets` is not defined at this point of
# the notebook and is not the sweep result.
plot_final_regret_sigma_square(
    ucb1_sigma_regrets,
    sigma_squares,
    save_fig=True,
    save_name='p2_2h_1',
    title=title,
)

In [None]:
# Second bandit of the sigma^2 sweep: both success probabilities close to 1.
probs_2 = [
    0.85,
    0.95,
]
bern_bandit_2 = BernoulliBandit(K, probs_2)

In [None]:
# Sweep UCB over every sigma^2 candidate on bern_bandit_2 and store the
# `regret` of each run in the matching (run, candidate) cell.
m = len(sigma_squares)
ucb2_sigma_regrets = np.zeros((n, m))
for col, sigma_square in enumerate(sigma_squares):
    print(f'sigma: {col+1}/{m}')
    ucb2 = UCB(bern_bandit_2, sigma_square)
    for run_idx in range(n):
        ucb2.reset()  # start every repetition from a reset algorithm state
        ucb2.run(T, n_run=run_idx, ntot_run=n)
        ucb2_sigma_regrets[run_idx, col] = ucb2.regret
    print()  # line break after the in-place progress output

sigma: 1/5
run 1000/1000 | running step 1000/1000
sigma: 2/5
run 1000/1000 | running step 1000/1000
sigma: 3/5
run 1000/1000 | running step 1000/1000
sigma: 4/5
run 1000/1000 | running step 1000/1000
sigma: 5/5
run 1000/1000 | running step 1000/1000


In [None]:
# Raw string so the LaTeX backslashes are not parsed as Python escape sequences.
# The algorithm is written UCB($\sigma^2$) because UCB is constructed from the
# sigma^2 candidates.
title = r'Average regret of UCB($\sigma^2$) as a function of $\sigma$ ($p_1 = 0.85, p_2 = 0.95$)'
plot_final_regret_sigma_square(
    ucb2_sigma_regrets,
    sigma_squares,
    save_fig=True,
    save_name='p2_2h_2',
    title=title,
)

Question 3

In [None]:
from utils import bernoulli_var, sub_gaussian_sigma

# Grid of Bernoulli parameters p, from 0 in steps of 0.001 up to about 1.
p_val = np.arange(0, 1.001, 0.001)

# Draw both quantities against p on one figure. Labels containing LaTeX are raw
# strings so that `\m` and `\s` are not treated as (invalid) Python escapes.
plt.figure(figsize=(10, 5))
plt.plot(p_val, [bernoulli_var(p) for p in p_val], color='#FFC300', label=r'$\mathcal{B}(p)$ variance')
plt.plot(p_val, [sub_gaussian_sigma(p) for p in p_val], color='#FF5733', label=r'sub-Gaussian $\sigma^2(p)$')
plt.xlabel('$p$')
plt.ylabel('variance')
plt.title('Variance quantities as a function of $p$')
plt.legend()
plt.savefig('p2_3.pdf')  # save before show()
plt.show()

Question 5a & 5b

see `report.pdf`

Question 5c

see `algorithms.py`

Question 5d

In [None]:
# Settings for the UCB versus UCB-V comparison on the (0.5, 0.6) bandit.
K, T, n = 2, 1000, 1000
probs = [0.5, 0.6]
sigma_square = 0.25       # second argument of UCB
b, ksi, c = 1, 1.2, 1     # arguments handed to UCB_V, in this order

# Both algorithms play on the same bandit instance.
bern_bandit = BernoulliBandit(K, probs)
ucb = UCB(bern_bandit, sigma_square)
ucb_v = UCB_V(bern_bandit, b, ksi, c)

In [None]:
# Repeat the UCB experiment n times, keeping `ucb.regret` and the length-T
# `ucb.cum_regret` of every run (row run_idx of the matrix).
ucb_cum_regrets = np.zeros((n, T))
ucb_regrets = []
for run_idx in range(n):
    ucb.reset()  # start every repetition from a reset algorithm state
    ucb.run(T, n_run=run_idx, ntot_run=n)
    ucb_regrets.append(ucb.regret)
    ucb_cum_regrets[run_idx] = ucb.cum_regret

run 1000/1000 | running step 1000/1000

In [None]:
# Repeat the UCB-V experiment n times, keeping `ucb_v.regret` and the length-T
# `ucb_v.cum_regret` of every run (row run_idx of the matrix).
ucb_v_cum_regrets = np.zeros((n, T))
ucb_v_regrets = []
for run_idx in range(n):
    ucb_v.reset()  # start every repetition from a reset algorithm state
    ucb_v.run(T, n_run=run_idx, ntot_run=n)
    ucb_v_regrets.append(ucb_v.regret)
    ucb_v_cum_regrets[run_idx] = ucb_v.cum_regret

run 1000/1000 | running step 1000/1000

In [None]:
# Map each algorithm label to [per-run cumulative regrets, hex colour].
regret_dict = {}
regret_dict['UCB'] = [ucb_cum_regrets, '#3A6FB6']
regret_dict['UCB-V'] = [ucb_v_cum_regrets, '#413AD0']

In [None]:
# Compare the average cumulative regret of UCB and UCB-V on one figure.
title = (
    'Average cumulative regret over time '
    '($p_1 = 0.5, p_2 = 0.6$)'
)
plot_multiple_avg_regrets(
    regret_dict,
    save_fig=True,
    save_name='p2_5d',
    title=title,
)

Question 5e

In [None]:
# Low-probability bandit (0.1, 0.2); UCB and UCB-V reuse the parameters
# sigma_square, b, ksi and c already in scope.
probs1 = [0.1, 0.2]
bern_bandit1 = BernoulliBandit(K, probs1)

ucb1 = UCB(bern_bandit1, sigma_square)
ucb_v1 = UCB_V(bern_bandit1, b, ksi, c)

In [None]:
# Repeat the UCB experiment on bern_bandit1 n times, keeping `ucb1.regret` and
# the length-T `ucb1.cum_regret` of every run (row i of the matrix).
ucb1_regrets = []
ucb1_cum_regrets = np.zeros((n, T))
for i in range(n):
    ucb1.reset()  # start every repetition from a reset algorithm state
    # Pass the run counters like every other experiment loop in this notebook,
    # so the progress line also shows "run i/n".
    ucb1.run(T, n_run=i, ntot_run=n)
    ucb1_regrets.append(ucb1.regret)
    ucb1_cum_regrets[i] = ucb1.cum_regret

running step 1000/1000

In [None]:
# Repeat the UCB-V experiment on bern_bandit1 n times, keeping `ucb_v1.regret`
# and the length-T `ucb_v1.cum_regret` of every run.
ucb_v1_cum_regrets = np.zeros((n, T))
ucb_v1_regrets = []
for run_idx in range(n):
    ucb_v1.reset()  # start every repetition from a reset algorithm state
    ucb_v1.run(T, n_run=run_idx, ntot_run=n)
    ucb_v1_regrets.append(ucb_v1.regret)
    ucb_v1_cum_regrets[run_idx] = ucb_v1.cum_regret

run 1000/1000 | running step 1000/1000

In [None]:
# Map each algorithm label to [per-run cumulative regrets, hex colour].
regret_dict1 = {}
regret_dict1['UCB'] = [ucb1_cum_regrets, '#3A6FB6']
regret_dict1['UCB-V'] = [ucb_v1_cum_regrets, '#413AD0']

In [None]:
# Compare UCB and UCB-V on the (0.1, 0.2) bandit.
title = (
    'Average cumulative regret over time '
    '($p_1 = 0.1, p_2 = 0.2$)'
)
plot_multiple_avg_regrets(
    regret_dict1,
    save_fig=True,
    save_name='p2_5e_1',
    title=title,
)

In [None]:
# Bandit whose first arm never pays (0.0, 0.1); UCB and UCB-V reuse the
# parameters sigma_square, b, ksi and c already in scope.
probs2 = [0.0, 0.1]
bern_bandit2 = BernoulliBandit(K, probs2)

ucb2 = UCB(bern_bandit2, sigma_square)
ucb_v2 = UCB_V(bern_bandit2, b, ksi, c)

In [None]:
# Repeat the UCB experiment on bern_bandit2 n times, keeping `ucb2.regret` and
# the length-T `ucb2.cum_regret` of every run.
ucb2_cum_regrets = np.zeros((n, T))
ucb2_regrets = []
for run_idx in range(n):
    ucb2.reset()  # start every repetition from a reset algorithm state
    ucb2.run(T, n_run=run_idx, ntot_run=n)
    ucb2_regrets.append(ucb2.regret)
    ucb2_cum_regrets[run_idx] = ucb2.cum_regret

run 1000/1000 | running step 1000/1000

In [None]:
# Repeat the UCB-V experiment on bern_bandit2 n times, keeping `ucb_v2.regret`
# and the length-T `ucb_v2.cum_regret` of every run.
ucb_v2_cum_regrets = np.zeros((n, T))
ucb_v2_regrets = []
for run_idx in range(n):
    ucb_v2.reset()  # start every repetition from a reset algorithm state
    ucb_v2.run(T, n_run=run_idx, ntot_run=n)
    ucb_v2_regrets.append(ucb_v2.regret)
    ucb_v2_cum_regrets[run_idx] = ucb_v2.cum_regret

run 1000/1000 | running step 1000/1000

In [None]:
# Map each algorithm label to [per-run cumulative regrets, hex colour].
regret_dict2 = {}
regret_dict2['UCB'] = [ucb2_cum_regrets, '#3A6FB6']
regret_dict2['UCB-V'] = [ucb_v2_cum_regrets, '#413AD0']

In [None]:
# Compare UCB and UCB-V on the (0, 0.1) bandit.
title = (
    'Average cumulative regret over time '
    '($p_1 = 0, p_2 = 0.1$)'
)
plot_multiple_avg_regrets(
    regret_dict2,
    save_fig=True,
    save_name='p2_5e_2',
    title=title,
)

Question 6

see `algorithms.py`

In [None]:
# Settings for the kl-UCB experiment on the (0.5, 0.6) bandit. sigma_square, b,
# ksi and c are re-declared with the same values as before; kl_UCB itself only
# receives the bandit.
K, T, n = 2, 1000, 1000
probs = [0.5, 0.6]
sigma_square = 0.25
b, ksi, c = 1, 1.2, 1

bern_bandit = BernoulliBandit(K, probs)
kl_ucb = kl_UCB(bern_bandit)

In [None]:
# Repeat the kl-UCB experiment n times, keeping `kl_ucb.regret` and the
# length-T `kl_ucb.cum_regret` of every run.
kl_ucb_cum_regrets = np.zeros((n, T))
kl_ucb_regrets = []
for run_idx in range(n):
    kl_ucb.reset()  # start every repetition from a reset algorithm state
    kl_ucb.run(T, n_run=run_idx, ntot_run=n)
    kl_ucb_regrets.append(kl_ucb.regret)
    kl_ucb_cum_regrets[run_idx] = kl_ucb.cum_regret

run 1000/1000 | running step 1000/1000

In [None]:
# Map each algorithm label to [per-run cumulative regrets, hex colour]; the UCB
# and UCB-V matrices come from the earlier cells on the same (0.5, 0.6) bandit.
regret_dict = {}
regret_dict['UCB'] = [ucb_cum_regrets, '#3A6FB6']
regret_dict['UCB-V'] = [ucb_v_cum_regrets, '#413AD0']
regret_dict['kl-UCB'] = [kl_ucb_cum_regrets, '#7F41CA']

In [None]:
# Compare UCB, UCB-V and kl-UCB on the (0.5, 0.6) bandit.
title = (
    'Average cumulative regret over time '
    '($p_1 = 0.5, p_2 = 0.6$)'
)
plot_multiple_avg_regrets(
    regret_dict,
    save_fig=True,
    save_name='p2_6e_1',
    title=title,
)

In [None]:
# kl-UCB on the low-probability bandit (0.1, 0.2).
probs1 = [0.1, 0.2]
bern_bandit1 = BernoulliBandit(K, probs1)

kl_ucb1 = kl_UCB(bern_bandit1)

In [None]:
# Repeat the kl-UCB experiment on bern_bandit1 n times, keeping
# `kl_ucb1.regret` and the length-T `kl_ucb1.cum_regret` of every run.
kl_ucb1_cum_regrets = np.zeros((n, T))
kl_ucb1_regrets = []
for run_idx in range(n):
    kl_ucb1.reset()  # start every repetition from a reset algorithm state
    kl_ucb1.run(T, n_run=run_idx, ntot_run=n)
    kl_ucb1_regrets.append(kl_ucb1.regret)
    kl_ucb1_cum_regrets[run_idx] = kl_ucb1.cum_regret

run 1000/1000 | running step 1000/1000

In [None]:
# Map each algorithm label to [per-run cumulative regrets, hex colour] for the
# (0.1, 0.2) bandit.
regret_dict1 = {}
regret_dict1['UCB'] = [ucb1_cum_regrets, '#3A6FB6']
regret_dict1['UCB-V'] = [ucb_v1_cum_regrets, '#413AD0']
regret_dict1['kl-UCB'] = [kl_ucb1_cum_regrets, '#7F41CA']

In [None]:
# Compare UCB, UCB-V and kl-UCB on the (0.1, 0.2) bandit.
title = (
    'Average cumulative regret over time '
    '($p_1 = 0.1, p_2 = 0.2$)'
)
plot_multiple_avg_regrets(
    regret_dict1,
    save_fig=True,
    save_name='p2_6e_2',
    title=title,
)

In [None]:
# kl-UCB on the bandit whose first arm never pays (0.0, 0.1).
probs2 = [0.0, 0.1]
bern_bandit2 = BernoulliBandit(K, probs2)

kl_ucb2 = kl_UCB(bern_bandit2)

In [None]:
# Repeat the kl-UCB experiment on bern_bandit2 n times, keeping
# `kl_ucb2.regret` and the length-T `kl_ucb2.cum_regret` of every run.
kl_ucb2_cum_regrets = np.zeros((n, T))
kl_ucb2_regrets = []
for run_idx in range(n):
    kl_ucb2.reset()  # start every repetition from a reset algorithm state
    kl_ucb2.run(T, n_run=run_idx, ntot_run=n)
    kl_ucb2_regrets.append(kl_ucb2.regret)
    kl_ucb2_cum_regrets[run_idx] = kl_ucb2.cum_regret

run 1000/1000 | running step 1000/1000

In [None]:
# Map each algorithm label to [per-run cumulative regrets, hex colour] for the
# (0, 0.1) bandit.
regret_dict2 = {}
regret_dict2['UCB'] = [ucb2_cum_regrets, '#3A6FB6']
regret_dict2['UCB-V'] = [ucb_v2_cum_regrets, '#413AD0']
regret_dict2['kl-UCB'] = [kl_ucb2_cum_regrets, '#7F41CA']

In [None]:
# Compare UCB, UCB-V and kl-UCB on the (0, 0.1) bandit.
title = (
    'Average cumulative regret over time '
    '($p_1 = 0, p_2 = 0.1$)'
)
plot_multiple_avg_regrets(
    regret_dict2,
    save_fig=True,
    save_name='p2_6e_3',
    title=title,
)