# Bayesian bandits

Notebook describing our efforts on a Bayesian approach to bandits
## Set-up

1. Create this notebook via Jupyter in ~/Columbia/academic/src/bayesianBandits
2. Create an empty public GitHub repository
3. Create local Git repository:
    * \$ git init
    * \$ git add BayesianBandits.ipynb
    * \$ touch BayesianBandits.py
    * \$ git add BayesianBandits.py
    * \$ git commit
    * \$ git remote add origin https://github.com/iurteaga/BayesianBandits
    * \$ git push -u origin master




# Bayesian bandits simulations

General simulation set up

In [1]:
import numpy as np
import scipy.stats as stats

In [2]:
from BayesianBandits import * 

In [3]:
# How many realizations to run
R = 10 ** 3
# How many time instants each run lasts
t_max = 10 ** 3

## Different methodologies

In [None]:
# Bandit: K arms whose rewards are drawn from reward_function
K = 3
reward_function = stats.bernoulli
# theta is a 1-tuple wrapping a (K,1) column with one parameter per arm
theta = (np.array([[0.1], [0.15], [0.2]]),)
returns_expected = reward_function.mean(theta)
# Beta(1,1) prior for every arm; sized from K (not a literal 3) so the prior
# always has one row per arm if K is changed
reward_prior = {'function': stats.beta, 'alpha': np.ones((K, 1)), 'beta': np.ones((K, 1))}

# Optimal bandit
optimal_bandit = OptimalBandit(K, reward_function, theta)

# M is the last constructor argument of the three Bayesian bandits below;
# it is the same for all of them, so it is set once
M = 100
# Monte Carlo bandit
mc_bandit = BayesianBanditMonteCarlo(K, reward_function, theta, reward_prior, M)
# Numerical bandit
num_bandit = BayesianBanditNumerical(K, reward_function, theta, reward_prior, M)
# Hybrid Monte Carlo bandit
hmc_bandit = BayesianBanditHybridMonteCarlo(K, reward_function, theta, reward_prior, M)

# All bandits: labels are listed in the same order as the bandits
bandits = [optimal_bandit, num_bandit, mc_bandit, hmc_bandit]
bandits_labels = ['Optimal', 'Numerical', 'Monte Carlo', 'Hybrid Monte Carlo']
# NOTE(review): 'g' appears twice in this palette; harmless while only four
# bandits are listed -- confirm how plot_bandits consumes the colors
bandits_colors = ['y', 'b', 'g', 'r', 'g', 'c', 'm']
# Execution
bandits_returns, bandits_actions, bandits_predictive = execute_bandits(K, bandits, R, t_max)


In [None]:
# Plot the results of the methodology comparison
# plot_std is forwarded to plot_bandits as its last argument
plot_std = True
# Plot up to the last simulated instant (alternative: t_plot = 25)
t_plot = t_max
plot_bandits(
    returns_expected,
    bandits_returns,
    bandits_actions,
    bandits_predictive,
    bandits_colors,
    bandits_labels,
    t_plot,
    plot_std,
)

## Different Monte Carlo sample sizes

In [None]:
# Bandit
K = 3
reward_function = stats.bernoulli
# One parameter per arm, uniform on [0, 0.5). Drawn as a (K,1) column so theta
# has the same layout as the prior's 'alpha'/'beta' entries below (a flat (K,)
# draw would not match them). The values are redrawn every time this cell runs;
# pass random_state=<int> to stats.uniform.rvs to pin them.
theta = (stats.uniform.rvs(size=(K, 1)) / 2,)
returns_expected = reward_function.mean(theta)
reward_prior = {'function': stats.beta, 'alpha': np.ones((K, 1)), 'beta': np.ones((K, 1))}

# Monte Carlo bandits: one per entry of M_samples, labelled with that value
M_samples = np.array([1, 1000])
bandits = [
    BayesianBanditMonteCarlo(K, reward_function, theta, reward_prior, n_samples)
    for n_samples in M_samples
]
bandits_labels = ['M={}'.format(n_samples) for n_samples in M_samples]
bandits_colors = ['b', 'g', 'r', 'm', 'c', 'y']
# Execution
bandits_returns, bandits_actions, bandits_predictive = execute_bandits(K, bandits, R, t_max)


In [None]:
# Show the drawn parameters, then the (flattened) index of the largest expected return
print(theta, returns_expected.argmax(), sep='\n')

In [None]:
# Full horizon (t_plot = t_max), with plot_std switched on
plot_std = True
t_plot = t_max
plot_bandits(
    returns_expected,
    bandits_returns,
    bandits_actions,
    bandits_predictive,
    bandits_colors,
    bandits_labels,
    t_plot,
    plot_std,
)

In [None]:
# Full horizon (t_plot = t_max), with plot_std switched off
plot_std = False
t_plot = t_max
plot_bandits(
    returns_expected,
    bandits_returns,
    bandits_actions,
    bandits_predictive,
    bandits_colors,
    bandits_labels,
    t_plot,
    plot_std,
)

In [None]:
# Initial stretch only (t_plot = 100), with plot_std switched on
plot_std = True
t_plot = 100
plot_bandits(
    returns_expected,
    bandits_returns,
    bandits_actions,
    bandits_predictive,
    bandits_colors,
    bandits_labels,
    t_plot,
    plot_std,
)

In [None]:
# Initial stretch only (t_plot = 100), with plot_std switched off
plot_std = False
t_plot = 100
plot_bandits(
    returns_expected,
    bandits_returns,
    bandits_actions,
    bandits_predictive,
    bandits_colors,
    bandits_labels,
    t_plot,
    plot_std,
)