# Let's Explore these Sampling Algorithms

In [None]:
library(tidyverse)


Okay, so if we want to explore the properties of sampling algorithms, we need a known multi-armed bandit problem. To specify a multi-armed bandit, we need to know the number of arms $k$ and the reward probability of each arm, $R_k$. In order to run some simulations, we also need to choose a horizon $N$, which is the number of trials.

Because we need a reward probability for each arm, the number of arms is implicit in the length of the reward vector. So we can represent a multi-armed bandit problem as a reward vector $R_k$.


In [None]:
# Let's start off with a placeholder bandit problem. Both values below are
# left as NA and have to be filled in before any simulation is run.

# N: the horizon, i.e. the number of trials to simulate.
N <- NA

# mab_0: presumably the reward-probability vector of the first bandit (one
# entry per arm) -- TODO confirm and fill in.
mab_0 <- NA



## $\epsilon$ greedy sampling

In epsilon-greedy sampling, we pick an arm uniformly at random with probability $\epsilon$, which allows us to do some exploring. Otherwise, we choose the arm with the highest observed win rate so far (breaking ties at random).


In [None]:
#' Simulate epsilon-greedy sampling on a Bernoulli multi-armed bandit.
#'
#' On each trial, with probability `epsilon` an arm is picked uniformly at
#' random (exploration); otherwise the arm with the highest observed win rate
#' is picked, with ties broken at random (exploitation).
#'
#' @param arms Numeric vector of reward probabilities, one entry per arm.
#' @param N Horizon: the number of trials to simulate (non-negative).
#' @param epsilon Probability of exploring on any given trial.
#' @return A data.frame with one row per trial and the columns `trial`,
#'   `choice` (index of the arm pulled), `outcome` (0/1 reward), `optimal`
#'   (the largest reward probability in `arms`) and `epsilon`. It has zero
#'   rows when `N` is 0.
epsilon_greedy <- function(arms, N, epsilon){
    stopifnot(length(arms) > 0, length(N) == 1, !is.na(N), N >= 0)
    n_arms <- length(arms)

    # Every arm starts at 1 win out of 2 pulls, so the win rate is defined
    # (0.5) before any real data has been collected.
    wins <- rep(1, n_arms)
    pulls <- rep(2, n_arms)

    # Preallocate the per-trial log instead of growing a data.frame row by row.
    choice_log <- integer(N)
    outcome_log <- integer(N)

    # seq_len() yields an empty loop for N == 0, whereas 1:N would run twice.
    for (i in seq_len(N)) {
        if (runif(1) < epsilon) {
            choice <- sample.int(n_arms, 1)
        } else {
            win_rate <- wins / pulls
            best <- which(win_rate == max(win_rate))
            # sample() on a length-one vector draws from 1:best, so only
            # sample when there is a genuine tie.
            choice <- if (length(best) == 1) best else sample(best, 1)
        }
        outcome <- rbinom(1, 1, arms[choice])
        pulls[choice] <- pulls[choice] + 1
        wins[choice] <- wins[choice] + outcome
        choice_log[i] <- choice
        outcome_log[i] <- outcome
    }

    data.frame(trial = seq_len(N),
               choice = choice_log,
               outcome = outcome_log,
               optimal = rep(max(arms), N),
               epsilon = rep(epsilon, N))
}



## Win Stay, Lose Sample

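In win-stay, lose-sample, we keep pulling the same arm after a win and pick a new arm at random after a loss. The advantage of this approach is that you never have to remember anything more than the last trial.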


In [None]:
#' Simulate win-stay, lose-sample on a Bernoulli multi-armed bandit.
#'
#' The first arm is picked uniformly at random. After a win (outcome 1) the
#' same arm is pulled again; after a loss a new arm is picked uniformly at
#' random (which may be the same arm again).
#'
#' @param arms Numeric vector of reward probabilities, one entry per arm.
#' @param N Horizon: the number of trials to simulate (non-negative).
#' @return A data.frame with one row per trial and the columns `trial`,
#'   `choice` (index of the arm pulled), `outcome` (0/1 reward) and `optimal`
#'   (the largest reward probability in `arms`). It has exactly `N` rows,
#'   including for `N` equal to 0 or 1.
wsls <- function(arms, N){
    stopifnot(length(arms) > 0, length(N) == 1, !is.na(N), N >= 0)
    n_arms <- length(arms)

    # Preallocate the per-trial log instead of growing a data.frame row by row.
    choice_log <- integer(N)
    outcome_log <- integer(N)

    # Only the previous trial is remembered. Starting from "no win" makes the
    # first trial sample an arm, exactly like any trial that follows a loss.
    choice <- NA_integer_
    outcome <- 0L

    # A single seq_len() loop avoids the backwards 2:N sequence for N < 2.
    for (i in seq_len(N)) {
        if (outcome != 1) {
            choice <- sample.int(n_arms, 1)
        }
        outcome <- rbinom(1, 1, arms[choice])
        choice_log[i] <- choice
        outcome_log[i] <- outcome
    }

    data.frame(trial = seq_len(N),
               choice = choice_log,
               outcome = outcome_log,
               optimal = rep(max(arms), N))
}


## Thompson Sampling

In [None]:
#' Simulate Thompson sampling on a Bernoulli multi-armed bandit.
#'
#' Each arm keeps a Beta(alpha, beta) belief about its reward probability,
#' starting at Beta(1, 1). On every trial one value is drawn per arm from its
#' current Beta distribution, the arm with the largest draw is pulled, and that
#' arm's alpha (on a win) or beta (on a loss) is incremented.
#'
#' @param arms Numeric vector of reward probabilities, one entry per arm.
#' @param N Horizon: the number of trials to simulate (non-negative).
#' @return A tibble with one row per trial and the columns `trial`, `choice`
#'   (index of the arm pulled), `outcome` (0/1 reward), `optimal` (the largest
#'   reward probability in `arms`), `B1..Bk` (posterior mean of each arm after
#'   the trial's update) and `A1..Ak` (the reward probabilities in `arms`).
#'   It has zero rows when `N` is 0.
thompson <- function(arms, N){
    stopifnot(length(arms) > 0, length(N) == 1, !is.na(N), N >= 0)
    n_arms <- length(arms)

    alphas <- rep(1, n_arms)
    betas <- rep(1, n_arms)

    # Preallocate the per-trial log instead of binding one tibble row per trial.
    choice_log <- integer(N)
    outcome_log <- integer(N)
    post_means <- matrix(NA_real_, nrow = N, ncol = n_arms,
                         dimnames = list(NULL, paste0("B", seq_len(n_arms))))

    # seq_len() yields an empty loop for N == 0, whereas 1:N would run twice.
    for (i in seq_len(N)) {
        thetas <- rbeta(n_arms, alphas, betas)
        best <- which(thetas == max(thetas))
        # sample() on a length-one vector draws from 1:best, so only sample
        # when there is a genuine tie.
        choice <- if (length(best) == 1) best else sample(best, 1)
        outcome <- rbinom(1, 1, arms[choice])
        if (outcome == 1) {
            alphas[choice] <- alphas[choice] + 1
        } else {
            betas[choice] <- betas[choice] + 1
        }
        choice_log[i] <- choice
        outcome_log[i] <- outcome
        # Posterior means are recorded after this trial's update.
        post_means[i, ] <- alphas / (alphas + betas)
    }

    # The reward probabilities are repeated on every row as columns A1..Ak.
    arm_probs <- matrix(rep(arms, each = N), nrow = N, ncol = n_arms,
                        dimnames = list(NULL, paste0("A", seq_len(n_arms))))

    bind_cols(
        tibble(trial = seq_len(N),
               choice = choice_log,
               outcome = outcome_log,
               optimal = rep(max(arms), N)),
        as_tibble(post_means),
        as_tibble(arm_probs)
    )
}
