In [1]:
import os
import pandas as pd
import numpy as np

import random
from bandit_algorithms import EpsilonGreedy, EpsilonDecay, Softmax, AnnealingSoftmax, UCB1, BayesUCB, ThompsonSampling

# 1. Load Data

In [2]:
# Which reward distribution to simulate (1, 2 or 3). Both the input file and the
# results directory are derived from this single setting.
REWARD_DISTRIBUTION = 1

# Remember the directory this cell was first run from. The os.chdir() below moves
# the kernel into the results directory, so without this anchor a second run of
# the cell would resolve the relative paths from the wrong place.
NOTEBOOK_DIR = globals().get('NOTEBOOK_DIR', os.getcwd())
os.chdir(NOTEBOOK_DIR)

df = pd.read_csv('../Data/events_cleaned_{}.csv'.format(REWARD_DISTRIBUTION))
# Later cells write their CSV files relative to the current working directory.
os.chdir('../Results/Reward Distribution {}'.format(REWARD_DISTRIBUTION))

pd.set_option('display.max_columns', None)
# Fixed-seed subsample so every run replays the same 50,000 events.
df = df.sample(n=50000, random_state=0)
df.head()

Unnamed: 0,electronics,computers,construction,auto,appliances,stationery,furniture,kids,accessories,country_yard,apparel,medicine,sport,jewelry
511916,0,0,0,0,0,0,0,0,0,0,0,0,0,0
232416,0,0,0,0,0,0,0,0,0,0,0,0,0,0
465310,0,0,0,0,0,0,0,0,0,0,0,0,0,0
129654,0,0,0,0,0,0,0,0,0,0,0,0,0,0
170539,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [3]:
# Sanity check: dimensions of the sampled frame as (rows, columns).
df.shape

(50000, 14)

# 2. Simulate Algorithms

In [4]:
def test_algorithm_1(algo, num_arms, num_sims, horizon, df):
    chosen_arms = [0 for i in range(num_sims*horizon)]
    rewards = [0 for i in range(num_sims*horizon)]
    cumulative_rewards = [0 for i in range(num_sims*horizon)]
    sim_rounds = [0 for i in range(num_sims*horizon)]
    time_steps = [0 for i in range(num_sims*horizon)]
    
    for sim in range(num_sims):
        sim += 1
        algo.initialize(num_arms)
        
        for t in range(horizon):
            t += 1
            index = (sim-1)*horizon + t - 1
            sim_rounds[index] = sim
            time_steps[index] = t
            
            chosen_arm = algo.select_arm()
            chosen_arms[index] = chosen_arm
            reward = df.values[t-1, chosen_arm]
            rewards[index] = reward
            
            if t == 1:
                cumulative_rewards[index] = reward
            else:
                cumulative_rewards[index] = cumulative_rewards[index-1] + reward 
            
            algo.update(chosen_arm, reward)
            
    return(sim_rounds, time_steps, chosen_arms, rewards, cumulative_rewards)

In [5]:
def test_algorithm_2(algo, num_arms, num_sims, horizon, df): # for softmax and annealing softmax
    chosen_arms = [0 for i in range(num_sims*horizon)]
    rewards = [0 for i in range(num_sims*horizon)]
    cumulative_rewards = [0 for i in range(num_sims*horizon)]
    sim_rounds = [0 for i in range(num_sims*horizon)]
    time_steps = [0 for i in range(num_sims*horizon)]
    
    for sim in range(num_sims):
        sim += 1
        algo.initialize(num_arms)
        
        for t in range(horizon):
            t += 1
            index = (sim-1)*horizon + t - 1
            sim_rounds[index] = sim
            time_steps[index] = t
            
            chosen_arm = algo.select_arm(num_arms)
            chosen_arms[index] = chosen_arm
            reward = df.values[t-1, chosen_arm]
            rewards[index] = reward
            
            if t == 1:
                cumulative_rewards[index] = reward
            else:
                cumulative_rewards[index] = cumulative_rewards[index-1] + reward 
            
            algo.update(chosen_arm, reward)
            
    return(sim_rounds, time_steps, chosen_arms, rewards, cumulative_rewards)

# Testing arguments shared by the simulations: the frame has one row per
# event (time step) and one column per category (arm).
N_STEPS, N_ARMS = df.shape
N_SIMS = 100

In [6]:
# Simulation settings derived from the sampled data frame.
N_ARMS = len(df.columns)  # one arm per category column
N_SIMS = 100              # number of simulation runs
N_STEPS = len(df.index)   # one time step per event row

## 2.1. Epsilon-Greedy Algorithm

In [7]:
random.seed(0) # to regenerate the same results

eg_results_path = 'Epsilon-Greedy/eg_results.csv'

# run simulation: one full set of simulations per epsilon value
for run_number, epsilon in enumerate([0.2, 0.4, 0.6, 0.8]):
    eg_algo = EpsilonGreedy(epsilon, [], [])
    eg_algo.initialize(N_ARMS)
    eg_results = pd.DataFrame(test_algorithm_1(eg_algo, N_ARMS, N_SIMS, N_STEPS, df)).transpose()
    eg_results.columns = ['n_sim', 'n_step', 'chosen_arm', 'rewards', 'cumulative_rewards']
    eg_results['type'] = epsilon

    # export results as csv file
    # The first epsilon (re)creates the file with a header and the remaining ones
    # append to it. Appending whenever the file already exists would duplicate
    # every row each time this cell is re-run.
    is_first_run = run_number == 0
    eg_results.to_csv(
        eg_results_path,
        mode='w' if is_first_run else 'a',
        header=is_first_run,
        index=False,
    )

# only the results of the last epsilon are still in memory at this point
eg_results

Unnamed: 0,n_sim,n_step,chosen_arm,rewards,cumulative_rewards,type
0,1,1,1,0,0,0.8
1,1,2,2,0,0,0.8
2,1,3,5,0,0,0.8
3,1,4,0,0,0,0.8
4,1,5,3,0,0,0.8
...,...,...,...,...,...,...
4999995,100,49996,4,0,396,0.8
4999996,100,49997,13,0,396,0.8
4999997,100,49998,1,0,396,0.8
4999998,100,49999,1,0,396,0.8


## 2.2. Epsilon-Decay Algorithm

In [8]:
random.seed(0)  # reset Python's RNG before the run

# Simulate the epsilon-decay policy and label the columns of the step log.
ed_algo = EpsilonDecay([], [])
ed_algo.initialize(N_ARMS)
ed_results = (
    pd.DataFrame(test_algorithm_1(ed_algo, N_ARMS, N_SIMS, N_STEPS, df))
    .T
    .rename(columns=dict(enumerate(['n_sim', 'n_step', 'chosen_arm', 'rewards', 'cumulative_rewards'])))
    .assign(type='eps_decay')
)

# Persist the run to CSV, then display it.
ed_results.to_csv('Epsilon-Decay/ed_results.csv', index=False)
ed_results

Unnamed: 0,n_sim,n_step,chosen_arm,rewards,cumulative_rewards,type
0,1,1,12,0,0,eps_decay
1,1,2,0,0,0,eps_decay
2,1,3,8,0,0,eps_decay
3,1,4,12,0,0,eps_decay
4,1,5,0,0,0,eps_decay
...,...,...,...,...,...,...
4999995,100,49996,1,0,1296,eps_decay
4999996,100,49997,1,0,1296,eps_decay
4999997,100,49998,1,0,1296,eps_decay
4999998,100,49999,1,0,1296,eps_decay


## 2.3. Softmax Algorithm

In [7]:
random.seed(0)

sm_results_path = 'Softmax/sm_results.csv'

# run simulation: one full set of simulations per temperature value
for run_number, temperature in enumerate([0.01, 0.5]):
    sm_algo = Softmax(temperature, [], [])
    sm_algo.initialize(N_ARMS)
    sm_results = pd.DataFrame(test_algorithm_2(sm_algo, N_ARMS, N_SIMS, N_STEPS, df)).transpose()
    sm_results.columns = ['n_sim', 'n_step', 'chosen_arm', 'rewards', 'cumulative_rewards']
    sm_results['type'] = temperature

    # export results as csv file
    # The first temperature (re)creates the file with a header and the remaining
    # ones append to it. Appending whenever the file already exists would
    # duplicate every row each time this cell is re-run.
    is_first_run = run_number == 0
    sm_results.to_csv(
        sm_results_path,
        mode='w' if is_first_run else 'a',
        header=is_first_run,
        index=False,
    )

# only the results of the last temperature are still in memory at this point
sm_results

Unnamed: 0,n_sim,n_step,chosen_arm,rewards,cumulative_rewards,type
0,1,1,4,0,0,0.5
1,1,2,3,0,0,0.5
2,1,3,10,0,0,0.5
3,1,4,10,0,0,0.5
4,1,5,4,0,0,0.5
...,...,...,...,...,...,...
4999995,100,49996,1,0,180,0.5
4999996,100,49997,1,0,180,0.5
4999997,100,49998,6,0,180,0.5
4999998,100,49999,2,0,180,0.5


## 2.4. Annealing Softmax Algorithm

In [10]:
random.seed(0)  # reset Python's RNG before the run

# Simulate the annealing-softmax policy and label the columns of the step log.
asm_algo = AnnealingSoftmax([], [])
asm_algo.initialize(N_ARMS)
asm_results = (
    pd.DataFrame(test_algorithm_2(asm_algo, N_ARMS, N_SIMS, N_STEPS, df))
    .T
    .rename(columns=dict(enumerate(['n_sim', 'n_step', 'chosen_arm', 'rewards', 'cumulative_rewards'])))
    .assign(type='ann_softmax')
)

# Persist the run to CSV, then display it.
asm_results.to_csv('Annealing Softmax/asm_results.csv', index=False)
asm_results

## 2.5. UCB1 Algorithm

In [8]:
random.seed(0)  # reset Python's RNG before the run

# Simulate the UCB1 policy and label the columns of the step log.
ucb1_algo = UCB1([], [])
ucb1_algo.initialize(N_ARMS)
ucb1_results = (
    pd.DataFrame(test_algorithm_1(ucb1_algo, N_ARMS, N_SIMS, N_STEPS, df))
    .T
    .rename(columns=dict(enumerate(['n_sim', 'n_step', 'chosen_arm', 'rewards', 'cumulative_rewards'])))
    .assign(type='ucb1')
)

# Persist the run to CSV, then display it.
ucb1_results.to_csv('UCB1/ucb1_results.csv', index=False)
ucb1_results

Unnamed: 0,n_sim,n_step,chosen_arm,rewards,cumulative_rewards,type
0,1,1,0,0,0,ucb1
1,1,2,1,0,0,ucb1
2,1,3,2,0,0,ucb1
3,1,4,3,0,0,ucb1
4,1,5,4,0,0,ucb1
...,...,...,...,...,...,...
4999995,100,49996,12,0,245,ucb1
4999996,100,49997,13,0,245,ucb1
4999997,100,49998,0,0,245,ucb1
4999998,100,49999,1,0,245,ucb1


## 2.6. Bayesian UCB Algorithm

In [9]:
random.seed(0)  # reset Python's RNG before the run

# Simulate the Bayesian UCB policy and label the columns of the step log.
bucb_algo = BayesUCB([], [])
bucb_algo.initialize(N_ARMS)
bucb_results = (
    pd.DataFrame(test_algorithm_1(bucb_algo, N_ARMS, N_SIMS, N_STEPS, df))
    .T
    .rename(columns=dict(enumerate(['n_sim', 'n_step', 'chosen_arm', 'rewards', 'cumulative_rewards'])))
    .assign(type='bayes_ucb')
)

# Persist the run to CSV, then display it.
bucb_results.to_csv('Bayesian UCB/bucb_results.csv', index=False)
bucb_results

Unnamed: 0,n_sim,n_step,chosen_arm,rewards,cumulative_rewards,type
0,1,1,0,0,0,bayes_ucb
1,1,2,1,0,0,bayes_ucb
2,1,3,2,0,0,bayes_ucb
3,1,4,3,0,0,bayes_ucb
4,1,5,4,0,0,bayes_ucb
...,...,...,...,...,...,...
4999995,100,49996,1,0,1272,bayes_ucb
4999996,100,49997,1,0,1272,bayes_ucb
4999997,100,49998,1,0,1272,bayes_ucb
4999998,100,49999,1,0,1272,bayes_ucb


## 2.7. Thompson Sampling

In [13]:
random.seed(0)  # reset Python's RNG before the run

# Simulate the Thompson sampling policy and label the columns of the step log.
thomp_algo = ThompsonSampling([], [])
thomp_algo.initialize(N_ARMS)
thomp_results = (
    pd.DataFrame(test_algorithm_1(thomp_algo, N_ARMS, N_SIMS, N_STEPS, df))
    .T
    .rename(columns=dict(enumerate(['n_sim', 'n_step', 'chosen_arm', 'rewards', 'cumulative_rewards'])))
    .assign(type='thomp_sampling')
)

# Persist the run to CSV, then display it.
thomp_results.to_csv('Thompson Sampling/thomp_results.csv', index=False)
thomp_results