In [1]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import helpers.mab_bernoulli as brn
import matplotlib.pyplot as plt
import multiprocess as mp
import time

In [49]:
def single_sim_wrapper(task):
    """Run one simulation task and summarise cumulative regret for two policies.

    The bandit instance has ``K - 1`` arms with mean ``0.5 - delta/2`` and one
    final arm with mean ``0.5 + delta/2``. For each of ``num_samples``
    repetitions, per-arm data is drawn with ``brn.gen_data`` and the same data
    is passed to both ``brn.FS_path`` and ``brn.AR_path``; the per-round regret
    (optimal mean minus the returned reward) is accumulated with ``cumsum``.

    Parameters
    ----------
    task : sequence
        ``[T, K, delta, data_amt, num_samples]`` -- horizon, number of arms,
        gap between the best arm and the others, data points generated per
        arm, and number of repetitions.

    Returns
    -------
    (fs_row, ar_row) : tuple of list
        Each row is ``[K, delta, data_amt, num_samples]`` followed by ``T``
        mean cumulative regrets, ``T`` lower bounds and ``T`` upper bounds,
        where the bounds are the mean -/+ 2 * std / sqrt(num_samples).

    Notes
    -----
    Progress messages are printed under the module-level ``print_lock``, which
    must already exist in the process running this function.
    """
    T, K, delta, data_amt, num_samples = task[:5]

    # K-1 identical sub-optimal arms, with the optimal arm placed last.
    arms = [0.5 - delta / 2 for _ in range(K - 1)]
    opt_mean = 0.5 + delta / 2
    arms.append(opt_mean)

    # Report progress every PERC_FREQ percent of the repetitions (at least every one).
    PERC_FREQ = 0.5
    print_intvl = max(1, int(num_samples * PERC_FREQ / 100))

    # Axis 0: policy (0 = FS, 1 = AR); axis 1: repetition; axis 2: round.
    regret_vecs = np.zeros((2, num_samples, T))
    for i in range(num_samples):
        # A distinct index name keeps the repetition counter `i` unambiguous.
        arm_data = [brn.gen_data(arms[arm_idx], data_amt) for arm_idx in range(K)]
        # Each path gets its own freshly built priors object.
        fs_rewards = brn.FS_path(T, arms, brn.flat_priors(K), arm_data)
        ar_rewards = brn.AR_path(T, arms, brn.flat_priors(K), arm_data)

        regret_vecs[0, i] = np.cumsum(opt_mean - np.array(fs_rewards))
        regret_vecs[1, i] = np.cumsum(opt_mean - np.array(ar_rewards))

        if (i + 1) % print_intvl == 0:
            # Trailing spaces overwrite leftovers of a longer previous message
            # because the line is rewritten in place via '\r'.
            whitespace = " " * 20
            # The context manager releases the lock even if printing raises.
            with print_lock:
                print("Task " + str(task) + " is " + str((i+1)/num_samples*100) + "% completed." + whitespace , end='\r')

    avgs = np.mean(regret_vecs, axis=1)
    # Half-width of the band: two standard errors, computed once for both bounds.
    half_width = 2 * np.std(regret_vecs, axis=1) / np.sqrt(num_samples)
    lower_CBs = avgs - half_width
    upper_CBs = avgs + half_width

    row_prefix = [K, delta, data_amt, num_samples]
    fs_row = row_prefix + list(avgs[0]) + list(lower_CBs[0]) + list(upper_CBs[0])
    ar_row = row_prefix + list(avgs[1]) + list(lower_CBs[1]) + list(upper_CBs[1])
    return fs_row, ar_row

def gen_dfs(T):
    """Create two empty result frames sharing one column layout.

    The columns are the four task descriptors, then the integers ``1..T``,
    then ``low_1..low_T`` and finally ``high_1..high_T``.

    Parameters
    ----------
    T : int
        Number of rounds; determines how many per-round columns are created.

    Returns
    -------
    (fs_df, ar_df) : tuple of pandas.DataFrame
        Two separate, empty frames with identical columns.
    """
    rounds = range(1, T + 1)
    columns = ["NumArms", "ArmGap", "ArmData", "NumSamples"]
    columns.extend(rounds)
    for prefix in ("low_", "high_"):
        columns.extend(prefix + str(n) for n in rounds)
    return pd.DataFrame(columns=columns), pd.DataFrame(columns=columns)

In [50]:
# Simulation grid: one task per (arm count, gap, data amount) combination.
T = 5000  # simulation horizon (rounds per simulated path)
K_list = [4]  # arm counts to sweep over
delta_list = [0.2]  # reward gap between the best arm and the other arms
data_amt_list = [30, 35, 40, 45, 50]  # data points generated per arm
num_samples = 100  # repetitions per task
# Each task is [T, K, delta, data_amt, num_samples], the layout that
# single_sim_wrapper reads by position.
tasks = [
    [T, n_arms, gap, n_points, num_samples]
    for n_arms in K_list
    for gap in delta_list
    for n_points in data_amt_list
]

In [51]:
# Run every task in `tasks` across a worker pool and time the whole batch.
# The lock is created before the pool; single_sim_wrapper refers to it as the
# global `print_lock` when printing progress.
# NOTE(review): this presumably relies on workers inheriting notebook globals
# (fork start method) -- confirm on platforms that spawn workers instead.
print_lock = mp.Lock()
start = time.time()
# The context manager shuts the worker processes down once the blocking map()
# has returned, so re-running this cell does not accumulate idle pools.
with mp.Pool() as p:
    result_rows = p.map(single_sim_wrapper, tasks)
    elapsed = time.time() - start
# Progress lines end in '\r'; the empty print moves past the last one.
print()
print(elapsed)

Task [5000, 4, 0.2, 40, 100] is 100.0% completed.                                 
22.495846033096313


In [34]:
#Update stored DataFrame
# Appends the (fs_row, ar_row) pairs in `result_rows` to the frames stored on disk.
# To start from empty frames instead of the stored ones, use:
#fs_df, ar_df = gen_dfs(T)
# index_col=0: the files are written by to_csv with the default index (see the
# commented calls below). Without it, the saved index comes back as an extra
# "Unnamed: 0" column and each result row is one value short of the column count.
fs_df = pd.read_csv("bern_fs_df.csv", index_col=0)
ar_df = pd.read_csv("bern_ar_df.csv", index_col=0)
# Build all new rows at once and concatenate a single time instead of growing
# the frames row by row; rows are matched to columns by position.
fs_new = pd.DataFrame([fs_res for fs_res, _ in result_rows], columns=fs_df.columns)
ar_new = pd.DataFrame([ar_res for _, ar_res in result_rows], columns=ar_df.columns)
fs_df = pd.concat([fs_df, fs_new], ignore_index=True)
ar_df = pd.concat([ar_df, ar_new], ignore_index=True)
# Uncomment to persist the extended frames (overwrites the files read above).
#fs_df.to_csv("bern_fs_df.csv")
#ar_df.to_csv("bern_ar_df.csv")

Unnamed: 0,NumArms,ArmGap,ArmData,NumSamples,1,2,3,4,5,6,...,high_4991,high_4992,high_4993,high_4994,high_4995,high_4996,high_4997,high_4998,high_4999,high_5000
0,4.0,0.2,30.0,200.0,0.06,0.12,0.105,0.155,0.23,0.205,...,29.050871,29.056396,29.041582,28.996949,29.006709,28.96798,28.949979,28.938417,28.943796,28.947094
1,4.0,0.2,35.0,200.0,0.035,0.145,0.145,0.18,0.22,0.285,...,21.852031,21.809999,21.822898,21.839744,21.885675,21.910113,21.876468,21.878534,21.922474,21.905083
2,4.0,0.2,40.0,200.0,0.045,0.08,0.205,0.27,0.255,0.33,...,20.608362,20.644585,20.650173,20.617423,20.631808,20.678585,20.621464,20.640083,20.692247,20.654606
3,4.0,0.2,45.0,200.0,0.04,0.06,0.09,0.16,0.215,0.235,...,19.613207,19.569725,19.492434,19.453836,19.450908,19.484222,19.516355,19.521661,19.574126,19.540961
4,4.0,0.2,50.0,200.0,0.05,0.03,0.045,0.19,0.195,0.24,...,21.835639,21.856902,21.879682,21.897317,21.911161,21.910623,21.847919,21.881193,21.849526,21.879673
