In [7]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import helpers.mab_bernoulli as brn
import matplotlib.pyplot as plt
import threading

In [28]:
class bigsim_threaded():
    """Shared work queue that runs a grid of Bernoulli-bandit simulations across threads.

    Every combination of (number of arms, arm gap, per-arm data amount) is one
    task. Worker threads call ``complete_tasks`` to pull tasks one at a time,
    simulate them, and append one summary row per task to two result frames:
    ``fs_df`` (from ``brn.FS_path``) and ``ar_df`` (from ``brn.AR_path``).

    NOTE(review): the exact policies behind ``FS_path`` / ``AR_path`` live in
    ``helpers.mab_bernoulli`` and are not visible here.
    """

    def __init__(self, T, K_list, delta_list, data_amt_list, num_samples):
        """Build the task grid and the locks shared by the worker threads.

        Args:
            T: simulation horizon (number of rounds per simulated path).
            K_list: iterable of arm counts to try.
            delta_list: iterable of gaps between the best arm and the other arms.
            data_amt_list: iterable of data amounts generated per arm.
            num_samples: number of independent simulated paths per task.
        """
        self.T = T
        # Cartesian product, ordered K-major, then delta, then data amount.
        self.tasks = [
            [K, delta, data_amt]
            for K in K_list
            for delta in delta_list
            for data_amt in data_amt_list
        ]
        self.num_samples = num_samples
        self.next_task = 0  # index into self.tasks of the next unclaimed task
        self.task_lock = threading.Lock()  # guards next_task
        self.df_lock = threading.Lock()  # guards fs_df / ar_df row appends

    def gen_dfs(self):
        """Create empty result frames.

        Columns are the four task descriptors, then the mean cumulative regret
        at rounds 1..T (integer labels), then ``low_<t>`` and ``high_<t>`` bands.
        """
        df_cols = ["NumArms", "ArmGap", "ArmData", "NumSamples"]
        c = list(range(1, self.T + 1))
        df_cols = df_cols + c + ["low_"+str(n) for n in c] + ["high_"+str(n) for n in c]
        self.fs_df = pd.DataFrame(columns=df_cols)
        self.ar_df = pd.DataFrame(columns=df_cols)

    def load_dfs(self, fs_df, ar_df):
        """Use existing result frames (e.g. from a previous run) instead of empty ones."""
        self.fs_df = fs_df
        self.ar_df = ar_df

    def single_sim_wrapper(self, K, delta, data_amt):
        """Simulate one task and append its summary row to both result frames.

        Args:
            K: number of arms; K-1 arms have mean ``0.5 - delta/2`` and the last
                arm has mean ``0.5 + delta/2``.
            delta: gap between the best arm's mean and the others.
            data_amt: amount of data passed to ``brn.gen_data`` for each arm.
        """
        arms = [0.5 - delta/2 for _ in range(K-1)]
        opt_mean = 0.5 + delta / 2
        arms.append(opt_mean)

        # regret_vecs[0] -> FS paths, regret_vecs[1] -> AR paths; one row per sample.
        regret_vecs = [np.zeros((self.num_samples, self.T)) for _ in range(2)]
        for i in tqdm(range(self.num_samples), leave = False):
            # Distinct index name so the sample index `i` is not visually shadowed.
            arm_data = [brn.gen_data(arms[k], data_amt) for k in range(K)]
            fs_rewards = brn.FS_path(self.T, arms, brn.flat_priors(K), arm_data)
            ar_rewards = brn.AR_path(self.T, arms, brn.flat_priors(K), arm_data)

            # Per-round regret relative to the best arm's mean, accumulated over rounds.
            regret_vecs[0][i] = np.cumsum(opt_mean - np.array(fs_rewards))
            regret_vecs[1][i] = np.cumsum(opt_mean - np.array(ar_rewards))

        # Shape (2, num_samples, T); statistics are taken across samples (axis 1).
        stacked = np.asarray(regret_vecs)
        avgs = np.mean(stacked, axis=1)
        # Band half-width: two standard errors of the mean.
        half_width = 2*np.std(stacked, axis=1)/np.sqrt(self.num_samples)
        lower_CBs = avgs - half_width
        upper_CBs = avgs + half_width

        task_info = [K, delta, data_amt, self.num_samples]
        with self.df_lock:
            # Must index with the instance's own frames: a bare `fs_df` / `ar_df`
            # is an unrelated (usually undefined) global and raised NameError.
            self.fs_df.loc[len(self.fs_df)] = task_info + list(avgs[0]) + list(lower_CBs[0]) + list(upper_CBs[0])
            self.ar_df.loc[len(self.ar_df)] = task_info + list(avgs[1]) + list(lower_CBs[1]) + list(upper_CBs[1])

    def get_new_task(self):
        """Claim the next unclaimed task.

        Returns:
            The ``[K, delta, data_amt]`` list for the claimed task, or ``None``
            once every task has been handed out.
        """
        with self.task_lock:
            # The exhaustion check has to happen under the lock; otherwise two
            # threads can both pass it for the last task and one then indexes
            # past the end of self.tasks.
            if self.next_task >= len(self.tasks):
                return None
            next_task_attr = self.tasks[self.next_task]
            self.next_task += 1
            print("starting next task #" + str(self.next_task) + ": " + str(next_task_attr))
        return next_task_attr

    def complete_tasks(self):
        """Worker loop: keep claiming and simulating tasks until none remain."""
        task_attr = self.get_new_task()
        while task_attr is not None:
            self.single_sim_wrapper(task_attr[0], task_attr[1], task_attr[2])
            task_attr = self.get_new_task()

    def save_dfs(self):
        """Write both result frames to CSV files in the current working directory."""
        self.fs_df.to_csv("bern_fs_df.csv")
        self.ar_df.to_csv("bern_ar_df.csv")

In [24]:
# Simulation parameters
T = 200  # simulation horizon
K_list = np.arange(2, 5, 2)  # number of arms
delta_list = [0.2, 0.3]  # reward gap between best and worst arms
data_amt_list = np.arange(20, 30, 5)  # number of data points per arm
num_samples = 100  # simulated paths per (K, delta, data_amt) combination

# Shared simulation object with empty result frames.
bst = bigsim_threaded(
    T,
    K_list,
    delta_list,
    data_amt_list,
    num_samples,
)
bst.gen_dfs()

# Worker threads are only created here; they are started in a later cell.
# Each worker runs bst.complete_tasks, pulling tasks until none remain.
num_threads = 9
threads = [
    threading.Thread(target=bst.complete_tasks)
    for _ in range(num_threads)
]

In [25]:
# Start every worker before joining any, so all of them run concurrently.
for worker in threads:
    worker.start()

# Block until each worker has finished.
for worker in threads:
    worker.join()
    

starting next task #1: [2, 0.2, 20]
starting next task #2: [2, 0.2, 25]
starting next task #3: [2, 0.3, 20]
starting next task #4: [2, 0.3, 25]
starting next task #5: [4, 0.2, 20]
starting next task #6: [4, 0.2, 25]
starting next task #7: [4, 0.3, 20]
starting next task #8: [4, 0.3, 25]


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]