In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever name this install has.
style.use('seaborn-v0_8-colorblind' if 'seaborn-v0_8-colorblind' in style.available
          else 'seaborn-colorblind')
import scipy.stats as sps
import networkx as nx
import time
from Dist_KL_UCB_Small_Graphs import Dist_KL_UCB
import os
from multiprocess import Pool

# ---- Experiment configuration shared by the cells below ----
n_runs = 100
T = 10000

# Graph: a complete graph on nodes 0-5 composed with a cycle on nodes 6-11.
# The two parts share no nodes, so the composed graph has 12 nodes.
G1 = nx.complete_graph(6)
G2 = nx.cycle_graph(list(range(6, 12)))
G = nx.compose(G1, G2)
N = G.number_of_nodes()

# Give every node a self-loop. The node list is snapshotted first so the graph
# is not being iterated while edges are added.
nodes = list(G.nodes)
G.add_edges_from((node, node) for node in nodes)

# Mean of each arm's reward distribution.
rwd_means = [.2, .3, .4, .5, .6]

# One entry per node, in node order (passed as `sigma=` to Dist_KL_UCB).
sigmas = [
    1, 1, 1,           # nodes 0-2
    0.01, 0.01, 0.01,  # nodes 3-5
    1, 1, 1,           # nodes 6-8
    0.01, 0.01, 0.01,  # nodes 9-11
]

# Run k is seeded with seeds[k].
seeds = list(range(n_runs))

In [None]:
def worker(job_runs, seeds, sigmas, T, N, G):
    """Run one Dist_KL_UCB simulation per run index and collect the regrets.

    Reads the module-level ``rwd_means`` list to build the reward
    distributions (truncated normals on [0, 1] with scale 0.1).

    Parameters
    ----------
    job_runs : sequence of int
        Run indices handled by this call. Each index is also used to look up
        the seed for that run in ``seeds``. The indices do not need to be
        contiguous.
    seeds : sequence of int
        ``seeds[run]`` seeds the random state of every distribution of ``run``.
    sigmas :
        Forwarded unchanged to ``Dist_KL_UCB`` as ``sigma=``.
    T : int
        Forwarded to ``Dist_KL_UCB`` and used as the last axis of the result.
    N : int
        Number of per-node distribution lists to build; second axis of the
        result.
    G :
        Forwarded unchanged to ``Dist_KL_UCB``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(job_runs), N, T)``; slot ``k`` holds the
        ``regrets`` attribute of the simulation for ``job_runs[k]``.
    """
    regrets_dist_kl_ucb = np.zeros((len(job_runs), N, T))
    sd = 0.1  # scale of every reward distribution; identical for all runs

    # Store by position in job_runs rather than by `run - job_runs[0]`, which
    # is only a valid index when job_runs is a contiguous ascending range.
    for slot, run in enumerate(job_runs):
        start_run_t = time.time()

        distributions = [
            [sps.truncnorm(a=(0 - mean) / sd, b=(1 - mean) / sd, loc=mean, scale=sd)
             for mean in rwd_means]
            for _ in range(N)
        ]
        # NOTE(review): every distribution of a run gets its own RandomState
        # built from the same seed, so they all start from the same stream
        # state -- confirm this coupling is intended.
        for node_distributions in distributions:
            for dist in node_distributions:
                dist.random_state = np.random.RandomState(seed=seeds[run])

        distkl = Dist_KL_UCB(T, distributions, G, sigma=sigmas)
        distkl.run()
        regrets_dist_kl_ucb[slot, :, :] = distkl.regrets

        end_run_t = time.time()
        print(f'finished run {run} in {end_run_t - start_run_t}sec')
    return regrets_dist_kl_ucb

In [None]:
# Run all n_runs simulations across a process pool and aggregate the regrets.
init_time = time.time()

# os.cpu_count() returns None when the CPU count cannot be determined.
cpus = os.cpu_count() or 1

jobs = list(range(n_runs))
# Ceiling division, floored at 1. With more CPUs than runs, `n_runs // cpus`
# is 0, and a zero step makes range() raise ValueError.
job_size = max(1, -(-n_runs // cpus))
job_chunks = [(jobs[i:i + job_size], seeds, sigmas, T, N, G) for i in range(0, len(jobs), job_size)]

# The context manager shuts the pool down even if a run raises; starmap blocks
# until every chunk has returned, so no work is lost on a normal exit.
with Pool() as pool:
    results = pool.starmap(worker, job_chunks)

# starmap returns one array per chunk, in chunk order; stack them along the
# run axis and check that every run is accounted for.
regrets_dist_kl_ucb = np.concatenate(results, axis=0)
assert regrets_dist_kl_ucb.shape == (n_runs, N, T)

end_run_t = time.time()

time_axis = list(range(T))
# Mean and standard deviation over runs (axis 0); both have shape (N, T).
mean_regrets_over_all_runs_dist_kl_ucb = np.mean(regrets_dist_kl_ucb, axis=0)
std_regrets_over_all_runs_dist_kl_ucb = np.std(regrets_dist_kl_ucb, axis=0)
print(f'Total run time = {end_run_t - init_time}sec')

In [None]:
fig, ax = plt.subplots(figsize=(10, 5))


def _mean_over_rows(per_node, first, last):
    """Average rows ``first:last`` of a (node, time) array over the node axis."""
    return np.mean(per_node[first:last], axis=0)


# Per-group curves: each group is three consecutive nodes (0-2, 3-5, 6-8, 9-11).
avg_3_1_regret = _mean_over_rows(mean_regrets_over_all_runs_dist_kl_ucb, 0, 3)
avg_3_001_regret = _mean_over_rows(mean_regrets_over_all_runs_dist_kl_ucb, 3, 6)
avg_6_1_regret = _mean_over_rows(mean_regrets_over_all_runs_dist_kl_ucb, 6, 9)
avg_6_001_regret = _mean_over_rows(mean_regrets_over_all_runs_dist_kl_ucb, 9, 12)

# Band half-widths: the across-run standard deviations, averaged within each group.
std_3_1_regret = _mean_over_rows(std_regrets_over_all_runs_dist_kl_ucb, 0, 3)
std_3_001_regret = _mean_over_rows(std_regrets_over_all_runs_dist_kl_ucb, 3, 6)
std_6_1_regret = _mean_over_rows(std_regrets_over_all_runs_dist_kl_ucb, 6, 9)
std_6_001_regret = _mean_over_rows(std_regrets_over_all_runs_dist_kl_ucb, 9, 12)

group_curves = (
    ("Group 1", avg_3_1_regret, std_3_1_regret),
    ("Group 2", avg_3_001_regret, std_3_001_regret),
    ("Group 3", avg_6_1_regret, std_6_1_regret),
    ("Group 4", avg_6_001_regret, std_6_001_regret),
)

# One line plus a +/- one-band shaded region per group, drawn in group order.
for group_label, centre, half_width in group_curves:
    ax.plot(time_axis, centre, label=group_label)
    ax.fill_between(time_axis, centre - half_width, centre + half_width, alpha=.2)

ax.set_xlabel("Time")
ax.set_ylabel("Regret")
ax.legend()
ax.grid()

fig.savefig("dist_KL_UCB_small_graphs_all_together_N12.pdf", bbox_inches='tight')