In [18]:
import os
import time

import matplotlib.pyplot as plt
import matplotlib.style as style
import networkx as nx
import numpy as np
import scipy.stats as sps
from numpy.random import Generator, PCG64

from Dist_KL_UCB import Dist_KL_UCB
from Dist_UCB1 import Dist_UCB1

style.use('tableau-colorblind10')

n_runs = 100
T = 10000
N = 30
rwd_means = [.2, .25, .3, .35, .4, .45, .5, .55, .6, .65]

seeds = []
myseed = 0
while len(seeds) < n_runs:
    G = nx.fast_gnp_random_graph(N, 0.5, directed=False, seed=myseed)
    if nx.is_connected(G):
        seeds.append(myseed)
    myseed += 1
    
distributions = [[sps.beta(rwd_means[i], 1-rwd_means[i]) for i in range(len(rwd_means))] for n in range(N)]
for n in range(N):
    for i in range(len(rwd_means)):
        distributions[n][i].random_state = np.random.RandomState(seed=1)
sigmas = [1, .1, .01]

In [None]:
mean_regrets_dist_kl_ucb = np.zeros((n_runs, len(sigmas), T))
mean_regrets_dist_ucb1 = np.zeros((n_runs, len(sigmas), T))
init_time = time.time()
for run in range(n_runs):
    start_run_t = time.time()
    G = nx.fast_gnp_random_graph(N, 0.5, directed=False, seed=seeds[run])
    nodes = list(G.nodes)
    for i in nodes:
        G.add_edge(i,i) 
    for sig in range(len(sigmas)):
        distkl = Dist_KL_UCB(T, distributions, G, sigma=sigmas[sig])
        distkl.run()
        mean_regrets_dist_kl_ucb[run, sig, :] = np.mean(distkl.regrets, axis=0)
        distucb1 = Dist_UCB1(T, distributions, G, beta=sigmas[sig])
        distucb1.run()
        mean_regrets_dist_ucb1[run, sig, :] = np.mean(distucb1.regrets, axis=0)
    end_run_t = time.time()
    print(f'finished run {run} in {end_run_t - start_run_t}sec')

time_axis = list(range(distkl.T))
mean_regrets_over_all_runs_dist_kl_ucb = np.mean(mean_regrets_dist_kl_ucb, axis=0)
std_regrets_over_all_runs_dist_kl_ucb = np.std(mean_regrets_dist_kl_ucb, axis=0)
mean_regrets_over_all_runs_dist_ucb1 = np.mean(mean_regrets_dist_ucb1, axis=0)
std_regrets_over_all_runs_dist_ucb1 = np.std(mean_regrets_dist_ucb1, axis=0)
print(f'Total run time = {end_run_t - init_time}sec')

In [None]:
linestyles = ['-.', '-', '--']
for sig in range(len(sigmas)):
    plt.plot(time_axis, mean_regrets_over_all_runs_dist_kl_ucb[sig], linestyles[sig], label=r"$\varsigma=$" + str(sigmas[sig]))
    plt.fill_between(time_axis, mean_regrets_over_all_runs_dist_kl_ucb[sig]-std_regrets_over_all_runs_dist_kl_ucb[sig], mean_regrets_over_all_runs_dist_kl_ucb[sig]+std_regrets_over_all_runs_dist_kl_ucb[sig],alpha=.2)
for sig in range(len(sigmas)):    
    plt.plot(time_axis, mean_regrets_over_all_runs_dist_ucb1[sig], linestyles[sig], label=r"$\beta=$" + str(sigmas[sig]))
    plt.fill_between(time_axis, mean_regrets_over_all_runs_dist_ucb1[sig]-std_regrets_over_all_runs_dist_ucb1[sig], mean_regrets_over_all_runs_dist_ucb1[sig]+std_regrets_over_all_runs_dist_ucb1[sig],alpha=.2)
plt.xlabel("Time")
plt.ylabel("Regret")
plt.legend()
plt.grid()
plt.savefig("./.out/dist_KL_vs_dist_UCB1.pdf", bbox_inches='tight')