In [None]:
import os
import random

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import scipy.stats as sps

from Dist_UCB1 import Dist_UCB1
from UCB1 import UCB1

# --- Experiment configuration -------------------------------------------------
# Seed both global RNGs so that "Restart Kernel -> Run All" reproduces the same
# figure. networkx graph generators fall back to Python's `random` module when
# no seed is passed, and scipy frozen distributions sample from NumPy's global
# RNG by default.
# NOTE(review): this only makes the bandit runs deterministic if Dist_UCB1 and
# UCB1 draw from these global generators as well -- confirm in their modules.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

n_runs = 10   # number of independent repetitions (a fresh random graph each)
T = 5000      # horizon passed to both algorithms
N = 20        # number of agents, i.e. nodes of the communication graph

# One entry per arm. Each value `a` parameterizes a Beta(a, a + 1) reward
# distribution below.
# NOTE(review): Beta(a, a + 1) has mean a / (2a + 1), not a, so these numbers
# are not literally the arm means (the ordering of arms is preserved, though).
# Confirm that this is the intended parameterization.
rwd_means = [.2, .25, .3, .35, .4, .45, .5, .55, .6, .65]

# Arm distributions for the single-agent UCB1 baseline.
kl_distributions = [sps.beta(a, a + 1) for a in rwd_means]

# Arm distributions for the distributed algorithm: one independent list of
# frozen distributions per agent (same parameters for every agent).
distributions = [[sps.beta(a, a + 1) for a in rwd_means] for _ in range(N)]

# Values of the `beta` keyword argument of Dist_UCB1 that are compared.
betas = [1, .1, .01]

In [None]:
# --- Run the experiment -------------------------------------------------------
# For every repetition: draw a connected random graph, run Dist_UCB1 once per
# beta on it, then run the single-agent UCB1 baseline.
mean_regrets_dist_ucb1 = np.zeros((n_runs, len(betas), T))  # indexed [run, beta, t]
regrets_ucb1 = np.zeros((n_runs, T))                        # indexed [run, t]

for run in range(n_runs):
    print(f'starting run {run}')

    # Rejection-sample G(N, 0.5) graphs until a connected one comes up.
    while True:
        G = nx.fast_gnp_random_graph(N, 0.5, directed=False)
        if nx.is_connected(G):
            break

    # Give every node a self-loop.
    nodes = list(G.nodes)
    G.add_edges_from([(v, v) for v in nodes])

    for b, beta in enumerate(betas):
        distucb1 = Dist_UCB1(T, distributions, G, beta=beta)
        distucb1.run()
        # Collapse the first axis of `regrets` to get one length-T curve.
        # (presumably that axis enumerates the agents -- confirm in Dist_UCB1)
        mean_regrets_dist_ucb1[run, b, :] = np.asarray(distucb1.regrets).mean(axis=0)

    ucb1 = UCB1(T, kl_distributions)
    ucb1.run()
    regrets_ucb1[run, :] = ucb1.regret

# Aggregate across repetitions: mean curve and (population) standard deviation.
time_axis = list(range(T))
mean_regrets_over_all_runs_dist_ucb1 = mean_regrets_dist_ucb1.mean(axis=0)
std_regrets_over_all_runs_dist_ucb1 = mean_regrets_dist_ucb1.std(axis=0)
mean_regrets_over_all_runs_ucb1 = regrets_ucb1.mean(axis=0)
std_regrets_over_all_runs_ucb1 = regrets_ucb1.std(axis=0)

In [None]:
# --- Plot: distributed UCB1 (one curve per beta) vs. single-agent UCB1 --------
# Each curve is the mean regret over all runs; the shaded band around it spans
# +/- one standard deviation over runs.
out_path = "./out/dist_UCB_vs_single_UCB.pdf"
# savefig does not create missing directories, so make sure the target
# directory exists before writing (otherwise a fresh checkout fails here).
os.makedirs(os.path.dirname(out_path), exist_ok=True)

fig, ax = plt.subplots(figsize=(8,5))
linestyles = ['-.', '-', '--']

# Collect (mean, std, line format, legend label) for every curve so that the
# line and its band are drawn by one shared piece of code. Order matters: it
# determines the colors picked from the default color cycle.
curves = []
for b in range(len(betas)):
    curves.append((
        mean_regrets_over_all_runs_dist_ucb1[b],
        std_regrets_over_all_runs_dist_ucb1[b],
        # Wrap around so adding more betas than line styles does not raise.
        linestyles[b % len(linestyles)],
        r"$\beta=$" + str(betas[b]),
    ))
curves.append((
    mean_regrets_over_all_runs_ucb1,
    std_regrets_over_all_runs_ucb1,
    ':',
    "Single Agent UCB1",
))

for mean_curve, std_curve, fmt, label in curves:
    ax.plot(time_axis, mean_curve, fmt, label=label)
    ax.fill_between(time_axis, mean_curve - std_curve, mean_curve + std_curve, alpha=.2)

ax.set_xlabel("Time")
ax.set_ylabel("Regret")
ax.legend()
ax.grid()
fig.savefig(out_path, bbox_inches='tight')