In [4]:
import os
import time

import matplotlib.pyplot as plt
import matplotlib.style as style
import networkx as nx
import numpy as np
import scipy.stats as sps

from Dist_UCB1 import Dist_UCB1
from UCB1 import UCB1

style.use('tableau-colorblind10')

n_runs = 100
T = 10000
N = 30
rwd_means = [.2, .25, .3, .35, .4, .45, .5, .55, .6, .65]

seeds = []
myseed = 0
while len(seeds) < n_runs:
    G = nx.fast_gnp_random_graph(N, 0.5, directed=False, seed=myseed)
    if nx.is_connected(G):
        seeds.append(myseed)
    myseed += 1

single_agent_distributions = [sps.beta(rwd_means[i], 1-rwd_means[i]) for i in range(len(rwd_means))]
for i in range(len(rwd_means)):
    single_agent_distributions[i].random_state = np.random.RandomState(seed=1)
distributions = [[sps.beta(rwd_means[i], 1-rwd_means[i]) for i in range(len(rwd_means))] for n in range(N)]
for n in range(N):
    for i in range(len(rwd_means)):
        distributions[n][i].random_state = np.random.RandomState(seed=1)
betas = [1, .1, .01]

In [5]:
mean_regrets_dist_ucb1 = np.zeros((n_runs, len(betas), T))
regrets_ucb1 = np.zeros((n_runs, T))
init_time = time.time()
for run in range(n_runs):
    start_run_t = time.time()
    G = nx.fast_gnp_random_graph(N, 0.5, directed=False, seed=seeds[run])
    nodes = list(G.nodes)
    for i in nodes:
        G.add_edge(i,i) 
    for b in range(len(betas)):
        distucb1 = Dist_UCB1(T, distributions, G, beta=betas[b])
        distucb1.run()
        mean_regrets_dist_ucb1[run, b, :] = np.mean(distucb1.regrets, axis=0)

    ucb1 = UCB1(T, single_agent_distributions)
    ucb1.run()
    regrets_ucb1[run, :] = ucb1.regret
    end_run_t = time.time()
    print(f'finished run {run} in {end_run_t - start_run_t}sec')
    

time_axis = list(range(T))
mean_regrets_over_all_runs_dist_ucb1 = np.mean(mean_regrets_dist_ucb1, axis=0)
mean_regrets_over_all_runs_ucb1 = np.mean(regrets_ucb1, axis=0)
std_regrets_over_all_runs_dist_ucb1 = np.std(mean_regrets_dist_ucb1, axis=0)
std_regrets_over_all_runs_ucb1 = np.std(regrets_ucb1, axis=0)
print(f'Total run time = {end_run_t - init_time}sec')

  return (1 + beta)*np.sqrt((3*np.log(t))/(Ni*n))
  return (1 + beta)*np.sqrt((3*np.log(t))/(Ni*n))


finished run 0 in 533.7381789684296sec


KeyboardInterrupt: 

In [None]:
plt.figure(figsize=(8,5))
linestyles = ['-.', '-', '--']
for b in range(len(betas)):
    plt.plot(time_axis, mean_regrets_over_all_runs_dist_ucb1[b], linestyles[b], label=r"$\beta=$" + str(betas[b]))
    plt.fill_between(time_axis, mean_regrets_over_all_runs_dist_ucb1[b]-std_regrets_over_all_runs_dist_ucb1[b], mean_regrets_over_all_runs_dist_ucb1[b]+std_regrets_over_all_runs_dist_ucb1[b],alpha=.2)
plt.plot(time_axis, mean_regrets_over_all_runs_ucb1, ':', label="Single Agent UCB1")
plt.fill_between(time_axis, mean_regrets_over_all_runs_ucb1-std_regrets_over_all_runs_ucb1, mean_regrets_over_all_runs_ucb1+std_regrets_over_all_runs_ucb1, alpha=.2)
plt.xlabel("Time")
plt.ylabel("Regret")
plt.legend()
plt.grid()
plt.savefig("./.out/dist_UCB_vs_single_UCB.pdf", bbox_inches='tight')