Clustered multi-agent linear bandit
==============================

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
from bandpy import run_trials
from bandpy.env import ClustersCanonicalLinearBandit
from bandpy.controller import ClusteredLinearAgentsController
from bandpy.utils import check_random_state

Global functions
----------------------

In [2]:
def compute_errors(trial_results):
    """Compute the per-trial BAI error and clustering error.

    Parameters
    ----------
    trial_results : iterable of 5-tuples
        Each entry unpacks as ``(T, _, best_arms, controller, env)``; the
        second element is ignored. ``best_arms`` maps an agent name to the
        arm it selected, ``controller.agent_labels`` maps an agent name to
        its estimated cluster label and ``env.theta_per_agent`` maps an
        agent name to its true theta label.

    Returns
    -------
    Ts : np.ndarray
        The ``T`` value of each trial.
    errors_bai : np.ndarray
        For each trial, the fraction of agents whose selected arm differs
        from their entry in ``env.theta_per_agent``.
    errors_cluster : np.ndarray
        For each trial, one minus the Rand score between the true and the
        estimated cluster labels.
    """
    horizons, bai_rates, cluster_rates = [], [], []

    for T, _, best_arms, controller, env in trial_results:
        picks = list(best_arms.items())

        # The theta label doubles as the true cluster label of an agent...
        truth = [env.theta_per_agent[name] for name, _ in picks]
        guess = [controller.agent_labels[name] for name, _ in picks]

        # ...and also as the index of the arm the agent should select.
        misses = [int(arm != label)
                  for (_, arm), label in zip(picks, truth)]

        horizons.append(T)
        bai_rates.append(np.mean(misses))
        cluster_rates.append(1.0 - metrics.rand_score(truth, guess))

    return np.array(horizons), np.array(bai_rates), np.array(cluster_rates)


def plot_linear_bandit(arms, thetas, est_thetas):
    """Draw the arms, the thetas and the estimated thetas as 2D arrows.

    Parameters
    ----------
    arms, thetas, est_thetas : sequences of vectors
        Only components ``[0]`` and ``[1]`` of each vector are drawn. Arms
        are blue, thetas red and estimated thetas green.

    The figure number is ``len(arms)``, the axes are symmetric around zero
    and sized to the largest absolute coordinate, and the figure is shown
    before returning.
    """
    plt.figure(len(arms), figsize=(3, 3))

    # Anchor rows unpacked as the X and Y arguments of quiver: all zeros,
    # so every arrow starts at the origin.
    origin = np.array([[0.0, 0.0], [0.0, 0.0]])

    groups = ((arms, 'tab:blue'),
              (thetas, 'tab:red'),
              (est_thetas, 'tab:green'))
    for vectors, color in groups:
        for vec in vectors:
            plt.quiver(*origin, vec[0], vec[1], angles='xy',
                       scale_units='xy', scale=1, color=color)

    # Symmetric limits large enough to contain every drawn vector.
    everything = list(arms) + list(thetas) + list(est_thetas)
    max_x = np.max([np.abs(vec[0]) for vec in everything])
    max_y = np.max([np.abs(vec[1]) for vec in everything])

    plt.xlim(-max_x, max_x)
    plt.ylim(-max_y, max_y)
    plt.grid()
    plt.title(f"Linear bandit configuration (K={len(arms)})")
    plt.tight_layout()

    plt.show()

Global variables
-----------------------

In [3]:
# --- plotting ---------------------------------------------------------------
plt.style.use('tableau-colorblind10')  # colorblind-friendly palette

# --- problem size -----------------------------------------------------------
N_CLUSTERS = 2  # number of clusters
n_agents = 30  # number of agents
max_T = 10_000  # horizon
n_trials = 16  # results are averaged over 'n_trials' runs

# --- randomness -------------------------------------------------------------
MAX_RANDINT = 10_000  # exclusive upper bound used when drawing trial seeds
# NOTE(review): with seed=None the runs are presumably not reproducible;
# set an integer here to pin the results -- confirm against bandpy.
seed = None  # seed used to generate the per-trial seeds

# --- execution --------------------------------------------------------------
n_jobs = 8  # number of CPUs
verbose = False  # Bandpy verbose level

Running the experiment
----------------------------------

In [4]:
# Environment shared by every run of the experiment.
env = ClustersCanonicalLinearBandit(
    d=2,
    delta=0.01,
    T=max_T,
    n_thetas=N_CLUSTERS,
    sigma=1.0,
    seed=seed,
)

# Keyword arguments forwarded to the agents through the controller.
# 'S' is the largest Euclidean norm among the environment's thetas.
agent_kwargs = dict(
    arms=env.arms,
    epsilon=0.0,
    delta=0.001,
    R=1.0,
    S=np.max([np.linalg.norm(theta) for theta in env.thetas]),
    lbda=1.0,
)

# One seed per trial, drawn from a generator built from the master seed.
rng = check_random_state(seed)
seeds = rng.randint(MAX_RANDINT, size=n_trials)

In [None]:
l_m = [1, 2, 3, 4, 5, 10, 30]
exp_results = {}

for m in l_m:
    # Build a controller for this value of m and register its agents
    # with the environment.
    controller = ClusteredLinearAgentsController(
        N=n_agents, n_clusters=N_CLUSTERS, m=m, agent_kwargs=agent_kwargs)
    env.assign_agent_models(list(controller.agents.keys()))

    # Run every trial (one per seed).
    trial_results = run_trials(env, controller, seeds=seeds, n_jobs=n_jobs,
                               verbose=verbose,
                               enable_controller_early_stopping=True)

    # Per-trial errors; the T values are stored already shifted by m.
    Ts, errors_bai, errors_cluster = compute_errors(trial_results)
    Ts = Ts - m

    print(f"[m={m}] mean-T-BAI={np.mean(Ts):.0f}, "
          f"mean-errors-bai={np.mean(errors_bai):.2f}, "
          f"mean-errors-cluster={np.mean(errors_cluster):.2f}")

    exp_results[m] = (Ts, errors_bai, errors_cluster)

In [None]:
# One figure per value of m: per-trial BAI error and clustering error
# (in percent) against the per-trial T.
for m, results in exp_results.items():

    Ts, errors_bai, errors_cluster = results

    fig, axis = plt.subplots(nrows=1, ncols=2, figsize=(5, 2))
    for ax, name, err in zip(axis,
                             ["BAI error (%)", "Clustering error (%)"],
                             [errors_bai, errors_cluster]):
        # The Ts stored in exp_results were already shifted by m when the
        # experiment ran, so they are plotted as-is; subtracting m again
        # would shift them twice and disagree with the printed mean.
        ax.scatter(Ts, 100. * np.asarray(err))
        ax.grid()
        ax.set_xlim(0, max_T)
        ax.set_ylim(0.0, 100.)
        ax.set_title(name, fontsize=11)
    fig.suptitle(f"m={m}", fontsize=13)
    fig.tight_layout()
    # plt.show() rather than fig.show(): the latter only warns on the
    # non-GUI inline backend used by notebooks.
    plt.show()

In [None]:
# Averages over trials for each value of m. The Ts stored in exp_results
# were already shifted by m when the experiment ran, so no further
# subtraction is applied here (doing so would shift them twice).
mean_T_bai = np.array([np.mean(exp_results[m][0]) for m in l_m])
mean_err_bai = np.array([np.mean(exp_results[m][1]) for m in l_m])
mean_err_grp = np.array([np.mean(exp_results[m][2]) for m in l_m])

fig, axis = plt.subplots(nrows=1, ncols=3, figsize=(9, 3))

for ax, to_plot, name in zip(axis,
                             [mean_T_bai, mean_err_bai, mean_err_grp],
                             ["n-samples", "err-BAI", "err-cluster"]):
    ax.plot(l_m, to_plot, lw=2.0)
    ax.grid()
    ax.set_xlabel("m", fontsize=12)
    ax.set_title(name, fontsize=12)

fig.tight_layout()

# plt.show() rather than fig.show(): the latter only warns on the non-GUI
# inline backend used by notebooks.
plt.show()