In [1]:
import numpy as np
import sys
import os
import pickle as pkl

# Directory that holds the pickled experiment results; `plot` joins this path
# with "<experiment prefix><horizon>.pkl" to locate each result file.
pickle_path = "./xp"
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker
from matplotlib.ticker import ScalarFormatter

# Global matplotlib defaults: render all text through LaTeX and use a large
# base font size for the figures.
plt.rcParams.update(
    {
        "text.usetex": True,
        "font.size": 26,
    }
)

In [2]:
# File-name prefixes of the eleven experiments ("xp1_" ... "xp11_") and, at the
# same position, the index of the optimal arm of each experiment.
names_xp = [f"xp{number}_" for number in range(1, 12)]
optimal_arms_xp = [3, 4, 5, 14, 1, 2, 0, 0, 4, 9, 4]

# Positions (0-based) of the experiments that are actually plotted; both lists
# are narrowed with the same positions so they stay aligned.
index_list = [0, 2, 3, 10]
names_xp = [names_xp[position] for position in index_list]
optimal_arms_xp = [optimal_arms_xp[position] for position in index_list]

# Horizons for which a result file is read for every experiment.
T_list = [50, 100, 200, 500, 1000, 2000]

In [5]:
# Use a slightly smaller base font for the figures produced below.
plt.rcParams["font.size"] = 24


def _load_results(caption):
    """Return the unpickled content of ``<pickle_path>/<caption>.pkl``."""
    # NOTE(review): unpickling can execute arbitrary code; only point
    # `pickle_path` at result files produced by trusted experiment runs.
    with open(os.path.join(pickle_path, caption + ".pkl"), "rb") as f:
        return pkl.load(f)


def _format_horizon_axis(ax, T_list):
    """Log-scale the x axis with exactly one labelled tick per horizon."""
    ax.set_xscale("log")
    ax.set_xticks(T_list)
    ax.set_xticklabels(list(T_list))
    ax.minorticks_off()
    ax.set_ylim(bottom=0)
    ax.set_xlabel("Iteration")


def plot(name_xp, best_arm, T_list):
    """Plot best-arm pull rate and proxy empirical regret for one experiment.

    For every horizon ``T`` in ``T_list`` the file ``<name_xp><T>.pkl`` is read
    from ``pickle_path``. For each algorithm listed under
    ``data["Info"]["algorithms"]`` (except ``"OptimalPolicy"``) two curves are
    drawn against the horizon and saved as PDF files in ``./figures``:

    * ``<name_xp>pulling_arms.pdf`` -- mean fraction of pulls spent on
      ``best_arm``, with a band of one standard deviation around it;
    * ``<name_xp>regret.pdf`` -- the proxy empirical regret
      ``(ref - q_alg) / ref`` clipped at zero, where ``ref`` and ``q_alg`` are
      the ``q``-quantiles of the "maxima of all trajectories" of the optimal
      policy and of the algorithm, and ``q`` is the empirical rank of the
      optimal policy's mean maximum.

    The optimal-policy reference is always taken from the file of the last
    horizon in ``T_list``.

    Parameters
    ----------
    name_xp : str
        Prefix of the result files, e.g. ``"xp1_"``; also used as prefix of the
        saved figure names.
    best_arm : int
        Index of the arm whose pull rate is reported.
    T_list : sequence of int
        Horizons to plot, in the order they appear on the x axis.

    Raises
    ------
    ValueError
        If ``T_list`` is empty (there is no reference file to read).
    """
    if len(T_list) == 0:
        raise ValueError("T_list must contain at least one horizon")

    # The reference only depends on the last horizon, so it is computed once
    # instead of being reloaded for every T.
    reference = _load_results(name_xp + str(T_list[-1]))
    optimal_maxima = np.asarray(
        reference["OptimalPolicy"]["maxima of all trajectories"]
    )
    # q is the fraction of optimal trajectories whose maximum does not exceed
    # the mean maximum; every algorithm is compared at that same quantile.
    q = np.mean(optimal_maxima <= np.mean(optimal_maxima))
    reference_value = np.quantile(optimal_maxima, q=q)

    # algorithm name -> one row per horizon:
    # [mean, med, L, U, L_q, U_q, mean_max, PER]
    Result = {}
    for T in T_list:
        data = _load_results(name_xp + str(T))
        for algorithm in data["Info"]["algorithms"]:
            if algorithm == "OptimalPolicy":
                continue

            maxima = np.asarray(data[algorithm]["maxima of all trajectories"])
            PER = (reference_value - np.quantile(maxima, q=q)) / reference_value
            if PER < 0:
                # An algorithm beating the reference quantile counts as zero regret.
                PER = 0
            mean_max = np.mean(maxima)

            # assumes the pull counts are laid out as (trajectory, arm) -- TODO confirm
            pulls = np.asarray(data[algorithm]["Arm pull per trajectory"])
            mean = np.mean(pulls, axis=0)[best_arm] / T
            spread = np.std(pulls / T, axis=0)[best_arm]
            L_q, med, U_q = (
                np.quantile(pulls, q=[0.25, 0.5, 0.75], axis=0)[:, best_arm] / T
            )

            Result.setdefault(algorithm, []).append(
                [mean, med, mean - spread, mean + spread, L_q, U_q, mean_max, PER]
            )

    # Columns of the per-horizon rows that are actually drawn.
    MEAN, LOWER, UPPER, REGRET = 0, 2, 3, 7
    # Underscores are replaced in the legend labels, presumably because text is
    # rendered through LaTeX where "_" is a special character.
    curves = [
        (key.replace("_", "-"), np.asarray(rows)) for key, rows in Result.items()
    ]

    # savefig does not create missing directories.
    os.makedirs("./figures", exist_ok=True)

    # Figure 1: fraction of pulls spent on the best arm.
    fig, ax = plt.subplots(figsize=(8, 6))
    for label, value in curves:
        ax.plot(T_list, value[:, MEAN], label=label, marker="*", linewidth=3)
        ax.fill_between(T_list, value[:, LOWER], value[:, UPPER], alpha=0.2)
    _format_horizon_axis(ax, T_list)
    ax.set_ylabel("Best Arm pulls rate")
    fig.savefig(
        "./figures/" + name_xp + "pulling_arms.pdf", dpi=600, bbox_inches="tight"
    )
    plt.close(fig)

    # Figure 2: proxy empirical regret; this is the figure that carries the legend.
    fig, ax = plt.subplots(figsize=(8, 6))
    for label, value in curves:
        ax.plot(T_list, value[:, REGRET], label=label, marker="*", linewidth=3)
    _format_horizon_axis(ax, T_list)
    ax.set_ylabel("Proxy Empirical Regret")
    ax.legend(
        loc="center right",
        ncol=1,
        fontsize=24,
        bbox_to_anchor=(0.7, 0.45, 1, 0.1),
        handletextpad=0.15,
        handlelength=1.5,
        frameon=False,
    )
    fig.savefig("./figures/" + name_xp + "regret.pdf", dpi=600, bbox_inches="tight")
    plt.close(fig)

In [6]:
# Produce both figures for every selected experiment, pairing each experiment
# prefix with the index of its optimal arm.
for exp, best_arm in zip(names_xp, optimal_arms_xp):
    plot(name_xp=exp, best_arm=best_arm, T_list=T_list)