### Load Pickle file

In [None]:
import pickle
from pathlib import Path

# Build the path <parent of the current working directory>/data/nroom_main.pkl.
root_module = Path.cwd()
ext = ".pkl"
name = "nroom_main"
load_dir = root_module.parent.joinpath("data/")
file_dir = load_dir.joinpath(name + ext)

# NOTE: unpickling can execute arbitrary code, so only load files that come
# from a trusted source.
# The context manager closes the file handle as soon as loading is done
# (the bare open() call used previously left it open).
with open(file_dir, "rb") as pkl_file:
    data = pickle.load(pkl_file)

### Get metrics from raw data

In [None]:
import numpy as np
from scipy.stats import sem

# Summarise the raw total regret stored under every experiment key:
# the mean and the standard error of the mean, both reduced over the last axis.
# NOTE(review): the last axis is presumably the seed/run axis -- confirm.
total_regret = data["total_regret"]
data["mean_regret"] = {
    key: np.mean(runs, axis=-1) for key, runs in total_regret.items()
}
data["ci_regret"] = {
    key: sem(runs, axis=-1) for key, runs in total_regret.items()
}

### Total regret vs episodes Plot

In [None]:
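# Plot the mean total regret against episodes, one curve per experiment key.
# The keys of data["total_regret"] are the parameters that were not the same for
# all runs (seeds excluded); those are the experiments that we want to group on.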
%matplotlib widget
import matplotlib.pyplot as plt
from itertools import product
from ube_mbrl.utils.plot import PARAMS

# Apply the plotting parameters imported from ube_mbrl.utils.plot.
plt.rcParams.update(PARAMS)

# Collect the experiment keys found in the data as an object array with one
# row per key, then sort the rows by their second field (the uq method).
experiment_keys = np.array(list(data["total_regret"]), dtype=object)
by_second_field = experiment_keys[np.argsort(experiment_keys[:, 1])]
# Hand-picked display order, expressed as positions in the sorted rows.
# It needs at least four keys; any key beyond the fourth is dropped.
order = [1, 3, 0, 2]
# Turn each row back into a tuple so it can index the data dictionaries.
agent_types = list(map(tuple, by_second_field[order].tolist()))

# Single-axes figure that all regret curves are drawn on.
fig, ax = plt.subplots(
    1,
    1,
    figsize=(3.3, 1.5),
    gridspec_kw={"wspace": 0.15, "hspace": 0.3},
)

# Give every agent type its own "tab10" colour, following the display order.
cmap = plt.get_cmap("tab10")
colors = {
    agent_type: cmap(position) for position, agent_type in enumerate(agent_types)
}

# Walk the agents back to front, so the first entry of agent_types is drawn last.
for agent_key in reversed(agent_types):
    agent_name, uq_method = agent_key

    # Legend label in LaTeX markup; two uq methods get a fixed display name.
    if agent_name == "psrl":
        label = fr"\texttt{{{agent_name}}}"
    elif uq_method == "exact_ube_3":
        label = r"\texttt{ofu-exact-ube}" + " (ours)"
    elif uq_method == "ensemble":
        label = r"\texttt{ofu-ensemble-var}"
    else:
        label = fr"\texttt{{{agent_name}-}}\texttt{{{uq_method}}}"

    episodes = data["episodes"][agent_key]
    mean_regret = data["mean_regret"][agent_key]
    std_err = data["ci_regret"][agent_key]
    color = colors[agent_key]

    # Mean curve, plus a shaded band of one standard error on either side.
    ax.plot(episodes, mean_regret, linestyle="-", linewidth=2, label=label, c=color)
    ax.fill_between(
        episodes,
        mean_regret + std_err,
        mean_regret - std_err,
        alpha=0.2,
        color=color,
    )

plt.rcParams["legend.title_fontsize"] = "xx-small"

# Flip the legend entries so they appear in the opposite order to the one
# in which the curves were added to the axes.
handles, labels = ax.get_legend_handles_labels()
ax.legend(
    list(reversed(handles)),
    list(reversed(labels)),
    loc="lower center",
    ncol=2,
    bbox_to_anchor=(0.45, -0.7),
    frameon=False,
    prop={"size": 8},
)
# ax.set_yscale('log')
# ax.minorticks_off()
ax.set_ylabel("Total regret")
ax.set_xlabel("Episode")
# Scientific notation for the tick labels of both axes.
for axis_name in ("x", "y"):
    ax.ticklabel_format(axis=axis_name, style="sci", scilimits=(0, 0))

### Save figures

In [None]:
# Save figures
import os
from pathlib import Path
root_module = Path.cwd()
# Output file: <parent of the current working directory>/figures/nroom_regret.pdf
# (a plain string; the previous f-string had no placeholders).
fig_dir = root_module.parent.joinpath("figures/nroom_regret.pdf")
# Create the output folder if it is missing, so savefig does not fail on a
# fresh checkout.
fig_dir.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(fig_dir, bbox_inches="tight", transparent=False)

# License

>Copyright (c) 2023 Robert Bosch GmbH
>
>This program is free software: you can redistribute it and/or modify <br>
>it under the terms of the GNU Affero General Public License as published<br>
>by the Free Software Foundation, either version 3 of the License, or<br>
>(at your option) any later version.<br>
>
>This program is distributed in the hope that it will be useful,<br>
>but WITHOUT ANY WARRANTY; without even the implied warranty of<br>
>MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the<br>
>GNU Affero General Public License for more details.<br>
>
>You should have received a copy of the GNU Affero General Public License<br>
>along with this program.  If not, see <https://www.gnu.org/licenses/>.