### Read data from file

In [None]:
import pickle
from pathlib import Path

# Resolve the results file relative to the current working directory:
# <parent of cwd>/data/mountaincar.pkl
root_module = Path.cwd()
ext = ".pkl"
file_name = "mountaincar"
load_dir = root_module.parent.joinpath("data/")
file_dir = load_dir.joinpath(file_name + ext)

# NOTE(security): unpickling can execute arbitrary code, so only load result
# files that come from a trusted source.
# The context manager closes the file handle even if unpickling fails.
with open(file_dir, "rb") as pickle_file:
    data = pickle.load(pickle_file)

### Process raw results

In [None]:
from collections import defaultdict

import numpy as np
from scipy.stats import sem

from dist_mbrl.utils.process_results import rolling_average

# Pull the three top-level entries out of the loaded results.
reward_scales = data["reward_scales"]
raw_returns = data["raw_returns"]
steps = data["steps"]

# Window length handed to `rolling_average` when smoothing the returns.
WINDOW_SIZE = 10

# Both mappings are indexed as [reward_scale][run key].
mean_returns = defaultdict(dict)
ci_returns = defaultdict(dict)
for scale in reward_scales:
    for run_key, run_returns in raw_returns[scale].items():
        smoothed = rolling_average(WINDOW_SIZE, run_returns)
        # Mean and standard error are both reduced over the last axis
        # (presumably the independent runs/seeds -- confirm against the
        # code that produced the pickle).
        mean_returns[scale][run_key] = np.mean(smoothed, axis=-1)
        # Half-width of the plotted band: 1.00 x the standard error.
        ci_returns[scale][run_key] = 1.00 * sem(smoothed, axis=-1)

### Plotting

In [None]:
%matplotlib widget
import matplotlib.pyplot as plt

from dist_mbrl.utils.plot import JMLR_PARAMS, LIGHT_GREY

# Apply the project-wide matplotlib settings before any figure is created.
plt.rcParams.update(JMLR_PARAMS)

# A single row of three panels.
fig, axes = plt.subplots(
    nrows=1, ncols=3, figsize=(6.8, 1.0), gridspec_kw={"wspace": 0.4, "hspace": 0.5}
)

# Assign one "tab10" colour to each curve key, using the keys of the first
# reward scale in iteration order. A comprehension is used so that no loop
# variable leaks into the notebook namespace (the previous loop variable was
# called `type` and shadowed the builtin for every later cell).
cmap = plt.get_cmap("tab10")
colors = {
    curve_key: cmap(position)
    for position, curve_key in enumerate(mean_returns[reward_scales[0]])
}

# Divisor applied to the recorded step counts to obtain the x-axis values.
ep_length = 1000


def process_label(params):
    r"""Build a LaTeX legend label from an iterable of parameter values.

    Each entry that is not equal to the string ``"nan"`` is wrapped as
    ``\texttt{ <entry>}``; the wrapped pieces are concatenated in order.
    Returns an empty string when nothing remains.
    """
    return "".join(rf"\texttt{{ {param}}}" for param in params if param != "nan")


# One panel per reward scale, visiting `reward_scales` in reverse order.
for panel, scale in enumerate(reward_scales[::-1]):
    ax = axes[panel]
    ax.set_title(f"Reward scale {scale}x")
    for curve_key, mean_curve in mean_returns[scale].items():
        band = ci_returns[scale][curve_key]
        # x-axis: recorded step counts floor-divided by `ep_length`.
        x_values = steps[scale][curve_key] // ep_length
        curve_color = colors[curve_key]

        # Mean curve.
        ax.plot(
            x_values,
            mean_curve,
            linestyle="-",
            linewidth=1.5,
            label=process_label(curve_key),
            c=curve_color,
        )
        # Shaded band of +/- `band` around the mean, in the same colour.
        ax.fill_between(
            x_values,
            mean_curve - band,
            mean_curve + band,
            alpha=0.2,
            color=curve_color,
        )
        ax.grid(color=LIGHT_GREY)

# Every panel gets the x-label; only the leftmost one carries the y-label.
for ax in axes:
    ax.set_xlabel(r"Env steps $\times 10^3$", labelpad=0)

first_ax = axes[0]
first_ax.set_ylabel("Return")

# Single frameless legend, anchored relative to the first panel and placed
# below the row of panels.
first_ax.legend(loc="lower center", ncol=4, bbox_to_anchor=(1.9, -1.2), frameon=False)

### Save figure

In [None]:
# Target file: <parent of cwd>/figures/mountaincar.pdf
fig_dir = root_module.parent.joinpath("figures/mountaincar.pdf")
# savefig does not create missing directories, so make sure the output
# folder exists (no-op when it is already there).
fig_dir.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(fig_dir, bbox_inches="tight", transparent=False)

# License

>Copyright (c) 2024 Robert Bosch GmbH
>
>This program is free software: you can redistribute it and/or modify <br>
>it under the terms of the GNU Affero General Public License as published<br>
>by the Free Software Foundation, either version 3 of the License, or<br>
>(at your option) any later version.<br>
>
>This program is distributed in the hope that it will be useful,<br>
>but WITHOUT ANY WARRANTY; without even the implied warranty of<br>
>MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the<br>
>GNU Affero General Public License for more details.<br>
>
>You should have received a copy of the GNU Affero General Public License<br>
>along with this program.  If not, see <https://www.gnu.org/licenses/>.