### Read data from file

In [None]:
import pickle
from pathlib import Path

# Locate the results pickle: <parent of the current working directory>/data/dmc_main.pkl
root_module = Path.cwd()
ext = ".pkl"
file_name = "dmc_main"
load_dir = root_module.parent.joinpath("data/")
file_dir = load_dir.joinpath(file_name + ext)

# Use a context manager so the file handle is closed even if unpickling fails.
# NOTE: unpickling can execute arbitrary code; only load result files you trust.
with open(file_dir, "rb") as pickle_file:
    raw_data = pickle.load(pickle_file)

### Process raw results

In [None]:
from dist_mbrl.utils.process_results import (
    get_bootstrap_intervals,
    get_performance_profile,
    process_raw_results,
)

# Turn the raw return data into per-environment statistics
# (mean and standard error, per the original note on this step)
metric_data = process_raw_results(raw_data)

# Combine the per-environment statistics into benchmark-wide aggregates
aggregated_metrics = get_bootstrap_intervals(metric_data)

# Per-environment quantities used by the reporting and plotting cells
env_names, steps, mean_returns, ci_returns = (
    metric_data[key] for key in ("env_names", "steps", "mean_returns", "ci_returns")
)

# Aggregated point estimates (mean / median / IQM)
agg_mean_returns, agg_median_returns, agg_iqm_returns = (
    aggregated_metrics[key]
    for key in ("mean_returns", "median_returns", "iqm_returns")
)

# Intervals belonging to each aggregated estimate
agg_mean_ci_returns, agg_median_ci_returns, agg_iqm_ci_returns = (
    aggregated_metrics[key]
    for key in ("mean_ci_returns", "median_ci_returns", "iqm_ci_returns")
)

# Common x-axis of the aggregated curves
agg_steps = aggregated_metrics["steps"]

### `rliable` performance profile

In [None]:
# Performance profile computed with episode=-1
# (presumably the last recorded episode; confirm against get_performance_profile)
performance_profile = get_performance_profile(metric_data, episode=-1)

# Unpack the profile curves, their intervals, and the matching x-axis values
perf_score_dist, perf_score_ci_dist, perf_steps = (
    performance_profile[key] for key in ("score_distributions", "score_cis", "steps")
)

### Print scores to a file

In [None]:
file_dir = root_module.parent.joinpath("data/dmc_scores.txt")


def score_label(params):
    """Build the text label for one method key.

    The first element of ``params`` is skipped. Every remaining element whose
    string form is not ``"nan"`` is appended, each followed by a single space.

    Args:
        params: Sequence identifying a method (the keys of ``mean_returns[env]``).

    Returns:
        The concatenated label; an empty string if nothing is kept.
    """
    return "".join(f"{param} " for param in params[1:] if str(param) != "nan")


# (label, position) pairs reported for every aggregate statistic: position 99 is
# written as "(100)" and the last position as "(250)".
# NOTE(review): presumably these are episode counts of a 250-episode run; confirm.
checkpoints = (("100", 99), ("250", -1))

# Section title, point estimates and (lower, upper) intervals, in output order.
aggregate_tables = (
    ("Mean", agg_mean_returns, agg_mean_ci_returns),
    ("Median", agg_median_returns, agg_median_ci_returns),
    ("IQM", agg_iqm_returns, agg_iqm_ci_returns),
)

# The context manager guarantees the file is flushed and closed even if one of
# the lookups or format operations below raises.
with open(file_dir, "w", encoding="utf-8") as file:
    # Final score of every method, per environment
    for env_name in env_names:
        file.write(f"{env_name}\n")
        for idx in mean_returns[env_name].keys():
            file.write(
                f"\t{score_label(idx)}: {mean_returns[env_name][idx][-1]:.1f} +/- {ci_returns[env_name][idx][-1]:.1f}\n"
            )

    # Aggregated scores; the methods are taken from the first environment
    for section_title, agg_values, agg_intervals in aggregate_tables:
        file.write(f"{section_title}\n")
        for idx in mean_returns[env_names[0]].keys():
            for checkpoint_label, pos in checkpoints:
                file.write(
                    f"\t{score_label(idx)}({checkpoint_label}): {agg_values[idx][pos]:.2f} ({agg_intervals[idx][0][pos]:.2f}, {agg_intervals[idx][1][pos]:.2f})\n"
                )

### Plotting - Full benchmark

In [None]:
%matplotlib widget
import matplotlib.pyplot as plt
import numpy as np

from dist_mbrl.utils.plot import (
    JMLR_PARAMS,
    LIGHT_GREY,
    default_process_label,
    handle_2D_axes_and_legend,
    plot_with_intervals,
    plot_with_symmetric_intervals,
)

plt.rcParams.update(JMLR_PARAMS)

# Define grid of plots: four columns and as many rows as needed to fit every environment
ncols = 4
nrows = int(np.ceil(len(env_names) / ncols))
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(6.5, 6.0),
    gridspec_kw={"wspace": 0.45, "hspace": 0.50},
)

# Reference axes by environment name (environments fill the grid row by row)
ax_dict = dict(zip(env_names, axes.flatten()))

# Assign colors to each of the methods we are comparing; the methods are taken
# from the first environment. A comprehension is used so that no loop variable
# (previously named `type`) rebinds the builtin for the rest of the session.
cmap = plt.get_cmap("tab10")
colors = {
    method: cmap(i) for i, method in enumerate(mean_returns[env_names[0]].keys())
}

# Plot for all environments and all methods
# Steps are integer-divided by ep_length before plotting
# (presumably converting environment steps to episodes; confirm).
ep_length = 1000
for env_name in env_names:
    for idx in mean_returns[env_name].keys():
        plot_with_symmetric_intervals(
            ax=ax_dict[env_name],
            x=steps[env_name][idx] // ep_length,
            y=mean_returns[env_name][idx],
            yerr=ci_returns[env_name][idx],
            label=default_process_label(idx[1:]),
            title=env_name,
            color=colors[idx],
        )

handle_2D_axes_and_legend(axes=axes, legend_ncol=4, legend_offset=(2.6, -5.6))

# Locally modify the title of cartpole envs since they are too long
# NOTE(review): relies on these environments sitting at grid positions (0, 2)
# and (0, 3); verify against the order of env_names.
axes[0, 2].set_title("cartpole-balance")
axes[0, 3].set_title("cartpole-swingup")

### IQM Return and Performance profile

In [None]:
# Define grid of plots: a single row with two panels
# (left: aggregated IQM curve, right: performance profile)
ncols = 2
fig_agg, axes = plt.subplots(
    nrows=1,
    ncols=ncols,
    figsize=(6.5, 2.0),
    gridspec_kw={"wspace": 0.4, "hspace": 0.50},
)

# Assign colors to each of the methods we are comparing; the methods are taken
# from the first environment. A comprehension is used so that no loop variable
# (previously named `type`) rebinds the builtin for the rest of the session.
methods = mean_returns[env_names[0]].keys()
cmap = plt.get_cmap("tab10")
colors = {method: cmap(i) for i, method in enumerate(methods)}

# IQM Plot
# ep_length is set in the full-benchmark plotting cell.
# NOTE(review): the 250 cut-off presumably matches the length of the aggregated
# curves; confirm against get_bootstrap_intervals.
for idx in methods:
    plot_with_intervals(
        ax=axes[0],
        x=agg_steps[:250] // ep_length,
        y=agg_iqm_returns[idx],
        ylow=agg_iqm_ci_returns[idx][0],
        yhigh=agg_iqm_ci_returns[idx][1],
        label=default_process_label(idx[1:]),
        color=colors[idx],
        grid_color=LIGHT_GREY,
    )

axes[0].set_ylabel("Normalized IQM Return")
axes[0].set_xlabel("Episodes")

# Performance Profile
for idx in methods:
    plot_with_intervals(
        ax=axes[1],
        x=perf_steps,
        y=perf_score_dist[idx],
        ylow=perf_score_ci_dist[idx][0],
        yhigh=perf_score_ci_dist[idx][1],
        label=default_process_label(idx[1:]),
        color=colors[idx],
        grid_color=LIGHT_GREY,
    )

axes[1].set_ylabel("Fraction of runs" + "\n" + r"with score $>\tau$")
axes[1].set_xlabel(r"Normalized score ($\tau$)")

# One shared legend, anchored to the left panel and placed below the figure
axes[0].legend(loc="lower center", ncol=4, bbox_to_anchor=(1.1, -0.65), frameon=False)

### Zoom of performance profile

In [None]:
# Single small panel showing a magnified stretch of the performance profile
fig_zoom, ax = plt.subplots(
    nrows=1,
    ncols=1,
    figsize=(2.5, 1.0),
    gridspec_kw={"wspace": 0.4, "hspace": 0.50},
)

# Positions 150 (inclusive) to 250 (exclusive) of the profile arrays
zoom_window = slice(150, 250)

for idx in mean_returns[env_names[0]].keys():
    lower_band = perf_score_ci_dist[idx][0]
    upper_band = perf_score_ci_dist[idx][1]
    plot_with_intervals(
        ax=ax,
        x=perf_steps[zoom_window],
        y=perf_score_dist[idx][zoom_window],
        ylow=lower_band[zoom_window],
        yhigh=upper_band[zoom_window],
        label=default_process_label(idx[1:]),
        color=colors[idx],
        grid_color=LIGHT_GREY,
    )

### Plotting - Subset

In [None]:
from dist_mbrl.utils.plot import handle_1D_axes_and_legend

# Environments shown in the condensed figure, in left-to-right panel order
envs_to_plot = [
    "cartpole-swingup_sparse",
    "cheetah-run",
    "quadruped-run",
    "walker-run",
]

# One row of panels, one panel per selected environment
ncols = 4
nrows = 1
fig_lite, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(6.8, 1.0),
    gridspec_kw={"wspace": 0.45, "hspace": 0.50},
)

# Look up each panel by the name of the environment drawn in it
ax_dict = dict(zip(envs_to_plot, axes.flatten()))

# Draw every method's learning curve with its symmetric interval;
# colors and ep_length are reused from the earlier plotting cells
for env_name in envs_to_plot:
    env_steps = steps[env_name]
    env_means = mean_returns[env_name]
    env_intervals = ci_returns[env_name]
    for idx in env_means.keys():
        plot_with_symmetric_intervals(
            ax=ax_dict[env_name],
            x=env_steps[idx] // ep_length,
            y=env_means[idx],
            yerr=env_intervals[idx],
            label=default_process_label(idx[1:]),
            title=env_name,
            color=colors[idx],
        )

handle_1D_axes_and_legend(axes=axes, legend_ncol=4, legend_offset=(2.6, -1.2))

### Save figures

In [None]:
# All figures go to <parent of the working directory>/figures. Create the
# directory first so that savefig does not fail on a fresh checkout.
figures_dir = root_module.parent.joinpath("figures")
figures_dir.mkdir(parents=True, exist_ok=True)

# Figure object -> output file name, in the order they are written
figures_to_save = (
    (fig, "dmc_main_appendix.pdf"),
    (fig_agg, "dmc_rliable.pdf"),
    (fig_zoom, "dmc_perf_profile_zoom.pdf"),
    (fig_lite, "dmc_main.pdf"),
)

for figure, pdf_name in figures_to_save:
    fig_dir = figures_dir.joinpath(pdf_name)
    figure.savefig(fig_dir, bbox_inches="tight", transparent=False)

# License

>Copyright (c) 2024 Robert Bosch GmbH
>
>This program is free software: you can redistribute it and/or modify <br>
>it under the terms of the GNU Affero General Public License as published<br>
>by the Free Software Foundation, either version 3 of the License, or<br>
>(at your option) any later version.<br>
>
>This program is distributed in the hope that it will be useful,<br>
>but WITHOUT ANY WARRANTY; without even the implied warranty of<br>
>MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the<br>
>GNU Affero General Public License for more details.<br>
>
>You should have received a copy of the GNU Affero General Public License<br>
>along with this program.  If not, see <https://www.gnu.org/licenses/>.