In [None]:
import random

import numpy as np
import scipy.stats as stats

from dist_mbrl.envs.toy_mdp import ToyMDP1

# Seed every global RNG used by this notebook with one shared value
seed = 0
for seed_rng in (np.random.seed, random.seed):
    seed_rng(seed)

### Sample MDPs from Posterior and compute value distribution

In [None]:
# Define posterior over the three parameters defining the toy MRP
def build_posterior(alphas, beta):
    """Collect the ``p`` and ``r`` attributes of one ``ToyMDP1`` per alpha.

    Args:
        alphas: iterable of alpha values; one ``ToyMDP1(alpha, beta)`` is
            built for each entry.
        beta: second constructor argument, shared by every model.

    Returns:
        Tuple ``(p, r)`` of NumPy arrays built from the per-model ``p`` and
        ``r`` attributes, in the same order as ``alphas``.
    """
    # The inner generator is lazy, so each model is constructed and read
    # (p first, then r) before the next one is created.
    per_model = [
        (model.p, model.r)
        for model in (ToyMDP1(alpha, beta) for alpha in alphas)
    ]
    transitions = np.array([pair[0] for pair in per_model])
    rewards = np.array([pair[1] for pair in per_model])
    return transitions, rewards


def compute_values(p_ensemble, r_ensemble, discount=0.99):
    """Compute the value function of every model in an ensemble.

    For each model ``i`` the linear system
    ``(I - discount * p_ensemble[i]) @ v = r_ensemble[i]`` is solved for ``v``.
    The system is solved directly instead of forming the explicit inverse,
    which is both cheaper and numerically more accurate.

    Args:
        p_ensemble: array indexed by model along its first axis; each entry
            is used as a square matrix (assumed to be a state-to-state
            transition matrix -- confirm against ``ToyMDP1``).
        r_ensemble: array indexed by model along its first axis; each entry
            is the right-hand side for the matching matrix in ``p_ensemble``.
        discount: scalar multiplying each matrix before it is subtracted
            from the identity.

    Returns:
        NumPy array stacking the per-model solutions along axis 0.

    Raises:
        ValueError: if the ensemble contains no models (``np.stack`` on an
            empty list).
        numpy.linalg.LinAlgError: if ``I - discount * p`` is singular for
            some model.
    """
    num_models = p_ensemble.shape[0]
    vfs = []
    for i in range(num_models):
        p = p_ensemble[i]
        identity = np.eye(p.shape[0])
        vfs.append(np.linalg.solve(identity - discount * p, r_ensemble[i]))
    return np.stack(vfs, axis=0)


# Generate many samples to approximate the posterior over values.
# alpha is drawn from a normal distribution (loc=0.4, scale=0.1) and clipped
# to [0, 1]; beta is held fixed for every sampled model.
# NOTE(review): the clipping suggests alpha is a probability -- confirm
# against ToyMDP1.
num_samples = 5000
beta = 0.9
alphas = np.clip(
    stats.norm.rvs(loc=0.4, scale=0.1, size=num_samples), a_min=0.0, a_max=1.0
)
# One (p, r) pair per sampled alpha.
p, r = build_posterior(alphas, beta)
# Per-model value functions, using compute_values' default discount.
vf = compute_values(p, r)

### Compute Quantile-Regression Loss

In [None]:
# First we compute the ground-truth quantiles of the value function at the
# levels listed in tau (10th, 25th, 50th, 75th and 90th percentiles)
tau = [0.1, 0.25, 0.5, 0.75, 0.9]


def get_quantiles(values, target_levels):
    """Pick the sorted samples whose quantile level is closest to each target.

    The ``k``-th smallest of ``n`` samples (0-based) is assigned the midpoint
    level ``(2k + 1) / (2n)``. For every requested level the sample with the
    nearest midpoint level is returned; ties go to the smaller sample.

    Args:
        values: samples to take quantiles of.
        target_levels: iterable of quantile levels to look up.

    Returns:
        NumPy array with one selected sample per entry of ``target_levels``.
    """
    ordered = np.sort(values)
    count = len(ordered)
    midpoint_levels = (2 * np.arange(count) + 1) / (2.0 * count)
    nearest = np.fromiter(
        (np.abs(midpoint_levels - wanted).argmin() for wanted in target_levels),
        dtype=np.intp,
    )
    return ordered[nearest]


# Empirical quantiles of the sampled values in column 0, one per level in tau
true_quantiles = get_quantiles(vf[:, 0], tau)


def _mean_qr_loss(samples, estimate, level):
    """Average quantile-regression loss of ``estimate`` against ``samples``.

    Samples at or above the estimate are weighted by ``level``, samples
    below it by ``1 - level``; each weight multiplies the absolute error.
    """
    weights = np.where(samples >= estimate, level, 1 - level)
    return np.mean(weights * np.abs(samples - estimate))


# now we want to plot the quantile-regression loss for each value of tau,
# so evaluate it on a grid of candidate estimates
values = np.linspace(-1, 1, 1000)
target = vf[:, 0]
qr_loss = [
    np.array([_mean_qr_loss(target, v, t) for v in values]) for t in tau
]

### Plot

In [None]:
%matplotlib widget
import matplotlib.pyplot as plt

from dist_mbrl.utils.plot import JMLR_PARAMS

plt.rcParams.update(JMLR_PARAMS)

# Two side-by-side panels: value density (left) and QR-loss curves (right)
fig, axes = plt.subplots(
    1, 2, figsize=(6.5, 2.0), gridspec_kw=dict(wspace=0.50, hspace=0.2)
)
density_ax, loss_ax = axes

# Palette for the per-quantile colours
cmap = plt.get_cmap("tab10")

# Left panel: kernel density estimate of the sampled values in column 0
x = np.linspace(-1, 1, 5000)
kernel_v = stats.gaussian_kde(vf[:, 0])
density_ax.plot(x, kernel_v.pdf(x), color="tab:blue", lw=2.0)

labels = [rf"$\tau = {{{t}}}$" for t in tau]

# Per-quantile colours start at palette index 1
for i, (q, label, loss_curve) in enumerate(
    zip(true_quantiles, labels, qr_loss), start=1
):
    colour = cmap(i)
    # Mark the empirical quantile on the density ...
    density_ax.axvline(q, c=colour, ls="--", label=label)
    # ... and the loss curve together with its grid minimiser on the right.
    loss_ax.plot(values, loss_curve, c=colour)
    loss_ax.axvline(values[np.argmin(loss_curve)], c=colour, ls="--")


density_ax.legend(fontsize=9)
density_ax.set(xlabel=r"$V(s_0)$", ylabel="Probability density")
density_ax.text(-0.8, 0.2, r"$\mu(s_0)$", fontsize=14)
density_ax.set_ylim(bottom=0.0)
density_ax.set_xlim(-1.1, 1.1)

loss_ax.set(xlabel=r"$v$", ylabel=r"QR loss $\mathcal{L}_{s_0}^{\tau}(v)$")

plt.show()

### Save figure

In [None]:
from pathlib import Path

# The figure is written next to the current working directory, under
# ../figures/, so the result depends on where the notebook is run from.
root_module = Path.cwd()
# NOTE: despite its name, fig_dir is the path of the output PDF itself.
fig_dir = root_module.parent.joinpath("figures/tabular_qr_loss.pdf")
# savefig does not create missing directories; make sure the target folder
# exists so a fresh checkout does not fail with FileNotFoundError.
fig_dir.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(fig_dir, bbox_inches="tight", transparent=False)

# License

>Copyright (c) 2024 Robert Bosch GmbH
>
>This program is free software: you can redistribute it and/or modify <br>
>it under the terms of the GNU Affero General Public License as published<br>
>by the Free Software Foundation, either version 3 of the License, or<br>
>(at your option) any later version.<br>
>
>This program is distributed in the hope that it will be useful,<br>
>but WITHOUT ANY WARRANTY; without even the implied warranty of<br>
>MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the<br>
>GNU Affero General Public License for more details.<br>
>
>You should have received a copy of the GNU Affero General Public License<br>
>along with this program.  If not, see <https://www.gnu.org/licenses/>.