In [None]:
import random

import numpy as np
import scipy.stats as stats

from dist_mbrl.envs.toy_mdp import ToyMDP1

# Seed both global random number generators (stdlib and NumPy) so that
# re-running the notebook from a fresh kernel reproduces the same samples.
seed = 0
random.seed(seed)
np.random.seed(seed)

### Sample MDPs from the posterior and compute the value distribution

In [None]:
# Define posterior over the three parameters defining the toy MRP
def build_posterior(alphas, beta):
    """Instantiate one ``ToyMDP1`` per alpha and gather the models' ``p`` and ``r``.

    Args:
        alphas: Iterable of alpha values; ``ToyMDP1(alpha, beta)`` is built
            once for every entry.
        beta: Second constructor argument, shared by all models.

    Returns:
        Tuple ``(p, r)`` of NumPy arrays created from the lists of the
        models' ``.p`` and ``.r`` attributes, in the order of ``alphas``.
    """
    models = [ToyMDP1(alpha, beta) for alpha in alphas]
    transitions = np.array([model.p for model in models])
    rewards = np.array([model.r for model in models])
    return transitions, rewards


def compute_values(p_ensemble, r_ensemble, discount=0.99):
    """Compute the value function of every model in an ensemble.

    For each model ``i`` the linear system
    ``(I - discount * p_ensemble[i]) @ v = r_ensemble[i]`` is solved for ``v``.
    The system is solved directly with ``np.linalg.solve`` instead of forming
    the explicit inverse, which is cheaper and numerically more accurate.

    Args:
        p_ensemble: Array whose leading axis indexes the models; each
            ``p_ensemble[i]`` must be a square matrix.
        r_ensemble: Array indexed the same way; ``r_ensemble[i]`` is the
            right-hand side paired with ``p_ensemble[i]``.
        discount: Scalar multiplying each ``p_ensemble[i]`` in the system
            matrix. Defaults to 0.99.

    Returns:
        Array stacking the per-model solutions along axis 0.

    Raises:
        numpy.linalg.LinAlgError: If a system matrix is singular.
        ValueError: If the ensemble contains no models (nothing to stack).
    """
    num_models = p_ensemble.shape[0]
    values = []
    for i in range(num_models):
        p = p_ensemble[i]
        r = r_ensemble[i]
        system = np.eye(p.shape[0]) - discount * p
        values.append(np.linalg.solve(system, r))
    return np.stack(values, axis=0)


# Monte Carlo approximation of the posterior over values: draw many alphas,
# build one model per draw, then evaluate every model's value function.
num_samples = 5000
beta = 0.9
# Gaussian draws for alpha, clipped to the interval [0, 1]
alphas = stats.norm.rvs(loc=0.4, scale=0.1, size=num_samples).clip(0.0, 1.0)
p, r = build_posterior(alphas, beta)
vf = compute_values(p, r)

### Plot

In [None]:
%matplotlib widget
import matplotlib.pyplot as plt

from dist_mbrl.utils.plot import JMLR_PARAMS

# Apply the plotting parameters imported as JMLR_PARAMS
plt.rcParams.update(JMLR_PARAMS)

# One row, two panels: density of the sampled alphas (left) and density of
# the first column of the computed values (right).
fig, axes = plt.subplots(
    nrows=1, ncols=2, figsize=(3.5, 2.0), gridspec_kw={"wspace": 0.35, "hspace": 0.2}
)

# Left panel: Gaussian kernel density estimate of the alpha samples
x = np.linspace(0, 1, 5000)
kernel_p = stats.gaussian_kde(alphas)
axes[0].plot(x, kernel_p.pdf(x), color="tab:red", lw=2.0)
axes[0].text(0.6, 2, r"$\Phi(P)$", fontsize=14)
axes[0].set(
    xlabel=r"$X$",
    ylabel="Probability density",
    ylim=(0.0, None),  # pin the lower limit only; the upper one is left unchanged
    xlim=(-0.1, 1.1),
)

# Right panel: Gaussian kernel density estimate of vf[:, 0]
x = np.linspace(-1, 1, 5000)
kernel_v = stats.gaussian_kde(vf[:, 0])
axes[1].plot(x, kernel_v.pdf(x), color="tab:blue", lw=2.0)
axes[1].text(-0.85, 0.85, r"$\mu(s_0)$", fontsize=14)
axes[1].set(
    xlabel=r"$V(s_0)$",
    ylabel="",
    ylim=(0.0, None),  # pin the lower limit only; the upper one is left unchanged
    xlim=(-1.1, 1.1),
)
plt.show()

### Save figures

In [None]:
from pathlib import Path

# Save the figure as a PDF in the ``figures`` folder next to the current
# working directory. NOTE: despite its name, ``fig_dir`` is the full path of
# the output file; the name is kept so other cells referring to it still work.
root_module = Path.cwd()
fig_dir = root_module.parent.joinpath("figures/tabular_example_value_distribution.pdf")
# savefig does not create missing directories, so make sure the target
# folder exists (e.g. on a fresh checkout) before writing.
fig_dir.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(fig_dir, bbox_inches="tight", transparent=False)

# License

>Copyright (c) 2024 Robert Bosch GmbH
>
>This program is free software: you can redistribute it and/or modify <br>
>it under the terms of the GNU Affero General Public License as published<br>
>by the Free Software Foundation, either version 3 of the License, or<br>
>(at your option) any later version.<br>
>
>This program is distributed in the hope that it will be useful,<br>
>but WITHOUT ANY WARRANTY; without even the implied warranty of<br>
>MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the<br>
>GNU Affero General Public License for more details.<br>
>
>You should have received a copy of the GNU Affero General Public License<br>
>along with this program.  If not, see <https://www.gnu.org/licenses/>.