Based on https://bambinos.github.io/bambi/notebooks/distributional_models.html

In [None]:
import arviz as az
import bambi as bmb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from matplotlib.lines import Line2D

In [None]:
import warnings

# ArviZ raises FutureWarning messages in this notebook; hide that category.
warnings.simplefilter("ignore", FutureWarning)

# Apply the "arviz-doc" plotting style to the figures created below.
az.style.use("arviz-doc")

In [None]:
# Simulate a gamma outcome whose mean AND shape both change with the predictor x.
N = 200
rng = np.random.default_rng(121195)  # fixed seed -> the same synthetic data on every run

# Intercept and slope of the log-mean.
a = 0.5
b = 1.1

x = rng.uniform(low=-1.5, high=1.5, size=N)

# log(shape) is linear in x plus a small amount of Gaussian noise.
shape = np.exp(0.3 + x * 0.5 + rng.normal(scale=0.1, size=N))

# NumPy parameterises the gamma as (shape, scale); using scale = mean / shape
# gives E[y] = exp(a + b * x).
y = rng.gamma(shape=shape, scale=np.exp(a + b * x) / shape, size=N)

data = pd.DataFrame(dict(x=x, y=y))

# Evenly spaced grid over the predictor range, used later for predictions.
new_data = pd.DataFrame(dict(x=np.linspace(-1.5, 1.5, 50)))

In [None]:
# Baseline gamma regression: a single formula, so only the mean of y is
# given a linear predictor in x (log link).
formula = bmb.Formula("y ~ x")
model_constant = bmb.Model(
    formula,
    data,
    family="gamma",
    link="log",
)
# Bare expression so the notebook displays the model summary.
model_constant

In [None]:
# Build the model first (no sampling happens here), then render its graph.
model_constant.build()
model_constant.graph()

In [None]:
# Sample the constant-alpha model with a fixed seed, asking for the
# log-likelihood to be included in the returned inference data.
idata_constant = model_constant.fit(
    idata_kwargs=dict(log_likelihood=True),
    random_seed=121195,
)

In [None]:
# Run both prediction kinds on the new x grid, in the original order
# ("response_params" first, then "response"). The return value is not
# captured; the summaries below read from `idata_constant` afterwards.
for kind in ("response_params", "response"):
    model_constant.predict(idata_constant, kind=kind, data=new_data)

# Stack the posterior predictive draws of y once and summarise them twice.
pp_constant = az.extract(idata_constant.posterior_predictive, var_names="y")

# 2.5% / 97.5% quantiles and the mean over the stacked "sample" dimension.
qts_constant = pp_constant.quantile([0.025, 0.975], dim="sample").to_numpy()
mean_constant = pp_constant.mean(dim="sample").to_numpy()

In [None]:
fig, ax = plt.subplots(figsize=(8, 4.5), dpi=120)

# `qts_constant` already holds the interval bounds (lower quantiles in row 0,
# upper quantiles in row 1), so hand them to ArviZ as precomputed `hdi_data`
# with shape (n_points, 2). Passing them positionally as `y` only worked
# because `az.hdi` currently treats 2-D input as (draw, shape), a deprecated
# interpretation that emits a FutureWarning and is announced to change.
az.plot_hdi(
    new_data["x"],
    hdi_data=qts_constant.T,
    ax=ax,
    fill_kwargs={"alpha": 0.4},
)
# Posterior predictive mean on top of the band, observed data underneath.
ax.plot(new_data["x"], mean_constant, color="C0", lw=2)
ax.scatter(data["x"], data["y"], color="k", alpha=0.2)
ax.set(xlabel="Predictor", ylabel="Outcome");

In [None]:
# Distributional model: a second formula gives the gamma `alpha` parameter
# its own linear predictor in x, with a log link for both parameters.
formula_varying = bmb.Formula("y ~ x", "alpha ~ x")
model_varying = bmb.Model(
    formula_varying,
    data,
    family="gamma",
    link=dict(mu="log", alpha="log"),
)
# Bare expression so the notebook displays the model summary.
model_varying

In [None]:
# Build the model first (no sampling happens here), then render its graph.
model_varying.build()
model_varying.graph()

In [None]:
# Same seed and log-likelihood request as the constant model. Here
# `include_response_params=True` is also passed; the plotting cell further
# down reads a per-observation "alpha" from this fit's posterior group.
idata_varying = model_varying.fit(
    include_response_params=True,
    idata_kwargs=dict(log_likelihood=True),
    random_seed=121195,
)

In [None]:
fig, ax = plt.subplots(figsize=(8, 4.5), dpi=120)

# Varying model: draw one very thin KDE curve of alpha for each observation.
alpha_by_obs = idata_varying.posterior["alpha"]
for idx in idata_varying.posterior.coords["__obs__"]:
    values = alpha_by_obs.sel(__obs__=idx).to_numpy().flatten()
    ax.plot(*az.kde(values), lw=0.05, color="k")

# Constant model: a single KDE over all draws of alpha.
values = idata_constant.posterior["alpha"].to_numpy().flatten()
grid, pdf = az.kde(values)
ax.plot(grid, pdf, lw=2, color="C0")

# Proxy legend entries: the per-observation curves are too thin to serve as
# readable legend handles, so thicker stand-in lines are used instead.
handles = [
    Line2D([0], [0], color="k", lw=1.5, alpha=0.6, label="Varying alpha"),
    Line2D([0], [0], color="C0", lw=1.5, label="Constant alpha"),
]
legend = ax.legend(handles=handles, loc="upper right", fontsize=14)

ax.set_xlabel("Alpha posterior")
ax.set_ylabel("Density");

In [None]:
# Run both prediction kinds on the new x grid, in the original order
# ("response_params" first, then "response"). The return value is not
# captured; the summaries below read from `idata_varying` afterwards.
for kind in ("response_params", "response"):
    model_varying.predict(idata_varying, kind=kind, data=new_data)

# Stack the posterior predictive draws of y once and summarise them twice.
pp_varying = az.extract(idata_varying.posterior_predictive, var_names="y")

# 2.5% / 97.5% quantiles and the mean over the stacked "sample" dimension.
qts_varying = pp_varying.quantile([0.025, 0.975], dim="sample").to_numpy()
mean_varying = pp_varying.mean(dim="sample").to_numpy()

In [None]:
fig, ax = plt.subplots(figsize=(8, 4.5), dpi=120)

# Both quantile arrays already hold the interval bounds (lower quantiles in
# row 0, upper quantiles in row 1). They are passed to ArviZ as precomputed
# `hdi_data` with shape (n_points, 2) instead of positionally as `y`, which
# relied on `az.hdi`'s deprecated (draw, shape) reading of 2-D input.

# Constant-alpha model: band in ArviZ's default fill colour, mean line in "C1".
az.plot_hdi(
    new_data["x"],
    hdi_data=qts_constant.T,
    ax=ax,
    fill_kwargs={"alpha": 0.4},
)
ax.plot(new_data["x"], mean_constant, color="C1", label="constant")

# Varying-alpha model: band and mean line in black.
az.plot_hdi(
    new_data["x"],
    hdi_data=qts_varying.T,
    ax=ax,
    fill_kwargs={"alpha": 0.4, "color": "k"},
)
ax.plot(new_data["x"], mean_varying, color="k", label="varying")
ax.set(xlabel="Predictor", ylabel="Outcome")
plt.legend();

In [None]:
# Load the bikes dataset and keep only every 50th row, renumbering the index
# from 0. You may later try refitting the model on the whole dataset.
data = bmb.load_data("bikes").iloc[::50].reset_index(drop=True)

In [None]:
# Negative binomial model for the bike counts. The response and `alpha` each
# get their own `bs(hour, 8, intercept=True)` spline term; `0 +` drops the
# separate intercept since the spline term is requested with its own.
formula = bmb.Formula(
    "count ~ 0 + bs(hour, 8, intercept=True)",
    "alpha ~ 0 + bs(hour, 8, intercept=True)",
)
model_bikes = bmb.Model(
    formula,
    data,
    family="negativebinomial",
)
# Bare expression so the notebook displays the model summary.
model_bikes

In [None]:
# Seed the sampler, as the other fits in this notebook do, so that the bikes
# results are reproducible from run to run.
idata_bikes = model_bikes.fit(random_seed=121195)

In [None]:
# 200 evenly spaced hours from 0 to 23 on which to evaluate the fitted model.
hour = np.linspace(0, 23, 200)
new_data = pd.DataFrame(dict(hour=hour))

# The return value is not captured; the plotting cell reads the results
# from `idata_bikes` afterwards.
model_bikes.predict(idata_bikes, kind="response", data=new_data)

In [None]:
q = [0.025, 0.975]
dims = ("chain", "draw")

# Summaries over all chains and draws: mean and 95% interval of `mu`, plus
# the 95% interval of the posterior predictive counts.
mu_draws = idata_bikes.posterior["mu"]
mean = mu_draws.mean(dims).to_numpy()
mean_interval = mu_draws.quantile(q, dims).to_numpy()
y_interval = idata_bikes.posterior_predictive["count"].quantile(q, dims).to_numpy()

fig, ax = plt.subplots(figsize=(12, 4))
ax.scatter(data["hour"], data["count"], alpha=0.3, color="k")
ax.plot(hour, mean, color="C3")
# Inner band: interval for the mean.
ax.fill_between(hour, mean_interval[0], mean_interval[1], alpha=0.5, color="C1")
# Outer band: interval for the predicted counts. `y_interval` already holds
# the bounds (lower quantiles in row 0, upper in row 1), so it is passed as
# precomputed `hdi_data` with shape (n_points, 2) rather than positionally as
# `y`, which relied on `az.hdi`'s deprecated (draw, shape) reading of 2-D input.
az.plot_hdi(
    hour,
    hdi_data=y_interval.T,
    fill_kwargs={"color": "C1", "alpha": 0.3},
    ax=ax,
);