Based on https://bambinos.github.io/bambi/notebooks/hsgp_1d.html

In [None]:
from formulae import design_matrices

import arviz as az
import bambi as bmb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from bambi.interpret import plot_predictions
from matplotlib.lines import Line2D
from pathlib import Path
from data_utils import download_single_csv

In [None]:
# Simulate a smooth 1-D curve as a combination of 6 B-spline basis columns,
# then add Gaussian noise to obtain the observed response.
rng = np.random.default_rng(seed=121195)

size = 100
sigma = 0.15  # scale applied to the standard-normal observation noise
x = np.linspace(0, 50, num=size)

# Draw order matters for reproducibility: spline coefficients first, noise later.
b = rng.normal(size=6) * 0.1

# Design matrix of the B-spline basis evaluated at x (no separate intercept column).
dm = design_matrices("0 + bs(x, df=6, intercept=True)", pd.DataFrame({"x": x}))
X = np.array(dm.common)

f = (10 * X) @ b  # noise-free curve
y = f + sigma * rng.normal(size=size)  # noisy observations
df = pd.DataFrame(dict(x=x, y=y))

# Observations as points, true curve as a black line.
fig, ax = plt.subplots(figsize=(9, 6))
ax.scatter(x, y, s=30, alpha=0.8)
ax.plot(x, f, color="black");

In [None]:
# Model with a single HSGP term on x; no priors are passed, so Bambi's defaults apply.
formula = "y ~ 0 + hsgp(x, m=10, c=2)"
model = bmb.Model(formula, df)
model

In [None]:
# Sample the posterior, then report the total count of divergent transitions.
idata = model.fit(random_seed=121195)
n_diverging = idata.sample_stats["diverging"].sum()
print(n_diverging.to_numpy())

In [None]:
# Trace plot of every sampled variable; constrained layout is passed to the plotting backend.
az.plot_trace(idata, backend_kwargs=dict(layout="constrained"));

In [None]:
# Priors for the parameters of the HSGP term.
prior_hsgp = dict(
    sigma=bmb.Prior("Exponential", lam=2),  # amplitude
    ell=bmb.Prior("InverseGamma", mu=10, sigma=1),  # lengthscale
)

# This is the dictionary we pass to Bambi: one entry keyed by the HSGP term
# exactly as it is written in the formula, plus the prior for "sigma".
hsgp_term = "hsgp(x, m=10, c=2)"
priors = {hsgp_term: prior_hsgp, "sigma": bmb.Prior("HalfNormal", sigma=10)}

model = bmb.Model("y ~ 0 + hsgp(x, m=10, c=2)", df, priors=priors)
model

In [None]:
# Refit with the custom priors and print how many transitions diverged.
idata = model.fit(random_seed=121195)
diverging = idata.sample_stats["diverging"]
print(diverging.sum().to_numpy())

In [None]:
# Trace plot for the custom-prior fit.
layout_kwargs = {"layout": "constrained"}
az.plot_trace(idata, backend_kwargs=layout_kwargs);

In [None]:
# Same HSGP priors as before, now used with the centered=True variant of the term.
prior_hsgp = dict(
    sigma=bmb.Prior("Exponential", lam=2),  # amplitude
    ell=bmb.Prior("InverseGamma", mu=10, sigma=1),  # lengthscale
)

# This is the dictionary we pass to Bambi; the key has to repeat the term
# as written in the formula, including the centered=True argument.
hsgp_term = "hsgp(x, m=10, c=2, centered=True)"
priors = {hsgp_term: prior_hsgp, "sigma": bmb.Prior("HalfNormal", sigma=10)}

formula = "y ~ 0 + hsgp(x, m=10, c=2, centered=True)"
model = bmb.Model(formula, df, priors=priors)
model

In [None]:
# Fit the centered model and print the number of divergent transitions.
idata = model.fit(random_seed=121195)
total_diverging = idata.sample_stats["diverging"].sum().to_numpy()
print(total_diverging)

In [None]:
# Trace plot for the centered-parametrization fit.
az.plot_trace(
    idata,
    backend_kwargs={"layout": "constrained"},
);

In [None]:
# Grey observations with the model's predictions along x drawn on the same axes.
fig, ax = plt.subplots(figsize=(9, 6))
points_style = dict(s=30, color="0.5", alpha=0.5)
ax.scatter(df["x"], df["y"], **points_style)
plot_predictions(model, idata, "x", ax=ax)
ax.set(xlabel="Predictor", ylabel="Observed");

In [None]:
# Posterior predictive draws on a dense grid of 500 new x values.
new_data = pd.DataFrame({"x": np.linspace(0, 50, num=500)})
# The next line reads idata.posterior_predictive, so predict() is relied on
# to add that group to idata.
model.predict(idata, kind="response", data=new_data)

# The posterior predictive array is laid out as (chain, draw, observation).
# Collapse chain and draw into one sample axis with -1 so the reshape works for
# any number of chains and draws; a hard-coded sample count breaks as soon as
# the sampler runs with a different number of chains.
pps = idata.posterior_predictive["y"].to_numpy().reshape(-1, len(new_data))

# Pointwise 95% interval (2.5% and 97.5% quantiles) across all samples.
qts = np.quantile(pps, q=(0.025, 0.975), axis=0)

fig, ax = plt.subplots(figsize=(9, 6))
ax.fill_between(new_data["x"], qts[0], qts[1], color="C0", alpha=0.6)
ax.scatter(df["x"], df["y"], s=30, color="C1", alpha=0.9)
ax.plot(x, f, color="black", ls="--")
ax.set(xlabel="Predictor", ylabel="Observed")

# Proxy artists: the filled band has no line handle of its own for the legend.
handles = [Line2D([], [], color="black", ls="--"), Line2D([], [], color="C0")]
labels = ["True curve", "Posterior predictive distribution"]
ax.legend(handles, labels);

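Note: a hard-coded `.reshape(2000, 500)` in the cell above fails with `ValueError: cannot reshape array of size 2000000 into shape (2000,500)`. The posterior predictive array here holds 2,000,000 values, i.e. 4,000 samples (chains × draws) for each of the 500 new points, not 2,000. Let NumPy infer the sample axis with `.reshape(-1, 500)` instead.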

In [None]:
# Fetch gam_data.csv via the project helper; the next cell reads it from data/.
# NOTE(review): download_single_csv is defined in data_utils, not shown here —
# presumably it saves the file inside the given directory; confirm.
data_dir = Path("./data")
download_single_csv(data_dir, "gam_data.csv")

In [None]:
# Load the dataset and store the grouping column "fac" as a categorical.
data = pd.read_csv("data/gam_data.csv")
data["fac"] = pd.Categorical(data["fac"])

# Preview the first rows of the three columns used below.
preview_columns = ["x2", "y", "fac"]
data[preview_columns].head()

In [None]:
# Scatter of y against x2, one matplotlib cycle color ("C0", "C1", ...) per level of fac.
fac_codes = pd.Categorical(data["fac"]).codes
colors = [f"C{code}" for code in fac_codes]

fig, ax = plt.subplots(figsize=(9, 5))
ax.scatter(data["x2"], data["y"], color=colors, alpha=0.6)
ax.set(xlabel="x2", ylabel="y");

In [None]:
# HSGP on x2 with one function per level of fac (by=fac).
hsgp_term = "hsgp(x2, by=fac, m=12, c=1.5)"

# Both HSGP parameters get their own Exponential(lam=3) prior.
prior_hsgp = {name: bmb.Prior("Exponential", lam=3) for name in ("sigma", "ell")}
priors = {hsgp_term: prior_hsgp, "sigma": bmb.Prior("HalfNormal", sigma=1)}

model = bmb.Model("y ~ 0 + hsgp(x2, by=fac, m=12, c=1.5)", data, priors=priors)

# Alias the long term name to "hsgp"; later cells select variables such as
# "hsgp_weights" by that shorter prefix.
model.set_alias({hsgp_term: "hsgp"})
model

In [None]:
# Build the model explicitly before asking for its graph (sampling happens in a later cell).
# NOTE(review): graph() presumably requires a built model — confirm against Bambi docs.
model.build()
# Bare last expression: the returned graph is what the notebook cell displays.
model.graph()

In [None]:
# Sample with target_accept raised to 0.95, then print the number of divergences.
idata = model.fit(target_accept=0.95, random_seed=121195)
n_diverging = idata.sample_stats.diverging.sum()
print(n_diverging.item())

In [None]:
# Trace plot restricted to the aliased HSGP variables and the "sigma" variable.
trace_vars = ["hsgp_weights", "hsgp_sigma", "hsgp_ell", "sigma"]
az.plot_trace(idata, var_names=trace_vars, backend_kwargs={"layout": "constrained"});

In [None]:
# Observations colored by level of fac, with predictions over x2 and fac on the same axes.
fac_codes = pd.Categorical(data["fac"]).codes
colors = [f"C{code}" for code in fac_codes]

fig, ax = plt.subplots(figsize=(9, 5))
ax.scatter(data["x2"], data["y"], color=colors, alpha=0.6)
plot_predictions(model, idata, ["x2", "fac"], ax=ax);

In [None]:
# Same by=fac model, now with share_cov=False in the HSGP term.
hsgp_term = "hsgp(x2, by=fac, m=12, c=1.5, share_cov=False)"

prior_hsgp = dict(
    sigma=bmb.Prior("Exponential", lam=1),
    ell=bmb.Prior("Exponential", lam=3),
)
priors = {hsgp_term: prior_hsgp, "sigma": bmb.Prior("HalfNormal", sigma=1)}

formula = "y ~ 0 + hsgp(x2, by=fac, m=12, c=1.5, share_cov=False)"
model = bmb.Model(formula, data, priors=priors)

# Alias the long term name to "hsgp" so variables can be selected as "hsgp_*".
model.set_alias({hsgp_term: "hsgp"})
model

In [None]:
# Build the share_cov=False model explicitly before asking for its graph.
# NOTE(review): graph() presumably requires a built model — confirm against Bambi docs.
model.build()
# Bare last expression: the returned graph is what the notebook cell displays.
model.graph()

In [None]:
# Sample the share_cov=False model with the same sampler settings as the previous fit.
sampler_kwargs = dict(target_accept=0.95, random_seed=121195)
idata = model.fit(**sampler_kwargs)

In [None]:
# Trace plot of the HSGP lengthscale and amplitude and the "sigma" variable only.
trace_vars = ["hsgp_ell", "hsgp_sigma", "sigma"]
az.plot_trace(idata, var_names=trace_vars, backend_kwargs={"layout": "constrained"});

In [None]:
# The HSGP weights get their own trace plot.
az.plot_trace(
    idata,
    var_names=["hsgp_weights"],
    backend_kwargs=dict(layout="constrained"),
);

In [None]:
# One panel per hsgp_by label (1, 2, 3); each panel overlays KDEs of the
# posterior draws of weights 0..basis_n-1, one color per weight index.
basis_n = 6
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(7, 10))
weights = idata.posterior["hsgp_weights"]
for i, ax in enumerate(axes):
    # Labels along hsgp_by are selected as 1-based values here.
    values = weights.sel(hsgp_by=i + 1)
    for j in range(basis_n):
        draws = values.sel(hsgp_weights_dim=j).to_numpy().flatten()
        az.plot_kde(draws, ax=ax, plot_kwargs={"color": f"C{j}"})

In [None]:
# Same per-group KDE panels as for basis_n = 6, now covering weights 0..8.
basis_n = 9
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(7, 10))
for i, ax in enumerate(axes):
    # Posterior draws of the weights for the group labelled i + 1.
    values = idata.posterior["hsgp_weights"].sel(hsgp_by=i + 1)
    for j in range(basis_n):
        weight_draws = values.sel(hsgp_weights_dim=j)
        az.plot_kde(
            weight_draws.to_numpy().flatten(),
            ax=ax,
            plot_kwargs=dict(color=f"C{j}"),
        )

In [None]:
# Per-group KDEs of the first basis_n HSGP weights (indices 0..basis_n-1):
# three stacked panels, one per hsgp_by label 1..3.
basis_n = 9
fig, axes = plt.subplots(3, 1, figsize=(7, 10))
weights = idata.posterior["hsgp_weights"]
for i, ax in enumerate(axes):
    values = weights.sel({"hsgp_by": i + 1})
    for j in range(basis_n):
        # Flatten chains and draws into one vector for the density estimate.
        flat_draws = values.sel({"hsgp_weights_dim": j}).to_numpy().flatten()
        az.plot_kde(flat_draws, ax=ax, plot_kwargs={"color": f"C{j}"})