Based on https://bambinos.github.io/bambi/notebooks/count_roaches.html

In [None]:
import arviz as az
import bambi as bmb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from scipy.stats import nbinom
from data_utils import download_single_csv
from pathlib import Path

In [None]:
# Fixed integer seed, presumably for reproducible sampling.
SEED = 7_355_608
# Use ArviZ's dark-grid theme for every plot that follows.
az.style.use("arviz-darkgrid")

In [None]:
# Presumably fetches roaches.csv into the local ./data directory; the helper
# lives in the project's data_utils module — confirm its exact behavior there.
data_dir = Path("./data")
download_single_csv(data_dir, "roaches.csv")

In [None]:
# Load the data set; the first CSV column becomes the row index.
roaches = pd.read_csv("data/roaches.csv", index_col=0)
# Rescale the roach1 column by a factor of 1/100.
roaches["roach1"] = roaches["roach1"].div(100)
# Show the first rows as the cell output.
roaches.head()

In [None]:
# Poisson regression of the roach count `y` on roach1, treatment and senior.
# log(exposure2) is passed as an offset, i.e. it is added to the linear
# predictor without a coefficient of its own.
model_1 = bmb.Model(
    "y ~ roach1 + treatment  + senior + offset(log(exposure2))",
    family="poisson",
    data=roaches,
)
# Seed the sampler with the notebook-level SEED so that re-running the cell
# reproduces the same posterior draws.
idata_1 = model_1.fit(random_seed=SEED)

In [None]:
# Summary table for the Poisson fit, shown as the cell output.
summary_1 = az.summary(idata_1)
summary_1

In [None]:
def plot_log_posterior_ppc(model, idata):
    """Plot a posterior predictive check of ``y`` on the ``log(y + 1)`` scale.

    ``model.predict`` is called with ``inplace=True``, so ``idata`` is
    modified. In addition, a ``"log(y+1)"`` variable is written into both the
    ``posterior_predictive`` and the ``observed_data`` groups of ``idata``.

    Parameters
    ----------
    model
        Fitted model exposing ``predict(idata, kind=..., inplace=...)``.
    idata
        Inference data belonging to ``model``.

    Returns
    -------
    Whatever ``az.plot_ppc`` returns for the transformed variable.
    """
    model.predict(idata, kind="response", inplace=True)

    log_name = "log(y+1)"
    # Same transform on both groups so that plot_ppc can pair them by name;
    # the +1 keeps zero counts finite under the logarithm.
    for group in (idata.posterior_predictive, idata.observed_data):
        group[log_name] = np.log(group["y"] + 1)

    return az.plot_ppc(idata, var_names=[log_name])

In [None]:
# Posterior predictive check for the Poisson model on the log(y + 1) scale.
plot_log_posterior_ppc(model=model_1, idata=idata_1)

In [None]:
# check number of zeros in y


def check_zeros(idata):
    """Print the observed and posterior-predictive fraction of zeros in ``y``.

    The observed fraction is read from ``idata.observed_data`` rather than
    from a module-level data frame, so the report always refers to the data
    stored alongside the draws being checked.

    Parameters
    ----------
    idata
        Inference data with ``observed_data["y"]`` and
        ``posterior_predictive["y"]``; the latter must carry an ``__obs__``
        dimension.

    Returns
    -------
    None. Three lines are printed: the observed fraction, the mean
    posterior-predictive fraction, and its 10th-90th percentile interval.
    """
    observed_zeros = float((idata.observed_data["y"] == 0).mean())
    # Averaging over observations leaves one zero-fraction per posterior
    # draw; flatten pools those values across the remaining dimensions.
    sampled_zeros = (
        (idata.posterior_predictive["y"] == 0).mean("__obs__").values.flatten()
    )
    print(f"Fraction of zeros in the observed data: {observed_zeros}")
    print(
        f"Fraction of zeros in the posterior predictive check: {np.mean(sampled_zeros)}"
    )
    print(f" 80% CI: {np.percentile(sampled_zeros, [10, 90])}")


# Compare observed and predicted zero fractions for the Poisson fit.
check_zeros(idata=idata_1)

In [None]:
# Same formula as the Poisson model, but with a negative binomial likelihood.
# log(exposure2) is again an offset: it enters the linear predictor without
# a coefficient of its own.
model_2 = bmb.Model(
    "y ~ roach1 + treatment  + senior + offset(log(exposure2))",
    family="negativebinomial",
    data=roaches,
)
# Seed the sampler with the notebook-level SEED so that re-running the cell
# reproduces the same posterior draws.
idata_2 = model_2.fit(random_seed=SEED)

In [None]:
# Summary table for the negative binomial fit, shown as the cell output.
summary_2 = az.summary(idata_2)
summary_2

In [None]:
# Posterior predictive check for the negative binomial model on the
# log(y + 1) scale.
plot_log_posterior_ppc(model=model_2, idata=idata_2)

In [None]:
# Compare observed and predicted zero fractions for the negative binomial fit.
check_zeros(idata=idata_2)

In [None]:
def plot_zeros(ax, idata, model_label, **kwargs):
    """Histogram the posterior-predictive fraction of zeros against the data.

    The observed fraction is read from ``idata.observed_data`` rather than
    from a module-level data frame, so the reference line always matches the
    data stored alongside the draws being plotted.

    Parameters
    ----------
    ax
        Matplotlib axes to draw on.
    idata
        Inference data with ``observed_data["y"]`` and
        ``posterior_predictive["y"]``; the latter must carry an ``__obs__``
        dimension.
    model_label
        Text inserted into the panel title.
    **kwargs
        Forwarded to ``ax.hist``. ``alpha`` defaults to 0.5 and may be
        overridden here.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    data_zeros = float((idata.observed_data["y"] == 0).mean())
    # Averaging over observations leaves one zero-fraction per posterior
    # draw; flatten pools those values across the remaining dimensions.
    sampled_zeros = (
        (idata.posterior_predictive["y"] == 0).mean("__obs__").values.flatten()
    )
    # Merge instead of passing alpha explicitly so that a caller-supplied
    # alpha overrides the default rather than raising a duplicate-keyword
    # TypeError.
    hist_kwargs = {"alpha": 0.5, **kwargs}
    ax.hist(sampled_zeros, **hist_kwargs)
    # Dashed red line marks the observed fraction of zeros.
    ax.axvline(data_zeros, color="red", linestyle="--")
    ax.set_xlabel("Fraction of zeros")
    ax.set_title(f"Model: {model_label}")
    ax.yaxis.set_visible(False)
    ax.set_facecolor("white")
    return ax


# One panel per model, side by side.
fig, ax = plt.subplots(nrows=1, ncols=2, gridspec_kw=dict(wspace=0.2))

# Two bins keep the Poisson panel readable: it has almost no zeros.
plot_zeros(ax[0], idata_1, "Poisson", bins=2)
plot_zeros(ax[1], idata_2, "Negative Binomial")

# The labels are passed positionally and paired with the artists already
# drawn on the axes.
legend_labels = ["Observed data", "Posterior predictive"]
fig.legend(legend_labels, loc="center left", bbox_to_anchor=(0.05, 0.8))

In [None]:
# Inspect what Bambi built for the Poisson model. The backend's `model`
# attribute is presumably the underlying PyMC model — confirm against the
# Bambi docs.
pymc_model = model_1.backend
backend_graph = pymc_model.model
backend_graph

In [None]:
# recreate the model using pymc
import pymc as pm

with pm.Model() as model_pymc:
    # priors
    # NOTE(review): the sigmas are hard-coded, presumably copied from the
    # priors Bambi assigned to model_1 — confirm they still match.
    alpha = pm.Normal("Intercept", mu=0, sigma=4.5)
    beta_roach1 = pm.Normal("beta_roach1", mu=0, sigma=3.3)
    beta_treatment = pm.Normal("beta_treatment", mu=0, sigma=5.11)
    beta_senior = pm.Normal("beta_senior", mu=0, sigma=5.43)

    # likelihood
    # Log link: the linear predictor is exponentiated to obtain the Poisson
    # mean. log(exposure2) is added as an offset.
    mu = pm.math.exp(
        alpha
        + beta_roach1 * roaches["roach1"]
        + beta_treatment * roaches["treatment"]
        + beta_senior * roaches["senior"]
        + pm.math.log(roaches["exposure2"])
    )  # no beta for exposure2
    y = pm.Poisson("y", mu=mu, observed=roaches["y"])

    # Seed the sampler with the notebook-level SEED so that re-running the
    # cell reproduces the same posterior draws.
    idata_pymc = pm.sample(1000, random_seed=SEED)

az.summary(idata_pymc)