Based on https://bambinos.github.io/bambi/notebooks/negative_binomial.html

In [None]:
import arviz as az
import bambi as bmb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from scipy.stats import nbinom

In [None]:
# Select the "arviz-darkgrid" style for the figures drawn in the cells below.
az.style.use("arviz-darkgrid")

In [None]:
import warnings

# Ignore every FutureWarning from here on (presumably to keep the notebook
# output uncluttered); all other warning categories are still reported.
warnings.simplefilter(action="ignore", category=FutureWarning)

In [None]:
# Support of the plotted distribution (0..29), the number of successes k,
# and the two success probabilities that are compared below.
y = np.arange(30)
k = 3
p1, p2 = 0.5, 0.3

In [None]:
# One panel per success probability (p1 on the left, p2 on the right),
# sharing the y-axis and the same tick positions.
fig, ax = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
ticks = np.linspace(0, 30, num=11)

for panel, prob in zip(ax, (p1, p2)):
    panel.bar(y, nbinom.pmf(y, k, prob))
    panel.set_xticks(ticks)
    panel.set_title(f"k = {k}, p = {prob}")

fig.suptitle("Y = Number of failures until k successes", fontsize=16);

In [None]:
# Print the p1 probabilities of the first and fourth entries of y
# (y starts at 0, so these correspond to 0 and 3 failures).
pmf_p1 = nbinom.pmf(y, k, p1)
for position in (0, 3):
    print(pmf_p1[position])

In [None]:
# Same probabilities as the failures plot, but the bars are placed at y + k
# so the x-axis counts trials (failures plus the k successes).
fig, ax = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
trials = y + k
ticks = np.linspace(3, 30, num=10)

for panel, prob in zip(ax, (p1, p2)):
    panel.bar(trials, nbinom.pmf(y, k, prob))
    panel.set_xticks(ticks)
    panel.set_title(f"k = {k}, p = {prob}")

fig.suptitle("Y = Number of trials until k successes", fontsize=16);

In [None]:
# Download the Stata-format dataset from the UCLA server (needs network access).
# The cells below use its "prog", "math" and "daysabs" columns.
data = pd.read_stata("https://stats.idre.ucla.edu/stat/stata/dae/nb_data.dta")

In [None]:
# Preview the first rows of the data as loaded.
data.head()

In [None]:
# Swap the numeric program codes for descriptive labels, then preview the result.
program_labels = {1: "General", 2: "Academic", 3: "Vocational"}
data["prog"] = data["prog"].map(program_labels)
data.head()

In [None]:
# Number of rows in each program.
data["prog"].value_counts()

In [None]:
# One row of panels per program: math-score histogram on the left,
# absence-day frequencies on the right, each with the group mean marked.
fig, ax = plt.subplots(3, 2, figsize=(8, 6), sharex="col")
programs = sorted(data["prog"].unique())

for idx, program in enumerate(programs):
    subset = data[data["prog"] == program]
    math_ax, days_ax = ax[idx]

    # Histogram of math scores
    math_ax.hist(subset["math"], edgecolor="black", alpha=0.9)
    math_ax.axvline(subset["math"].mean(), color="C1")

    # Bar chart of how often each number of absence days occurs
    day_freq = subset["daysabs"].value_counts()
    days_ax.bar(list(day_freq.index), day_freq.values, edgecolor="black", alpha=0.9)
    days_ax.axvline(subset["daysabs"].mean(), color="C1")

    # Both panels in the row are titled with the program name
    for panel in (math_ax, days_ax):
        panel.set_title(program)

# Only the bottom row carries x-axis labels (columns share their x-axis).
ax[-1, 0].set_xlabel("Math score")
ax[-1, 1].set_xlabel("Days of absence");

In [None]:
# Negative binomial model without a global intercept: one coefficient per
# program plus a single slope for the standardized math score.
additive_formula = "daysabs ~ 0 + prog + scale(math)"
model_additive = bmb.Model(additive_formula, data, family="negativebinomial")
idata_additive = model_additive.fit()

In [None]:
# Same terms as the additive model, plus a prog-by-math interaction so the
# math slope may differ between programs.
interaction_formula = "daysabs ~ 0 + prog + scale(math) + prog:scale(math)"
model_interaction = bmb.Model(interaction_formula, data, family="negativebinomial")
idata_interaction = model_interaction.fit()

In [None]:
# Summary table of the additive model's fit.
az.summary(idata_additive)

In [None]:
# Summary table of the interaction model's fit.
az.summary(idata_interaction)

In [None]:
# Forest plot of the terms that both models have in common.
fitted_models = [idata_additive, idata_interaction]
shared_terms = ["prog", "scale(math)"]
az.plot_forest(
    fitted_models,
    var_names=shared_terms,
    model_names=["Additive", "Interaction"],
    figsize=(8, 4),
    combined=True,
);

In [None]:
# Forest plot of the interaction coefficients alone, with a vertical
# reference line at zero.
az.plot_forest(
    idata_interaction,
    var_names=["prog:scale(math)"],
    figsize=(8, 4),
    combined=True,
)
plt.axvline(x=0);

In [None]:
# Grid of math scores (1 through 99) at which predictions are evaluated.
math_score = np.arange(1, 100)


def predict(model, idata):
    """Predict the "mu" parameter over the math-score grid for every program.

    For each entry of the module-level ``programs`` list, a data frame is built
    that pairs every value of ``math_score`` with that program, and ``model``
    is asked for out-of-sample predictions on it (``idata`` is not modified).

    Args:
        model: Fitted model exposing ``predict(idata, data=..., inplace=False)``.
        idata: InferenceData object produced by fitting ``model``.

    Returns:
        A list with one element per program, in the order of ``programs``;
        each element is the ``posterior["mu"]`` entry of the predictions.
    """

    def _mu_for(program):
        # Same grid of math scores for every program; only "prog" changes.
        grid = pd.DataFrame({"math": math_score}).assign(prog=program)
        return model.predict(idata, data=grid, inplace=False).posterior["mu"]

    return [_mu_for(program) for program in programs]

In [None]:
# Per-program predictions (one list entry per program, in `programs` order)
# from each of the two fitted models.
prediction_additive = predict(model_additive, idata_additive)
prediction_interaction = predict(model_interaction, idata_interaction)

In [None]:
# Average each program's predictions over the chain and draw dimensions.
sample_dims = ("chain", "draw")
mu_additive = [draws.mean(sample_dims) for draws in prediction_additive]
mu_interaction = [draws.mean(sample_dims) for draws in prediction_interaction]

In [None]:
# Side-by-side comparison of the two models: for every program, the mean
# prediction as a line and the HDI of the predictions as a band.
fig, ax = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(10, 4))

panels = (
    ("Additive", mu_additive, prediction_additive),
    ("Interaction", mu_interaction, prediction_interaction),
)

for panel, (title, means, draws) in zip(ax, panels):
    for idx, program in enumerate(programs):
        shade = f"C{idx}"  # one colour per program, identical in both panels
        panel.plot(math_score, means[idx], label=f"{program}", color=shade, lw=2)
        az.plot_hdi(math_score, draws[idx], color=shade, ax=panel)
    panel.set_title(title)
    panel.set_xlabel("Math score")

# The y-axis is shared, so the limit set here applies to both panels.
ax[0].set_ylim(0, 25)
ax[0].legend(loc="upper right");