Based on https://bambinos.github.io/bambi/notebooks/zero_inflated_regression.html

In [None]:
import arviz as az
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
import warnings

import bambi as bmb

# Silence every FutureWarning for the rest of the session so that they do
# not clutter the cell outputs below.
warnings.simplefilter(action="ignore", category=FutureWarning)

In [None]:
# Plot the zero-inflated Poisson (ZIP) pmf for two (psi, mu) settings.
#
# With probability psi an observation is drawn from Poisson(mu); with
# probability 1 - psi it is an extra ("structural") zero. Hence
#   P(0) = (1 - psi) + psi * Poisson(0 | mu)
#   P(k) = psi * Poisson(k | mu)              for k >= 1
# i.e. *every* Poisson term, including the one at zero, is weighted by psi.
x = np.arange(0, 22)
psis = [0.7, 0.4]
mus = [10, 4]
plt.figure(figsize=(7, 3))
for psi, mu in zip(psis, mus):
    pmf = psi * stats.poisson.pmf(x, mu)  # 1.) counts from the Poisson process
    pmf[0] += 1 - psi  # 2.) extra zeros from the zero-generating process
    # x stops at 21, so rescale the truncated pmf to sum to exactly 1.
    pmf /= pmf.sum()
    plt.plot(x, pmf, "-o", label="$\\psi$ = {}, $\\mu$ = {}".format(psi, mu))

plt.title("Zero Inflated Poisson Process")
plt.xlabel("x", fontsize=12)
plt.ylabel("f(x)", fontsize=12)
plt.legend(loc=1)
plt.show()

In [None]:
# Download the fish dataset and keep only the columns used for modelling.
cols = ["count", "livebait", "camper", "persons", "child"]
fish_data = pd.read_csv("https://stats.idre.ucla.edu/stat/data/fish.csv")[cols]
# Store livebait and camper as categorical columns.
fish_data = fish_data.astype({"livebait": "category", "camper": "category"})
fish_data = fish_data.loc[fish_data["count"] < 60]  # remove outliers

In [None]:
# Histogram of the response: excess zeros and a skewed count distribution.
plt.figure(figsize=(7, 3))
sns.histplot(data=fish_data, x="count", discrete=True)
# The trailing semicolon keeps the notebook from echoing the return value.
plt.gca().set_xlabel("Number of Fish Caught");

In [None]:
# Zero-inflated Poisson regression: only the response formula is supplied,
# so no predictors are specified for psi in this model.
zip_model = bmb.Model(
    formula="count ~ livebait + camper + persons + child",
    data=fish_data,
    family="zero_inflated_poisson",
)

# Sample with a fixed seed so the run is repeatable.
zip_idata = zip_model.fit(
    chains=4,
    draws=1000,
    random_seed=1234,
    target_accept=0.95,
)

The cell above raises `TypeError: unsupported operand type(s) for -: 'int' and 'method'`, so the steps that depend on its result are skipped.

In [None]:
# Same zero-inflated Poisson family, but now psi gets its own linear
# predictor in addition to the one for the response.
formula = bmb.Formula(
    # parent parameter mu
    "count ~ livebait + camper + persons + child",
    # non-parent parameter psi
    "psi ~ livebait + camper + persons + child",
)

zip_model = bmb.Model(formula=formula, data=fish_data, family="zero_inflated_poisson")

# Sample with a fixed seed so the run is repeatable.
zip_idata = zip_model.fit(
    chains=4,
    draws=1000,
    random_seed=1234,
    target_accept=0.95,
)

The cell above raises `TypeError: unsupported operand type(s) for -: 'int' and 'method'`, so the steps that depend on its result are skipped.

In [None]:
# Plot the hurdle Poisson pmf for two (psi, mu) settings.
#
# Zeros occur with probability 1 - psi; positive counts follow a Poisson(mu)
# truncated at zero (divided by 1 - P(X <= 0)) and weighted by psi.
x = np.arange(0, 22)
psis = [0.7, 0.4]
mus = [10, 4]

plt.figure(figsize=(7, 3))
for psi, mu in zip(psis, mus):
    cdf = stats.poisson.cdf(0, mu)  # Poisson mass at or below zero
    pmf = stats.poisson.pmf(x, mu)  # untruncated Poisson pmf on the grid
    pmf[1:] = (psi * pmf[1:]) / (1 - cdf)  # positive counts, zero-truncated
    pmf[0] = 1 - psi  # mass of not clearing the hurdle
    pmf = pmf / pmf.sum()  # rescale so the plotted values sum to 1
    plt.plot(
        x,
        pmf,
        linestyle="-",
        marker="o",
        label="$\\psi$ = {}, $\\mu$ = {}".format(psi, mu),
    )

plt.title("Hurdle Poisson Process")
plt.xlabel("x", fontsize=12)
plt.ylabel("f(x)", fontsize=12)
plt.legend(loc="upper right")
plt.show()

In [None]:
# Hurdle Poisson regression with separate linear predictors for the
# response and for psi.
hurdle_formula = bmb.Formula(
    # parent parameter mu
    "count ~ livebait + camper + persons + child",
    # non-parent parameter psi
    "psi ~ livebait + camper + persons + child",
)

hurdle_model = bmb.Model(
    formula=hurdle_formula,
    data=fish_data,
    family="hurdle_poisson",
)

# Sample with a fixed seed so the run is repeatable.
hurdle_idata = hurdle_model.fit(
    chains=4,
    draws=1000,
    random_seed=1234,
    target_accept=0.95,
)

The cell above raises `TypeError: unsupported operand type(s) for -: 'int' and 'method'`, so the steps that depend on its result are skipped.