In [1]:
import warnings

import arviz as az
import numpy as np
import pandas as pd
import xarray as xr

import bambi as bmb

bmb.config["INTERPRET_VERBOSE"] = False
warnings.simplefilter(action='ignore', category=FutureWarning)

%load_ext autoreload
%autoreload 2



In [2]:
# Columns used by the model: the response (`count`) and its four predictors.
cols = ["count", "livebait", "camper", "persons", "child"]

# Read the Stata "fish" dataset, keep only the modelling columns, and set the
# dtypes in a single chained step. Building a fresh frame (instead of assigning
# columns into `fish_data[cols]`, a column subset) avoids pandas'
# SettingWithCopyWarning and keeps the cell idempotent on re-run.
fish_data = (
    pd.read_stata("http://www.stata-press.com/data/r11/fish.dta")
    .loc[:, cols]
    .astype(
        {
            "child": np.int8,
            "persons": np.int8,
            "livebait": "category",
            "camper": "category",
        }
    )
)

In [3]:
# Zero-inflated Poisson model of the fish count on the four predictors.
fish_model = bmb.Model(
    "count ~ livebait + camper + persons + child",
    fish_data,
    family="zero_inflated_poisson",
)

# Sample four chains; the fixed seed makes the posterior draws reproducible.
fish_idata = fish_model.fit(chains=4, random_seed=1234)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [count_psi, Intercept, livebait, camper, persons, child]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 3 seconds.


## Advanced usage

In [4]:
from bambi.interpret.helpers import data_grid, select_draws

### Create your own data

In [5]:
# Predictor values to condition on: both `camper` codes and `persons` 0 through 4.
conditional = dict(
    camper=np.array([0, 1]),
    persons=np.arange(5),
)

# Predictor whose two values (0 and 1) are contrasted.
variable = dict(livebait=np.array([0, 1]))

In [6]:
# Expand `conditional` and `variable` into a DataFrame of predictor combinations.
# `child` was not supplied, so its value is filled in by `data_grid`
# (0 in the rows displayed below).
new_data = data_grid(fish_model, conditional, variable)
new_data

Unnamed: 0,camper,persons,livebait,child
0,0,0,0,0
1,0,0,1,0
2,0,1,0,0
3,0,1,1,0
4,0,2,0,0
5,0,2,1,0
6,0,3,0,0
7,0,3,1,0
8,0,4,0,0
9,0,4,1,0


### Compute comparisons

In [11]:
# Predict on the custom grid. With `inplace=False` the result is returned and
# bound to a new name (presumably leaving `fish_idata` unmodified -- confirm).
idata_new = fish_model.predict(fish_idata, data=new_data, inplace=False)

In [12]:
# Display the object returned by `predict` for inspection.
idata_new

In [13]:
# Draws of `count_mean` for the grid rows with livebait == 0 (draw_1) and
# livebait == 1 (draw_2).
draw_1, draw_2 = (
    select_draws(idata_new, new_data, {"livebait": level}, "count_mean")
    for level in (0, 1)
)

# Average the per-draw difference over chains and draws.
# NOTE(review): xarray arithmetic aligns operands on coordinate labels. The two
# selections come from different rows of `new_data`, so if `select_draws` keeps
# the observation labels the difference could come out empty -- confirm.
(draw_2 - draw_1).mean(dim=("chain", "draw"))

We can check this result against the output of `bmb.interpret.comparisons`.

In [15]:
# Same livebait contrast (0 vs 1) over the same `conditional` grid, computed
# with bambi's built-in comparisons function.
summary_df = bmb.interpret.comparisons(
    fish_model,
    fish_idata,
    contrast={"livebait": [0, 1]},
    conditional=conditional,
)
summary_df

Unnamed: 0,term,estimate_type,value,camper,persons,child,estimate,lower_3.0%,upper_97.0%
0,livebait,diff,"(0, 1)",0,0,0,0.361526,0.232076,0.478699
1,livebait,diff,"(0, 1)",0,1,0,0.863481,0.614187,1.094623
2,livebait,diff,"(0, 1)",0,2,0,2.066431,1.567933,2.531342
3,livebait,diff,"(0, 1)",0,3,0,4.955033,3.971103,6.00496
4,livebait,diff,"(0, 1)",0,4,0,11.905085,9.586727,14.514207
5,livebait,diff,"(0, 1)",1,0,0,0.708658,0.498476,0.939042
6,livebait,diff,"(0, 1)",1,1,0,1.69257,1.273824,2.088714
7,livebait,diff,"(0, 1)",1,2,0,4.050508,3.329138,4.769666
8,livebait,diff,"(0, 1)",1,3,0,9.712447,8.400628,11.041247
9,livebait,diff,"(0, 1)",1,4,0,23.334955,19.752432,26.467248


### Cross comparisons

Here we compare across both the contrast values and the levels of the `conditional` variables (if the `conditional` values are categorical).

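Each call to `select_draws` below is given a dictionary of predictor values; it picks out the `count_mean` draws for the rows of `new_data` that match those values.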

In [16]:
# Two conditions at livebait = 0 that differ only in `persons` (1 vs 4).
cond_11 = dict(camper=0, persons=1, child=0, livebait=0)
cond_14 = dict(camper=0, persons=4, child=0, livebait=0)

# Draws of `count_mean` for the grid rows matching each condition.
draws_11, draws_14 = (
    select_draws(idata_new, new_data, cond, "count_mean")
    for cond in (cond_11, cond_14)
)

In [18]:
# Two conditions at livebait = 1 that differ only in `persons` (1 vs 4).
cond_21 = dict(camper=0, persons=1, child=0, livebait=1)
cond_24 = dict(camper=0, persons=4, child=0, livebait=1)

# Draws of `count_mean` for the grid rows matching each condition.
draws_21, draws_24 = (
    select_draws(idata_new, new_data, cond, "count_mean")
    for cond in (cond_21, cond_24)
)

After computing the "first level" comparisons (`diff_1` and `diff_2`), we can compute the difference between these two differences to obtain a _cross-comparison_. The cross-comparison is useful when we want to know what happens when two (or more) predictors change at the same time.

In [19]:
# First-level comparisons: persons = 4 versus persons = 1 at each livebait value.
diff_1 = draws_14 - draws_11  # livebait = 0
diff_2 = draws_24 - draws_21  # livebait = 1

# Difference of the two differences, averaged over chains and draws.
cross_comparison = (diff_2 - diff_1).mean(dim=("chain", "draw"))
cross_comparison