In [1]:
import bambi as bmb
import numpy as np
import pandas as pd

In [2]:
#%load_ext autoreload
#%autoreload 2

In [3]:
# Load the "sleepstudy" example dataset through Bambi's data loader.
data = bmb.load_data("sleepstudy")

In [4]:
# Preview the first rows: columns are Reaction, Days and Subject.
data.head()

Unnamed: 0,Reaction,Days,Subject
0,249.56,0,308
1,258.7047,1,308
2,250.8006,2,308
3,321.4398,3,308
4,356.8519,4,308


In [5]:
# Hierarchical model: common intercept and Days slope, plus a group-specific
# intercept and Days slope for every Subject.
model = bmb.Model("Reaction ~ 1 + Days + (1 + Days | Subject)", data)
# Bare expression so the notebook displays the model summary (formula, family, priors).
model

       Formula: Reaction ~ 1 + Days + (1 + Days | Subject)
        Family: gaussian
          Link: mu = identity
  Observations: 180
        Priors: 
    target = mu
        Common-level effects
            Intercept ~ Normal(mu: 298.5079, sigma: 261.0092)
            Days ~ Normal(mu: 0.0, sigma: 48.8915)
        
        Group-level effects
            1|Subject ~ Normal(mu: 0.0, sigma: HalfNormal(sigma: 261.0092))
            Days|Subject ~ Normal(mu: 0.0, sigma: HalfNormal(sigma: 48.8915))
        
        Auxiliary parameters
            sigma ~ HalfStudentT(nu: 4.0, sigma: 56.1721)

In [6]:
# Fit the model. The seed is fixed so that the posterior -- and every quantity
# derived from it in later cells -- is reproducible under "Restart & Run All".
idata = model.fit(random_seed=1234)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [Reaction_sigma, Intercept, Days, 1|Subject_sigma, 1|Subject_offset, Days|Subject_sigma, Days|Subject_offset]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 17 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics


**Problema:** No estamos guardando la correlación entre los parámetros. No podríamos generar muestras con el método "gaussian" porque estaríamos omitiendo la correlación.

Una alternativa sería computar esa correlación...

In [7]:
import formulae
from formulae import design_matrices

# Configure formulae's handling of factor levels that were not present when the
# design matrices were built ("silent" mode). In the output of this cell the
# unseen level ends up under '__NEW_FACTOR_GROUP__'.
formulae.config["EVAL_UNSEEN_CATEGORIES"] = "silent"

# Design matrices for the same formula used by the Bambi model.
dm = design_matrices("Reaction ~ 1 + Days + (1 + Days|Subject)", data)

# New data: the first ten rows relabelled with an unseen Subject ("xxx"),
# followed by the same ten rows with their original (known) Subject.
known_rows = data.head(10)
unseen_rows = known_rows.reset_index(drop=True).assign(Subject="xxx")
df_new = pd.concat([unseen_rows, known_rows], ignore_index=True)

# Evaluate only the group-specific part of the design on the new data.
dm_new = dm.group.evaluate_new_data(df_new)
dm_new

GroupEffectsMatrix with shape (20, 38)
Terms:  
  1|Subject  
    kind: intercept
    groups: ['308', '309', '310', '330', '331', '332', '333', '334', '335', '337', '349', '350',
      '351', '352', '369', '370', '371', '372', '__NEW_FACTOR_GROUP__']
    columns: 0:19
  Days|Subject  
    kind: numeric
    groups: ['308', '309', '310', '330', '331', '332', '333', '334', '335', '337', '349', '350',
      '351', '352', '369', '370', '371', '372', '__NEW_FACTOR_GROUP__']
    columns: 19:38

To access the actual design matrix do 'np.array(this_obj)'

In [8]:
import xarray as xr

In [9]:
# Attribute exposed by the evaluated group-effects matrix; presumably the factors
# for which df_new introduced unseen levels -- TODO confirm against formulae.
# NOTE(review): this name is not used by any later cell visible in the notebook.
factors_with_new_levels = dm_new.factors_with_new_levels

In [10]:
# Inspect the mapping from grouping factor to its group-specific terms
# (here: 'Subject' -> ['1|Subject', 'Days|Subject'], as shown in the output).
model.response_component.group_specific_groups

{'Subject': ['1|Subject', 'Days|Subject']}

In [11]:
# Build posterior draws for an unseen "Subject" level.
#
# For every posterior draw we pick one of the levels already present in the
# posterior and borrow its coefficients. The SAME sampled level index is used for
# "1|Subject" and "Days|Subject", so each borrowed (intercept, slope) pair comes
# from a single subject and their joint relationship is preserved.
factor_dim = "Subject__factor_dim"

# Take chain/draw coordinates from the posterior instead of hard-coding them, so
# the cell works for any number of chains and the new arrays align exactly with
# the posterior when they are concatenated along `factor_dim`.
chain_coords = idata.posterior.coords["chain"].to_numpy()
draw_coords = idata.posterior.coords["draw"].to_numpy()
ndraws = len(draw_coords)

factor_levels = idata.posterior.coords[factor_dim].to_numpy()

# Seeded generator: re-running the cell reproduces the same sampled levels.
rng = np.random.default_rng(1234)
factor_sampled_idxs = rng.choice(len(factor_levels), size=ndraws)

coords = {
    "chain": chain_coords,
    "draw": draw_coords,
    factor_dim: ["__NEW_FACTOR_GROUP__"],
}


def _pick_sampled_levels(draws):
    """Select, for each draw, the coefficient of the sampled level.

    Parameters
    ----------
    draws : np.ndarray
        Posterior draws of one group-specific term. Assumed to be laid out as
        (chain, draw, level), which is what the indexing below relies on.

    Returns
    -------
    xr.DataArray
        Array with dims ("chain", "draw", factor_dim) and a single level labelled
        "__NEW_FACTOR_GROUP__".
    """
    # Pair draw i with sampled level i; the same pairing is applied to every chain.
    picked = draws[:, np.arange(ndraws), factor_sampled_idxs]
    return xr.DataArray(
        picked[..., np.newaxis],
        coords=coords,
        dims=("chain", "draw", factor_dim),
    )


original_draws_1 = idata.posterior["1|Subject"].to_numpy()
new_group_draws_1 = _pick_sampled_levels(original_draws_1)

original_draws_2 = idata.posterior["Days|Subject"].to_numpy()
new_group_draws_2 = _pick_sampled_levels(original_draws_2)

In [12]:
# Append the new-group draws to the right of the existing levels of each term.
z1, z2 = (
    xr.concat([idata.posterior[term], extra_level], dim="Subject__factor_dim")
    for term, extra_level in (
        ("1|Subject", new_group_draws_1),
        ("Days|Subject", new_group_draws_2),
    )
)

# Stack both terms into one "__variables__" axis (all levels of "1|Subject" first,
# then all levels of "Days|Subject"), keeping "chain" and "draw" as sample dims.
u = xr.Dataset({"1|Subject": z1, "Days|Subject": z2}).to_stacked_array(
    "__variables__", ("chain", "draw")
)

In [13]:
# Contribution of the group-specific effects: Z @ u, contracted over the shared
# "__variables__" dimension.
response_dim = "Reaction_obs"
design_matrix_dims = (response_dim, "__variables__")

# Wrap the dense group-effects design matrix so xarray can match dims by name.
Z = xr.DataArray(np.asarray(dm_new), dims=design_matrix_dims)
xr.dot(Z, u)

In [14]:
# Inspect the posterior coordinates: chain, draw and the Subject level dimension.
idata.posterior.coords

Coordinates:
  * chain                (chain) int64 0 1
  * draw                 (draw) int64 0 1 2 3 4 5 6 ... 994 995 996 997 998 999
  * Subject__factor_dim  (Subject__factor_dim) <U3 '308' '309' ... '371' '372'

In [19]:
# Display the new data passed to `model.predict`.
# NOTE(review): the output saved with this cell has 10 rows, all with Subject
# 'xxx', whereas the cell that builds `df_new` concatenates 20 rows; the execution
# counter also jumps from 14 to 19. The saved output likely comes from a different
# kernel state -- rerun the notebook top to bottom to confirm.
df_new

Unnamed: 0,Reaction,Days,Subject
0,249.56,0,xxx
1,258.7047,1,xxx
2,250.8006,2,xxx
3,321.4398,3,xxx
4,356.8519,4,xxx
5,414.6901,5,xxx
6,382.2038,6,xxx
7,290.1486,7,xxx
8,430.5853,8,xxx
9,466.3535,9,xxx


In [22]:
# Predict on the new data; with inplace=False the result is returned and bound
# to `p` (presumably leaving `idata` unmodified -- TODO confirm).
p = model.predict(idata, data=df_new, inplace=False)

In [28]:
# Show the "Reaction_mean" variable from the posterior group of the predictions.
p.posterior["Reaction_mean"]

Necesitamos otros cambios a nivel de formulae

* Si hay observaciones nuevas, se agrega un nuevo grupo a la derecha de los grupos existentes.
    * Todos los nuevos grupos son interpretados como UN ÚNICO nuevo grupo.
    * No va a tener sentido para los efectos fijos.
    * Sí va a tener sentido para los efectos aleatorios.
* Será más fácil cuando se tengan que hacer predicciones para un nuevo grupo.
    * Simplemente agregamos ("append") el nuevo grupo al final.