In [2]:
import bambi as bmb
import numpy as np
import pandas as pd

In [3]:
# Load the "sleepstudy" dataset through Bambi's data loader; the preview below
# shows its columns: Reaction, Days and Subject.
data = bmb.load_data("sleepstudy")

In [4]:
# Preview the first rows to check the columns used in the model formula.
data.head()

Unnamed: 0,Reaction,Days,Subject
0,249.56,0,308
1,258.7047,1,308
2,250.8006,2,308
3,321.4398,3,308
4,356.8519,4,308


In [5]:
# Mixed-effects model: a common intercept and Days slope, plus a group-specific
# intercept and Days slope for every Subject.
model = bmb.Model("Reaction ~ 1 + Days + (1 + Days | Subject)", data)
# Bare expression so the notebook displays the model summary (formula, family, priors).
model

       Formula: Reaction ~ 1 + Days + (1 + Days | Subject)
        Family: gaussian
          Link: mu = identity
  Observations: 180
        Priors: 
    target = mu
        Common-level effects
            Intercept ~ Normal(mu: 298.5079, sigma: 261.0092)
            Days ~ Normal(mu: 0.0, sigma: 48.8915)
        
        Group-level effects
            1|Subject ~ Normal(mu: 0.0, sigma: HalfNormal(sigma: 261.0092))
            Days|Subject ~ Normal(mu: 0.0, sigma: HalfNormal(sigma: 48.8915))
        
        Auxiliary parameters
            sigma ~ HalfStudentT(nu: 4.0, sigma: 56.1721)

In [6]:
# Fit the model with the default sampler settings. The result is kept in `idata`;
# its `.posterior` group is what the cells below read the draws from.
idata = model.fit()

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [Reaction_sigma, Intercept, Days, 1|Subject_sigma, 1|Subject_offset, Days|Subject_sigma, Days|Subject_offset]


Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 20 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics


**Problema:** No estamos guardando la correlación entre los parámetros. No podríamos generar muestras con el método "gaussian" porque estaríamos omitiendo esa correlación.

Una alternativa sería computar esa correlación...

In [21]:
# Simulate the group-specific effects of a *new* Subject by borrowing, for every
# posterior draw, the effects of one randomly chosen existing Subject.
ndraws = len(idata.posterior.coords["draw"])
factor_dim = "Subject__factor_dim"

factor_levels = idata.posterior.coords[factor_dim].to_numpy()

# Use a seeded generator (instead of the global, unseeded np.random state) so the
# sampled levels -- and everything derived from them -- survive Restart & Run All.
rng = np.random.default_rng(1234)
factor_sampled_idxs = rng.choice(len(factor_levels), size=ndraws)
draw_idxs = np.arange(ndraws)

# The same (draw, level) pairs index both terms, so each new draw keeps the
# intercept/slope pairing of the Subject it was borrowed from.
# Assumes both arrays are laid out as (chain, draw, Subject level) -- TODO confirm.
original_draws_1 = idata.posterior["1|Subject"].to_numpy()
new_group_draws_1 = original_draws_1[:, draw_idxs, factor_sampled_idxs]

original_draws_2 = idata.posterior["Days|Subject"].to_numpy()
new_group_draws_2 = original_draws_2[:, draw_idxs, factor_sampled_idxs]

¿Devolvemos un nuevo idata cuando se samplean nuevos niveles? Me parece más "safe" que modificar el original... Tenemos que agregar una nueva dimensión para el "nuevo nivel".

In [24]:
import formulae
from formulae import design_matrices

In [27]:
# Build the design matrices for the same formula with formulae directly, so the
# group-specific matrix can be inspected and re-evaluated on new data below.
dm = design_matrices("Reaction ~ 1 + Days + (1 + Days|Subject)", data)
# Bare expression so the notebook displays the shapes of the response/common/group parts.
dm

DesignMatrices

                  (rows, cols)
Response:               (180,)
Common:               (180, 2)
Group-specific:      (180, 36)

Use .response, .common, or .group to access the different members.

In [39]:
# Test frame with 20 rows: the first 10 rows of `data` relabelled with the unseen
# Subject level "xxx", followed by the same 10 rows with their original Subject.
df_new = pd.concat(
    [
        data.head(10).reset_index(drop=True).assign(Subject="xxx"),
        data.head(10),
    ],
    ignore_index=True,  # renumber rows 0..19, same as a final reset_index(drop=True)
)
df_new

Unnamed: 0,Reaction,Days,Subject
0,249.56,0,xxx
1,258.7047,1,xxx
2,250.8006,2,xxx
3,321.4398,3,xxx
4,356.8519,4,xxx
5,414.6901,5,xxx
6,382.2038,6,xxx
7,290.1486,7,xxx
8,430.5853,8,xxx
9,466.3535,9,xxx


In [43]:
# NOTE(review): presumably makes formulae warn, rather than fail, when new data
# contains category levels not seen when the design matrix was built -- confirm
# against formulae's configuration docs.
formulae.config["EVAL_UNSEEN_CATEGORIES"] = "warning"

In [46]:
# Rows 10 onward of df_new keep their original Subject, i.e. a level that the
# design matrix already knows about.
dm_new = dm.group.evaluate_new_data(df_new.iloc[10:])
dm_new

GroupEffectsMatrix with shape (10, 36)
Terms:  
  1|Subject  
    kind: intercept
    groups: ['308', '309', '310', '330', '331', '332', '333', '334', '335', '337', '349', '350',
      '351', '352', '369', '370', '371', '372']
    columns: 0:18
  Days|Subject  
    kind: numeric
    groups: ['308', '309', '310', '330', '331', '332', '333', '334', '335', '337', '349', '350',
      '351', '352', '369', '370', '371', '372']
    columns: 18:36

To access the actual design matrix do 'np.array(this_obj)'

In [61]:
# Configure array printing through NumPy's public API. Writing to the private
# `np.core.arrayprint._line_width` attribute is not read by current NumPy's
# printing code, and `np.core` is deprecated in NumPy 2.
# linewidth=120 matches the last value the original cell assigned.
np.set_printoptions(edgeitems=3, linewidth=120)

In [62]:
# Re-evaluate the group-specific matrix on the rows with a known Subject and print
# the raw array to see which columns those rows activate.
dm_new = dm.group.evaluate_new_data(df_new.iloc[10:])
print(np.asarray(dm_new))

[[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]


In [63]:
# Same evaluation for the first 10 rows, whose Subject is the unseen level "xxx";
# the printed matrix shows how formulae encodes a level it has never seen.
dm_new = dm.group.evaluate_new_data(df_new.iloc[:10])
print(np.asarray(dm_new))

[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]




Necesitamos otros cambios a nivel de formulae:

* Si hay observaciones nuevas, se agrega un nuevo grupo a la derecha de los grupos existentes.
    * Todos los nuevos grupos son interpretados como UN ÚNICO nuevo grupo.
    * No va a tener sentido para los efectos fijos.
    * Sí va a tener sentido para los efectos aleatorios.