In [1]:
import arviz as az
import numpy as np
import pandas as pd
import pymc as pm

In [2]:
data = pd.read_csv("ortho.csv")
data

Unnamed: 0,y,age,Subject,Sex,Sex_coded
0,26.0,8,M01,Male,1
1,25.0,10,M01,Male,1
2,29.0,12,M01,Male,1
3,31.0,14,M01,Male,1
4,21.5,8,M02,Male,1
...,...,...,...,...,...
103,19.5,14,F10,Female,-1
104,24.5,8,F11,Female,-1
105,25.0,10,F11,Female,-1
106,28.0,12,F11,Female,-1


In [5]:
data["age_idx"] = [0, 1, 2, 3] * 27
data["subject_idx"] = np.repeat(np.arange(0, 27), 4)
data

Unnamed: 0,y,age,Subject,Sex,Sex_coded,age_idx,subject_idx
0,26.0,8,M01,Male,1,0,0
1,25.0,10,M01,Male,1,1,0
2,29.0,12,M01,Male,1,2,0
3,31.0,14,M01,Male,1,3,0
4,21.5,8,M02,Male,1,0,1
...,...,...,...,...,...,...,...
103,19.5,14,F10,Female,-1,3,25
104,24.5,8,F11,Female,-1,0,26
105,25.0,10,F11,Female,-1,1,26
106,28.0,12,F11,Female,-1,2,26


In [6]:
y_by_subject = data.pivot(index="subject_idx", columns=["age_idx"])["y"].to_numpy()
# now row index represents subject, column index represents ages 8, 10, 12, 14
y_by_subject

array([[26. , 25. , 29. , 31. ],
       [21.5, 22.5, 23. , 26.5],
       [23. , 22.5, 24. , 27.5],
       [25.5, 27.5, 26.5, 27. ],
       [20. , 23.5, 22.5, 26. ],
       [24.5, 25.5, 27. , 28.5],
       [22. , 22. , 24.5, 26.5],
       [24. , 21.5, 24.5, 25.5],
       [23. , 20.5, 31. , 26. ],
       [27.5, 28. , 31. , 31.5],
       [23. , 23. , 23.5, 25. ],
       [21.5, 23.5, 24. , 28. ],
       [17. , 24.5, 26. , 29.5],
       [22.5, 25.5, 25.5, 26. ],
       [23. , 24.5, 26. , 30. ],
       [22. , 21.5, 23.5, 25. ],
       [21. , 20. , 21.5, 23. ],
       [21. , 21.5, 24. , 25.5],
       [20.5, 24. , 24.5, 26. ],
       [23.5, 24.5, 25. , 26.5],
       [21.5, 23. , 22.5, 23.5],
       [20. , 21. , 21. , 22.5],
       [21.5, 22.5, 23. , 25. ],
       [23. , 23. , 23.5, 24. ],
       [20. , 21. , 22. , 21.5],
       [16.5, 19. , 19. , 19.5],
       [24.5, 25. , 28. , 28. ]])

In [7]:
sex_coded_by_subject = data.groupby("subject_idx")["Sex_coded"].mean().to_numpy()
sex_coded_by_subject

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1.])

In [8]:
age_long = np.array([8, 10 , 12, 14] * 27).reshape(27, 4)
age_long

array([[ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14],
       [ 8, 10, 12, 14]])

In [9]:
sex_by_sub = np.repeat(sex_coded_by_subject, 4).reshape(27, 4)
sex_by_sub

array([[ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [-1., -1., -1., -1.],
       [-1., -1., -1., -1.],
       [-1., -1., -1., -1.],
       [-1., -1., -1., -1.],
       [-1., -1., -1., -1.],
       [-1., -1., -1., -1.],
       [-1., -1., -1., -1.],
       [-1., -1., -1., -1.],
       [-1., -1., -1., -1.],
       [-1., -1., -1., -1.],
       [-1., -1., -1., -1.]])

In [14]:
y_by_subject

array([[26. , 25. , 29. , 31. ],
       [21.5, 22.5, 23. , 26.5],
       [23. , 22.5, 24. , 27.5],
       [25.5, 27.5, 26.5, 27. ],
       [20. , 23.5, 22.5, 26. ],
       [24.5, 25.5, 27. , 28.5],
       [22. , 22. , 24.5, 26.5],
       [24. , 21.5, 24.5, 25.5],
       [23. , 20.5, 31. , 26. ],
       [27.5, 28. , 31. , 31.5],
       [23. , 23. , 23.5, 25. ],
       [21.5, 23.5, 24. , 28. ],
       [17. , 24.5, 26. , 29.5],
       [22.5, 25.5, 25.5, 26. ],
       [23. , 24.5, 26. , 30. ],
       [22. , 21.5, 23.5, 25. ],
       [21. , 20. , 21.5, 23. ],
       [21. , 21.5, 24. , 25.5],
       [20.5, 24. , 24.5, 26. ],
       [23.5, 24.5, 25. , 26.5],
       [21.5, 23. , 22.5, 23.5],
       [20. , 21. , 21. , 22.5],
       [21.5, 22.5, 23. , 25. ],
       [23. , 23. , 23.5, 24. ],
       [20. , 21. , 22. , 21.5],
       [16.5, 19. , 19. , 19.5],
       [24.5, 25. , 28. , 28. ]])

In [47]:
y_by_subject.shape

(27, 4)

In [53]:
age1 = np.array([8, 10, 12, 14])
age1


array([ 8, 10, 12, 14])

In [54]:
sex1 = sex_coded_by_subject.reshape(-1, 1)
sex1

array([[ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [-1.],
       [-1.],
       [-1.],
       [-1.],
       [-1.],
       [-1.],
       [-1.],
       [-1.],
       [-1.],
       [-1.],
       [-1.]])

In [52]:
with pm.Model() as m_G:
    tau_eps = pm.Gamma("tau_eps", 0.001, 0.001)
    tau_u = pm.Gamma("tau_u", 0.001, 0.001)
    
    sigma2_eps = pm.Deterministic("sigma2_eps", 1/tau_eps)
    sigma2_u = pm.Deterministic("sigma2_u", 1/tau_u)

    u_i = pm.Normal("u_i", 0, tau=tau_u)

    beta0 = pm.Normal("β0_intercept", mu=0, tau=1e-6)
    beta1 = pm.Normal("β1_age", mu=0, tau=1e-6)
    beta2 = pm.Normal("β2_sex", mu=0, tau=1e-6)

    mu = beta0 + beta1 * age1 + beta2 * sex1 + u_i

    likelihood = pm.Normal("likelihood", mu=mu, tau=tau_eps, observed=y_by_subject)
    
    rho = pm.Deterministic("ρ", sigma2_eps/(sigma2_eps + sigma2_u))
    
    trace = pm.sample(3000)
    
az.summary(trace, hdi_prob=.95, var_names=["~u_i", "~μ", "~tau_e", "~tau_u"])

Multiprocess sampling (4 chains in 4 jobs)
NUTS: [tau_eps, tau_u, u_i, β0_intercept, β1_age, β2_sex]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 244 seconds.
There were 126 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.9273, but should be close to 0.8. Try to increase the number of tuning steps.
The chain reached the maximum tree depth. Increase max_treedepth, increase target_accept or reparameterize.
There were 1175 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.5656, but should be close to 0.8. Try to increase the number of tuning steps.
There were 106 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.9117, but should be close to 0.8. Try to increas

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
β0_intercept,22.341,115.163,-188.835,248.028,7.635,7.759,231.0,134.0,1.33
β1_age,0.674,0.075,0.495,0.816,0.013,0.009,26.0,117.0,1.15
β2_sex,1.032,0.252,0.615,1.466,0.099,0.073,7.0,19.0,1.53
tau_eps,0.188,0.021,0.146,0.235,0.003,0.002,40.0,150.0,1.35
sigma2_eps,5.388,0.569,4.047,6.483,0.064,0.046,40.0,150.0,1.35
sigma2_u,16016880.0,783578600.0,0.009,231373.755,9959929.681,7043065.735,12.0,104.0,1.32
ρ,0.532,0.396,0.0,0.994,0.131,0.106,12.0,109.0,1.42


In [18]:
with pm.Model() as m_IG:
    sigma2_eps = pm.InverseGamma("σ2_ϵ", 0.01, 0.01)
    sigma2_u = pm.InverseGamma("σ2_u", 0.01, 0.01)
    
    tau_e = pm.Deterministic("tau_e", 1/sigma2_eps)
    tau_u = pm.Deterministic("tau_u", 1/sigma2_u)

    
    u_i = pm.Normal("u_i", 0, tau=tau_u)

    beta0 = pm.Normal("β0_intercept", mu=0, tau=1e-4)
    beta1 = pm.Normal("β1_age", mu=0, tau=1e-4)
    beta2 = pm.Normal("β2_sex", mu=0, tau=1e-4)

    mu = pm.Deterministic(
        "μ", beta0 + beta1 * age_long + beta2 * sex_by_sub + u_i
    )

    likelihood = pm.Normal("likelihood", mu=mu, tau=tau_e, observed=y_by_subject)
    
    rho = pm.Deterministic("ρ", sigma2_eps/(sigma2_eps + sigma2_u))
    
    trace = pm.sample(5000)
    
az.summary(trace, hdi_prob=.95, var_names=["~u_i", "~μ", "~tau_e", "~tau_u"])

Multiprocess sampling (4 chains in 4 jobs)
NUTS: [σ2_ϵ, σ2_u, u_i, β0_intercept, β1_age, β2_sex]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 134 seconds.
There were 313 divergences after tuning. Increase `target_accept` or reparameterize.
There were 2827 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.2539, but should be close to 0.8. Try to increase the number of tuning steps.
There were 1006 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.6951, but should be close to 0.8. Try to increase the number of tuning steps.
There were 3205 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.1288, but should be close to 0.8. Try to increase the number of tunin

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
β0_intercept,19.517,25.059,-26.59,59.288,3.388,2.409,10.0,70.0,1.29
β1_age,0.638,0.077,0.503,0.795,0.024,0.017,11.0,62.0,1.3
β2_sex,1.002,0.286,0.406,1.433,0.125,0.093,6.0,11.0,1.72
σ2_ϵ,5.398,0.949,3.777,7.015,0.413,0.31,6.0,12.0,1.96
σ2_u,398448.902,31274640.0,0.029,9556.786,289245.187,204532.336,9.0,38.0,1.51
tau_e,0.191,0.034,0.14,0.258,0.015,0.011,6.0,12.0,1.92
tau_u,1.637,3.926,0.0,10.55,1.206,0.877,9.0,38.0,1.37
ρ,0.358,0.371,0.0,0.978,0.136,0.1,10.0,74.0,1.35


- check data
- test ig vs g
- check shapes
- 