In [1]:
import pymc as pm
import numpy as np
import arviz as az
from pymc.math import matrix_inverse, extract_diag, sqrt
import aesara.tensor as at

%load_ext lab_black
%load_ext watermark

# Dental Development


Adapted from [Unit 10: growth.odc](https://raw.githubusercontent.com/areding/6420-pymc/main/original_examples/Codes4Unit10/growth.odc).

Data for the y array can be found [here](https://raw.githubusercontent.com/areding/6420-pymc/main/data/growthy.txt).

Associated lecture video: Unit 10 lesson 2

## Problem statement


Data set on dental development was first provided by Potthof and Roy in their 1964 paper. It consists of longitudinal observations on 11 girls (gender=1) and 16 boys (gender=2). 

There are 4 observations on each subject centered at times -3,-1, 1, 3, where the units were years.

The measurement on each subject is the distance (in mm) from the center of the pituitary to the pteryomaxillary fisure.

Potthoff and Roy (1964). "A Generalized Multivariate Analysis of Variance Model Useful Especially for Growth Curve Problems," Biometrika, 51, 313-326.

MVN with Gender Specific Means but Common Precision Matrix

notes:
- Wishart. Pymc docs say it's unusable???
- https://github.com/pymc-devs/pymc/issues/538 interesting discussion here
- https://austinrochford.com/posts/2015-09-16-mvn-pymc3-lkj.html


Currently have a working version but not sure if it's correct. I split the likelihoods for male and female with a shared covariance matrix. Results are sort of in-line with BUGS. Not sure if the difference is from the different prior on the covariance matrix or something else. A better way would be to use the coordinate system but I couldn't get it working with the multivariate normal likelihood.

In [270]:
time = np.array([-3, -1, 1, 3])
y = np.loadtxt("../data/growthy.txt")

In [279]:
with pm.Model() as m_double:
    beta1 = pm.Normal("beta1", 20, tau=0.001, shape=2)
    beta2 = pm.Normal("beta2", 1, tau=0.001, shape=2)

    sd_dist = pm.Normal.dist(0, 2, shape=4)
    T, corr, _ = pm.LKJCholeskyCov("T", n=4, eta=2, sd_dist=sd_dist, compute_corr=True)

    mu_male = pm.Deterministic("mu_male", beta1[0] + beta2[0] * time)
    mu_female = pm.Deterministic("mu_female", beta1[1] + beta2[1] * time)

    pm.MvNormal("likelihood_male", mu_male, chol=T, shape=(11, 4), observed=y[:12, :])
    pm.MvNormal(
        "likelihood_female", mu_female, chol=T, shape=(15, 4), observed=y[11:, :]
    )

    pm.Deterministic("corr", corr)

    trace = pm.sample(1000)

Auto-assigning NUTS sampler...
INFO:pymc:Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
INFO:pymc:Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
INFO:pymc:Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta1, beta2, T]
INFO:pymc:NUTS: [beta1, beta2, T]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 58 seconds.
INFO:pymc:Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 58 seconds.


In [280]:
az.summary(trace, var_names="beta", filter_vars="like")

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta1[0],23.044,0.481,22.113,23.913,0.008,0.006,3690.0,2900.0,1.0
beta1[1],25.164,0.464,24.318,26.056,0.008,0.006,3388.0,2558.0,1.0
beta2[0],0.523,0.11,0.304,0.721,0.002,0.001,3505.0,2709.0,1.0
beta2[1],0.784,0.111,0.57,0.982,0.002,0.001,3199.0,2508.0,1.0


In [281]:
az.summary(trace, var_names="corr")

  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)


Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
"corr[0, 0]",1.0,0.0,1.0,1.0,0.0,0.0,4000.0,4000.0,
"corr[0, 1]",0.474,0.139,0.207,0.71,0.003,0.002,2128.0,2233.0,1.0
"corr[0, 2]",0.619,0.111,0.419,0.814,0.003,0.002,1917.0,1840.0,1.0
"corr[0, 3]",0.405,0.148,0.129,0.669,0.003,0.002,2218.0,2594.0,1.0
"corr[1, 0]",0.474,0.139,0.207,0.71,0.003,0.002,2128.0,2233.0,1.0
"corr[1, 1]",1.0,0.0,1.0,1.0,0.0,0.0,3923.0,3798.0,1.0
"corr[1, 2]",0.721,0.086,0.562,0.865,0.002,0.001,2653.0,2698.0,1.0
"corr[1, 3]",0.547,0.129,0.301,0.764,0.003,0.002,2315.0,2274.0,1.0
"corr[2, 0]",0.619,0.111,0.419,0.814,0.003,0.002,1917.0,1840.0,1.0
"corr[2, 1]",0.721,0.086,0.562,0.865,0.002,0.001,2653.0,2698.0,1.0


In [282]:
az.summary(trace, var_names=["mu_male", "mu_female"])

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
mu_male[0],21.477,0.525,20.498,22.486,0.008,0.006,4318.0,2621.0,1.0
mu_male[1],22.522,0.471,21.615,23.402,0.007,0.005,3942.0,2888.0,1.0
mu_male[2],23.567,0.515,22.606,24.532,0.009,0.006,3511.0,2852.0,1.0
mu_male[3],24.612,0.636,23.386,25.732,0.011,0.008,3301.0,2336.0,1.0
mu_female[0],22.811,0.503,21.946,23.829,0.008,0.006,3936.0,3159.0,1.0
mu_female[1],24.38,0.451,23.496,25.178,0.008,0.005,3599.0,2761.0,1.0
mu_female[2],25.948,0.501,25.028,26.894,0.009,0.006,3133.0,2443.0,1.0
mu_female[3],27.517,0.63,26.343,28.706,0.012,0.008,2863.0,2413.0,1.0


In [269]:
%watermark -n -u -v -iv -p aesara,aeppl

Last updated: Mon Jul 25 2022

Python implementation: CPython
Python version       : 3.10.5
IPython version      : 8.4.0

aesara: 2.7.3
aeppl : 0.0.31

pandas: 1.4.3
numpy : 1.23.0
aesara: 2.7.3
pymc  : 4.0.1
arviz : 0.12.1

