In [1]:
import pymc as pm
import numpy as np
import arviz as az
from pymc.math import matrix_inverse, extract_diag, sqrt
import pytensor.tensor as pt

%load_ext lab_black

# Dental Development*


Adapted from [Unit 10: growth.odc](https://raw.githubusercontent.com/areding/6420-pymc/main/original_examples/Codes4Unit10/growth.odc).

Data for the y array can be found [here](https://raw.githubusercontent.com/areding/6420-pymc/main/data/growthy.txt).

Data set on dental development was first provided by Potthof and Roy in their 1964 paper. It consists of longitudinal observations on 11 girls (gender=1) and 16 boys (gender=2). 

There are 4 observations on each subject centered at times -3,-1, 1, 3, where the units were years.

The measurement on each subject is the distance (in mm) from the center of the pituitary to the pteryomaxillary fisure.

Potthoff and Roy (1964). "A Generalized Multivariate Analysis of Variance Model Useful Especially for Growth Curve Problems," Biometrika, 51, 313-326.

MVN with Gender Specific Means but Common Precision Matrix

## Notes
- Wishart. Pymc docs say it's unusable.
- https://github.com/pymc-devs/pymc/issues/538 interesting discussion here
- https://austinrochford.com/posts/2015-09-16-mvn-pymc3-lkj.html

Currently have a working version but not sure if it's correct. I split the likelihoods for male and female with a shared covariance matrix. Results are sort of in-line with BUGS. Not sure if the difference is from the different prior on the covariance matrix or something else. A better way would be to use the coordinate system but I couldn't get it working with the multivariate normal likelihood.

In [2]:
time = np.array([-3, -1, 1, 3])
y = np.loadtxt("../data/growthy.txt")

In [3]:
with pm.Model() as m_double:
    beta1 = pm.Normal("beta1", 20, tau=0.001, shape=2)
    beta2 = pm.Normal("beta2", 1, tau=0.001, shape=2)

    sd_dist = pm.Normal.dist(0, 2, shape=4)
    T, corr, _ = pm.LKJCholeskyCov("T", n=4, eta=2, sd_dist=sd_dist, compute_corr=True)

    mu_male = pm.Deterministic("mu_male", beta1[0] + beta2[0] * time)
    mu_female = pm.Deterministic("mu_female", beta1[1] + beta2[1] * time)

    pm.MvNormal("likelihood_male", mu_male, chol=T, shape=(11, 4), observed=y[:12, :])
    pm.MvNormal(
        "likelihood_female", mu_female, chol=T, shape=(15, 4), observed=y[11:, :]
    )

    pm.Deterministic("corr", corr)

    trace = pm.sample(1000)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta1, beta2, T]


  self.vm()
  self.vm()
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 75 seconds.


In [4]:
az.summary(trace, var_names="beta", filter_vars="like")

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta1[0],23.053,0.485,22.114,23.969,0.008,0.006,3493.0,2614.0,1.0
beta1[1],25.165,0.463,24.331,26.078,0.007,0.005,3791.0,2803.0,1.0
beta2[0],0.525,0.111,0.321,0.732,0.002,0.001,3815.0,2934.0,1.0
beta2[1],0.78,0.112,0.583,1.006,0.002,0.001,3054.0,2611.0,1.0


In [5]:
az.summary(trace, var_names="corr")

  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)


Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
"corr[0, 0]",1.0,0.0,1.0,1.0,0.0,0.0,4000.0,4000.0,
"corr[0, 1]",0.475,0.134,0.219,0.711,0.003,0.002,2428.0,2512.0,1.0
"corr[0, 2]",0.622,0.108,0.412,0.805,0.002,0.002,2199.0,2435.0,1.0
"corr[0, 3]",0.408,0.143,0.152,0.674,0.003,0.002,2611.0,2983.0,1.0
"corr[1, 0]",0.475,0.134,0.219,0.711,0.003,0.002,2428.0,2512.0,1.0
"corr[1, 1]",1.0,0.0,1.0,1.0,0.0,0.0,3778.0,3626.0,1.0
"corr[1, 2]",0.723,0.087,0.558,0.865,0.002,0.001,3248.0,3162.0,1.0
"corr[1, 3]",0.553,0.125,0.311,0.763,0.002,0.002,2836.0,2842.0,1.0
"corr[2, 0]",0.622,0.108,0.412,0.805,0.002,0.002,2199.0,2435.0,1.0
"corr[2, 1]",0.723,0.087,0.558,0.865,0.002,0.001,3248.0,3162.0,1.0


In [6]:
az.summary(trace, var_names=["mu_male", "mu_female"])

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
mu_male[0],21.48,0.52,20.479,22.421,0.008,0.005,4702.0,2851.0,1.0
mu_male[1],22.529,0.471,21.658,23.449,0.008,0.005,3863.0,2918.0,1.0
mu_male[2],23.578,0.522,22.603,24.58,0.009,0.006,3264.0,2647.0,1.0
mu_male[3],24.627,0.649,23.409,25.849,0.012,0.008,3124.0,2590.0,1.0
mu_female[0],22.825,0.501,21.869,23.726,0.007,0.005,4741.0,3318.0,1.0
mu_female[1],24.385,0.449,23.532,25.217,0.007,0.005,4437.0,3069.0,1.0
mu_female[2],25.945,0.502,25.052,26.946,0.009,0.006,3396.0,2644.0,1.0
mu_female[3],27.505,0.635,26.273,28.671,0.011,0.008,3084.0,2686.0,1.0


In [8]:
%load_ext watermark
%watermark -n -u -v -iv -p pytensor

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Last updated: Wed Mar 22 2023

Python implementation: CPython
Python version       : 3.11.0
IPython version      : 8.9.0

pytensor: 2.10.1

pymc    : 5.1.2
numpy   : 1.24.2
arviz   : 0.15.1
pytensor: 2.10.1

