In [None]:
import arviz as az
import numpy as np
import pandas as pd
import pymc as pm
from pymc.math import dot, sqr

In [None]:
data = pd.read_csv("rabbits.csv")
data

Unnamed: 0,time,temp
0,24.0,102.8
1,32.0,104.5
2,48.0,106.5
3,56.0,107.0
4,,107.1
5,70.0,105.1
6,72.0,103.9
7,75.0,
8,80.0,103.2
9,96.0,102.1


In [None]:
data.mean(), data.std()

(time     61.444444
 temp    104.688889
 dtype: float64,
 time    23.415332
 temp     1.862421
 dtype: float64)

In [None]:
y = data["temp"].to_numpy()
y = np.nan_to_num(y, nan=-1)
y = np.ma.masked_values(y, value=-1)

x = data["time"].to_numpy()
x = np.nan_to_num(x, nan=-1)
x = np.ma.masked_values(x, value=-1)

In [None]:
x

masked_array(data=[24.0, 32.0, 48.0, 56.0, --, 70.0, 72.0, 75.0, 80.0,
                   96.0],
             mask=[False, False, False, False,  True, False, False, False,
                   False, False],
       fill_value=-1.0)

Unlike dugongs, the missing values are also in the x data. Need to impute those values from a specified distribution, rather than from the likelihood like y values.

In [10]:
inits = {
    "beta0": np.array(0.0),
    "beta1": np.array(0.0),
    "x_imputed": 50,
}

In [11]:
# q1a
with pm.Model() as m:
    # priors
    beta0 = pm.Normal("beta0", mu=0, tau=0.0001)
    beta1 = pm.Normal("beta1", mu=0, tau=0.0001)
    tau = pm.Gamma("tau", 0.001, 0.001)

    # x_imputed = pm.TruncatedNormal("x_imputed", mu=60, sigma=30, lower=1, upper=120, observed=x)
    x_imputed = pm.Uniform("x_imputed", 20, 100, observed=x)

    mu = beta0 + beta1 * x_imputed

    likelihood = pm.Normal("likelihood", mu=mu, tau=tau, observed=y)

    # start sampling
    trace = pm.sample(
        2000,
        chains=4,
        tune=500,
        cores=4,
        init="adapt_diag",
        random_seed=1,
        return_inferencedata=True,
        initvals=inits
    )

Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta0, beta1, tau, x_imputed_missing, likelihood_missing]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 500 tune and 2_000 draw iterations (2_000 + 8_000 draws total) took 17 seconds.
There was 1 divergence after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.882, but should be close to 0.8. Try to increase the number of tuning steps.
The acceptance probability does not match the target. It is 0.8831, but should be close to 0.8. Try to increase the number of tuning steps.
There was 1 divergence after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.9119, but should be close to 0.8. Try to increase the number of tuning steps.


In [49]:
az.summary(trace, hdi_prob=0.95)

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta0,105.922,2.127,101.639,110.093,0.041,0.029,2752.0,2978.0,1.0
beta1,-0.021,0.034,-0.087,0.046,0.001,0.0,2721.0,2882.0,1.0
likelihood_missing[0],104.328,2.448,99.436,109.28,0.037,0.026,4584.0,3359.0,1.0
tau,0.286,0.156,0.034,0.584,0.003,0.002,2767.0,3030.0,1.0
x_imputed_missing[0],53.112,22.815,20.035,93.832,0.359,0.256,3747.0,3299.0,1.0


In [50]:
# q1b
with pm.Model() as m:
    # priors
    beta0 = pm.Normal("beta0", mu=0, tau=0.00001)
    beta1 = pm.Normal("beta1", mu=0, tau=0.00001)
    beta2 = pm.Normal("beta2", mu=0, tau=0.00001)
    tau = pm.Gamma("tau", alpha=0.001, beta=0.001)

    x_imputed = pm.Uniform("x_imputed", 20, 100, observed=x)

    mu = beta0 + beta1 * x_imputed + beta2 * sqr(x_imputed)

    likelihood = pm.Normal("likelihood", mu=mu, tau=tau, observed=y)

    # start sampling
    trace = pm.sample(
        10000,  # samples
        chains=4,
        tune=500,
        cores=4,
        init="jitter+adapt_diag",
        random_seed=1,
        return_inferencedata=True,
    )

Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta0, beta1, beta2, tau, x_imputed_missing, likelihood_missing]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 500 tune and 10_000 draw iterations (2_000 + 40_000 draws total) took 94 seconds.
There were 210 divergences after tuning. Increase `target_accept` or reparameterize.
There were 676 divergences after tuning. Increase `target_accept` or reparameterize.
There were 36 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.9152, but should be close to 0.8. Try to increase the number of tuning steps.
There were 594 divergences after tuning. Increase `target_accept` or reparameterize.


In [52]:
az.summary(trace, hdi_prob=0.95)

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta0,97.486,3.276,91.01,104.109,0.036,0.025,7963.0,11464.0,1.0
beta1,0.316,0.119,0.082,0.56,0.001,0.001,7860.0,10856.0,1.0
beta2,-0.003,0.001,-0.005,-0.001,0.0,0.0,8295.0,11085.0,1.0
likelihood_missing[0],104.954,1.566,101.702,107.963,0.012,0.009,14995.0,14846.0,1.0
tau,0.804,0.472,0.059,1.733,0.018,0.016,1035.0,330.0,1.0
x_imputed_missing[0],54.684,13.027,27.92,79.164,0.116,0.082,12283.0,12655.0,1.0
