In [1]:
import pymc3 as pm
import arviz as az
import numpy as np

# Data Extraction

In [2]:
# Read the CSV into a NumPy array, skipping the first line (presumably the header row).
data_array = np.loadtxt('babies.csv', skiprows=1, delimiter=',')

# Split by column: index 0 holds the early-clamp values, index 1 the late-clamp values.
early_clamp_data, late_clamp_data = data_array[:, 0], data_array[:, 1]

# Model

In [3]:
# Fixed seed so that "Restart Kernel -> Run All" reproduces the same posterior draws.
RANDOM_SEED = 42

with pm.Model() as m:

    # 1. Priors for the early-clamp likelihood: vague Gamma(0.001, 0.001)
    #    on both the shape (a1) and the rate (b1).
    a_1 = pm.Gamma("a1", 0.001, 0.001)
    b_1 = pm.Gamma("b1", 0.001, 0.001)

    # 2. Priors for the late-clamp likelihood: same vague Gamma priors
    #    on the shape (a2) and the rate (b2).
    a_2 = pm.Gamma("a2", 0.001, 0.001)
    b_2 = pm.Gamma("b2", 0.001, 0.001)

    # 3. Early-clamp likelihood: Gamma with shape a_1 and rate b_1.
    pm.Gamma("likelihood1", a_1, b_1, observed=early_clamp_data)

    # 4. Late-clamp likelihood: Gamma with shape a_2 and rate b_2.
    pm.Gamma("likelihood2", a_2, b_2, observed=late_clamp_data)

    # 5. Difference in means (early minus late). The mean of a
    #    Gamma(shape, rate) distribution is shape / rate.
    diff_in_means = pm.Deterministic('diff_in_means', (a_1/b_1) - (a_2/b_2))

    # 6. Draw posterior samples. The result is kept as a MultiTrace
    #    (return_inferencedata=False); the seed makes the run repeatable.
    trace = pm.sample(
        3000,
        target_accept=0.95,
        return_inferencedata=False,
        random_seed=RANDOM_SEED,
    )

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [b2, a2, b1, a1]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 12 seconds.


# Show Statistics

In [14]:
# `trace` is a MultiTrace, so ArviZ has to convert it before summarising.
# Doing that inside the model context lets the converter find model `m`
# instead of logging "No model on context stack".
with m:
    posterior_summary = az.summary(trace, hdi_prob=0.90)

# A `with` block does not display its last expression, so show the table explicitly.
posterior_summary

Got error No model on context stack. trying to find log_likelihood in translation.


Unnamed: 0,mean,sd,hdi_5%,hdi_95%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
a1,34.779,12.584,15.134,54.905,0.208,0.147,3394.0,2997.0,1.0
b1,3.609,1.316,1.425,5.587,0.022,0.015,3388.0,3006.0,1.0
a2,28.042,10.216,11.828,43.611,0.173,0.122,3309.0,3340.0,1.0
b2,2.319,0.852,0.956,3.596,0.014,0.01,3300.0,3543.0,1.0
diff_in_means,-2.466,0.759,-3.667,-1.167,0.007,0.005,11876.0,8257.0,1.0


# Conclusion

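No, the 90% credible set for `diff_in_means` does not contain 0: the interval lies entirely below 0, from -3.667 to -1.167. Because `diff_in_means` is defined as the early-clamp mean minus the late-clamp mean, this suggests a credible difference between the means of the two procedures. Specifically, since the 90% HDI is entirely below zero, the early-clamp mean is lower than the late-clamp mean, and the posterior probability that the difference lies in this negative interval is 90%. Note that this is a Bayesian credible statement about the posterior, not a frequentist significance test or confidence level.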