In [1]:
import pymc as pm
pm.__version__

'5.26.1'

In [2]:
seed = 42
x_dist = pm.Normal.dist(shape=(100, 3))
x_data = pm.draw(x_dist, random_seed=seed)

In [3]:
x_data.shape

(100, 3)

In [4]:
x_data

array([[ 0.41832997,  0.60557617,  0.02878786],
       [-1.084246  ,  1.46422098,  0.29072736],
       [-1.33075642, -0.03472346,  0.28041847],
       [ 0.10749307, -1.92080086,  1.57864499],
       [ 1.00595719,  0.45121505, -0.59343367],
       [ 0.09382112,  1.85195867, -0.25590475],
       [-0.28298637,  0.415816  , -1.08877401],
       [-1.96729165,  0.88737846, -1.32823784],
       [-0.13157981, -0.36196929,  0.7820311 ],
       [ 0.28266399, -1.00595013,  0.01851214],
       [-1.24315953,  2.60337585,  0.15139223],
       [-0.51553062, -0.2196374 ,  0.40234591],
       [ 1.36128828,  0.74287737,  0.93685218],
       [ 0.17547031,  1.52520418,  0.09821447],
       [-1.16490357,  0.52358791, -1.06559789],
       [-0.31079113,  0.5559524 , -0.09963476],
       [-0.25769078, -1.58951869, -1.81491229],
       [ 0.53617305,  1.27138979, -0.55403891],
       [ 1.72433064, -0.31178569,  0.06331837],
       [ 1.38212765,  0.58472813, -0.50975014],
       [ 0.2513335 ,  0.40621724,  0.865

In [5]:
coords={
 "trial": range(100),
 "features": ["sunlight hours", "water amount", "soil nitrogen"],
}

In [6]:
coords

{'trial': range(0, 100),
 'features': ['sunlight hours', 'water amount', 'soil nitrogen']}

In [7]:
with pm.Model(coords=coords) as generative_model:
   x = pm.Data("x", x_data, dims=["trial", "features"])

   # Model parameters
   betas = pm.Normal("betas", dims="features")
   sigma = pm.HalfNormal("sigma")

   # Linear model
   mu = x @ betas

   # Likelihood
   # Assuming we measure deviation of each plant from baseline
   plant_growth = pm.Normal("plant growth", mu, sigma, dims="trial")

In [8]:
plant_growth

plant growth ~ Normal(f(betas), sigma)

In [9]:
fixed_parameters = {
 "betas": [5, 20, 2],
 "sigma": 0.5,
}

In [10]:
with pm.do(generative_model, fixed_parameters) as synthetic_model:
   idata = pm.sample_prior_predictive(random_seed=seed) # Sample from prior predictive distribution.
   synthetic_y = idata.prior["plant growth"].sel(draw=0, chain=0)

Sampling: [plant growth]


In [11]:
with pm.observe(generative_model, {"plant growth": synthetic_y}) as inference_model:
   idata = pm.sample(random_seed=seed)

   summary = pm.stats.summary(idata, var_names=["betas", "sigma"])
   print(summary)

Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, betas]


Output()

Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 2 seconds.


                         mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  \
betas[sunlight hours]   4.973  0.054   4.875    5.076      0.001    0.001   
betas[water amount]    19.963  0.050  19.868   20.059      0.001    0.001   
betas[soil nitrogen]    1.996  0.056   1.897    2.103      0.001    0.001   
sigma                   0.512  0.038   0.443    0.582      0.001    0.001   

                       ess_bulk  ess_tail  r_hat  
betas[sunlight hours]    5458.0    3238.0    1.0  
betas[water amount]      4853.0    2804.0    1.0  
betas[soil nitrogen]     5103.0    3037.0    1.0  
sigma                    4906.0    3038.0    1.0  


In [12]:
# Simulate new data conditioned on inferred parameters
new_x_data = pm.draw(
    pm.Normal.dist(shape=(3, 3)),
    random_seed=seed,
)
new_coords = coords | {"trial": [0, 1, 2]}

with inference_model:
    pm.set_data({"x": new_x_data}, coords=new_coords)
    pm.sample_posterior_predictive(
        idata,
        predictions=True,
        extend_inferencedata=True,
        random_seed=seed,
    )

pm.stats.summary(idata.predictions, kind="stats")

Sampling: [plant growth]


Output()

Unnamed: 0,mean,sd,hdi_3%,hdi_97%
plant growth[0],14.23,0.514,13.27,15.203
plant growth[1],24.415,0.513,23.442,25.365
plant growth[2],-6.749,0.517,-7.744,-5.808
