### Prepare design matrix (dummy variables)

In [6]:
import pymc as pm
import formulaic
import pandas as pd
import numpy as np
import pandas as pd
import arviz as az

# Load the combined statistics table; the formula below reads its
# `Coverage` and `TargetModule` columns.
data = pd.read_csv(
    "../../single_parameter/combined_data/statistics.csv"
)

# Dummy-code TargetModule into one indicator column per module.
# The leading `0 +` removes the global intercept from the design matrix.
model_formula = 'Coverage ~ 0 + C(TargetModule)'
design_matrix = formulaic.model_matrix(
    model_formula,
    data=data,
)

# Show the right-hand side (predictor) matrix as the cell output.
design_matrix.rhs.iloc[:, :]

Unnamed: 0,C(TargetModule)[T.codetiming._timer],C(TargetModule)[T.flutils.decorators],C(TargetModule)[T.flutils.namedtupleutils],C(TargetModule)[T.flutils.packages],C(TargetModule)[T.flutils.setuputils.cmd],C(TargetModule)[T.httpie.output.formatters.headers],C(TargetModule)[T.httpie.plugins.base],C(TargetModule)[T.mimesis.builtins.da],C(TargetModule)[T.py_backwards.transformers.base],C(TargetModule)[T.py_backwards.transformers.dict_unpacking],...,C(TargetModule)[T.pymonet.maybe],C(TargetModule)[T.pymonet.validation],C(TargetModule)[T.pypara.accounting.journaling],C(TargetModule)[T.pytutils.lazy.lazy_import],C(TargetModule)[T.pytutils.python],C(TargetModule)[T.sanic.config],C(TargetModule)[T.sanic.helpers],C(TargetModule)[T.sanic.mixins.signals],C(TargetModule)[T.thonny.plugins.pgzero_frontend],C(TargetModule)[T.typesystem.tokenize.positional_validation]
0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9355,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
9356,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
9357,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
9358,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


### Run Model

In [5]:
import pymc as pm

# Fixed seed so that Restart Kernel -> Run All reproduces the same posterior draws.
RANDOM_SEED = 42

with pm.Model() as model:
    # Hyper-priors: mean (on the logit scale) and standard deviation
    # of the distribution the module intercepts are drawn from.
    a_bar = pm.Normal('a_bar', mu=0, sigma=1.5)
    sigma = pm.Exponential('sigma', 1.5)

    # Module specific intercept: one entry per design-matrix column.
    a_m = pm.Normal('a_m', mu=a_bar, sigma=sigma, shape=design_matrix.rhs.shape[1])

    # The design matrix holds module dummy variables, so the dot product
    # selects the intercept of each observation's module.
    logit_p = pm.math.dot(design_matrix.rhs, a_m)

    # Inverse-logit link: maps the unbounded linear predictor to a (0, 1) mean.
    # Stored as a Deterministic so it is recorded in the trace.
    p = pm.Deterministic('p', pm.math.invlogit(logit_p))

    # Beta likelihood in mean/concentration form:
    # alpha = p * theta and beta = (1 - p) * theta give mean p and alpha + beta = theta.
    theta = pm.Uniform('theta', 10, 200)  # Dispersion parameter
    Y_obs = pm.Beta('Y_obs', alpha=p*theta, beta=(1-p)*theta, observed=design_matrix.lhs['Coverage'])

    # Sample from the model; the explicit seed makes the run reproducible.
    trace = pm.sample(
        1000,
        chains=4,
        return_inferencedata=True,
        progressbar=True,
        random_seed=RANDOM_SEED,
    )

print("Model building complete.")

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [a_bar, sigma, a_m, theta]


Output()

Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 42 seconds.


Model building complete.


### Save Trace

In [8]:
# Keep only the model parameters; the per-observation deterministic 'p'
# is not included in the saved file.
posterior_dict = {
    'a_m': trace.posterior['a_m'].values,
    'a_bar': trace.posterior['a_bar'].values,
    'sigma': trace.posterior['sigma'].values,
    'theta': trace.posterior['theta'].values,
}

# Wrap the raw arrays in an InferenceData object with a single posterior group.
infdata = az.from_dict(posterior=posterior_dict)

# Write the InferenceData that was just built (previously the raw dict was
# passed and `infdata` was left unused). The call returns the file name,
# which is shown as the cell output.
az.to_netcdf(infdata, "trace_model_1.nc")

'trace_model_1.nc'