In [None]:
import numpy as np
import scipy
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
import pymc3 as pm
import arviz as az
import pandas as pd
import plotly.express as px

## Factor data

In [None]:
# Simulate a two-level factor: N draws per level from normal distributions
# that share one standard deviation but have different means.
DATA_SEED = 42  # fixed so the simulated data are identical on every run
mean_1 = 0
mean_2 = 5
std = 1
N = 100

# A local Generator keeps the simulation reproducible without touching
# NumPy's global random state.
rng = np.random.default_rng(DATA_SEED)
y = np.concatenate([rng.normal(mean_1, std, N), rng.normal(mean_2, std, N)])
f = np.repeat([0, 1], N)  # level label per draw: N zeros followed by N ones
df = pd.DataFrame({"factor": f, "value": y})
df["factor"] = df["factor"].astype("category")
df

In [None]:
# Compare the two factor levels visually, then numerically.
# (Interactive alternative: px.histogram(df, x="value", color="factor"))
fig, ax = plt.subplots()
sns.boxplot(x="value", y="factor", data=df, ax=ax)

# Mean and standard deviation per factor level; as the last expression of the
# cell this table is what gets displayed.
df.groupby("factor").agg(["mean", "std"])

## Hierarchical model, common std

\begin{align*}
y_{ij} &\sim \mathcal{N}(\mu_j, \sigma) \\
\mu_{j} &\sim \mathcal{N}(\mu_0, \tau) \\
\mu_0 &\sim \mathcal{N}(92, 100) \\
\tau &\sim |\mathcal{N}|(0, 100) \\
\sigma &\sim |\mathcal{N}|(0, 100)
\end{align*}



* Implement the hierarchical model with a common standard deviation in PyMC3.

In [None]:
# Integer code of each row's factor level; used later to pick the matching
# per-level parameter for every observation.
factor_accessor = df["factor"].cat
df["factor_id"] = factor_accessor.codes

# Level labels in category order, and how many distinct levels there are.
factors = factor_accessor.categories.tolist()
n_factors = len(factors)

In [None]:
# Hierarchical (partial-pooling) model: every factor level has its own mean,
# drawn from a shared population distribution, and all levels share a single
# observation standard deviation.
SAMPLE_SEED = 42  # fixed so the posterior draws are reproducible across runs

with pm.Model() as model_hierarchical:

    # Hyper-priors: location and spread of the population of group means.
    # NOTE(review): mu=92 looks carried over from a different dataset (the
    # simulated group means are 0 and 5). The scale of 100 keeps the prior
    # broad, so the value is left unchanged here -- confirm the intended centre.
    hyper_mu_mu = pm.Normal('hyper_mu_mu', mu=92, sigma=100)
    hyper_mu_std = pm.HalfNormal('hyper_mu_std', sigma=100)

    # Priors: one mean per factor level, one common standard deviation.
    group_mu = pm.Normal('group_mu', mu=hyper_mu_mu, sigma=hyper_mu_std, shape=n_factors)
    # Gotcha: `std` and `y` rebind the notebook-level names that were used when
    # the data were simulated; from here on they refer to model variables.
    std = pm.HalfNormal('std', sigma=100)
    # Each observation uses the mean of the level given by its factor_id; a
    # plain integer array is passed as the index rather than a pandas Series.
    y = pm.Normal('y', mu=group_mu[df["factor_id"].values], sigma=std, observed=df["value"])

    # Prediction for a new, unseen factor level: draw its mean from the
    # population distribution, then one observation around that mean.
    pred_mu = pm.Normal('pred_mu', mu=hyper_mu_mu, sigma=hyper_mu_std)
    pred_y = pm.Normal('pred_y', mu=pred_mu, sigma=std)

    trace_hierarchical = pm.sample(1000, random_seed=SAMPLE_SEED, return_inferencedata=True)

In [None]:
# Show the ArviZ summary table for the sampled trace.
with model_hierarchical:
    summary_hierarchical = az.summary(trace_hierarchical)
    # Inside a `with` block a bare expression is not rendered, hence display().
    display(summary_hierarchical)

In [None]:
# Histogram of the posterior draws of `pred_y`, the model's prediction for an
# observation from a new factor level. All draws are flattened into one array.
pred_y_draws = np.asarray(trace_hierarchical.posterior["pred_y"]).ravel()

fig, ax = plt.subplots()
ax.hist(pred_y_draws, bins=100)
# Title and axis labels so the figure can be read on its own.
ax.set(
    title="Posterior predictive of y for a new factor level",
    xlabel="pred_y",
    ylabel="number of draws",
);