In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
import pymc3 as pm
import arviz as az
import pandas as pd
import plotly.express as px

In [None]:
# Switch matplotlib to seaborn's default theme for every figure drawn below.
sns.set()

In [None]:
# Load the drowning data set from a plain-text file into a NumPy array.
# NOTE(review): `drowings` (sic) is not referenced by any other cell visible
# in this notebook — confirm whether this load is still needed.
drowings = np.loadtxt("../data/drowning.txt")
# Trailing semicolon: evaluate the name but suppress the cell output.
drowings;

In [None]:
# Load the factory measurements as a 2-D float array. Later cells treat rows
# as repeated measurements and columns as groups (factories).
data = np.loadtxt("../data/factory.txt")
data

## Pooled model

\begin{align*}
y_{ij} &\sim \mathcal{N}(\mu, \sigma) \\
\mu &\sim \mathcal{N}(92.5, 10) \\
\sigma &\sim |\mathcal{N}|(0, 15)
\end{align*}


In [None]:
# Flatten all measurements into one vector (row-major order); the pooled
# model ignores which group a value came from.
data_vec = data.ravel()
data_vec

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) of all measurements.
pd.Series(data_vec).describe()

In [None]:
# Histogram of all measurements; the semicolon hides the returned bin arrays.
plt.hist(data_vec);

In [None]:
# Kernel density estimate of all measurements, as a smooth alternative to the histogram.
az.plot_kde(data_vec)

In [None]:
# Back-of-the-envelope scale: one third of the distance from 92.5 to 120.
# NOTE(review): presumably this motivates the sd=10 prior on `mu` in the
# pooled model (120 lying about three standard deviations out) — confirm.
(120 - 92.5)/3

In [None]:
import scipy

# Prior check for the noise scale: summarise 1000 draws from a half-normal
# distribution with scale 15 to see which standard deviations such a prior
# treats as plausible. The draws are seeded so the displayed summary is the
# same on every Restart & Run All.
halfnorm_prior_draws = scipy.stats.halfnorm.rvs(size=1000, scale=15, random_state=0)
pd.DataFrame(halfnorm_prior_draws).describe()

In [None]:
# Complete pooling: every measurement, whichever group it belongs to, is
# modelled as a draw from one shared Normal(mu, std).
with pm.Model() as factory_pooled:
    # Prior on the common mean.
    mu = pm.Normal('mu', mu=92.5, sd=10)
    # Half-normal prior keeps the common standard deviation positive.
    std = pm.HalfNormal('std', sd=15)
    # Likelihood over the flattened measurements.
    y = pm.Normal('y', mu=mu, sd=std, observed=data_vec)
    # Fixed seed so the posterior draws, and every summary or plot derived
    # from them, are reproducible across kernel restarts.
    trace_pooled = pm.sample(10_000, random_seed=42)

In [None]:
# Posterior summary table for the pooled model's parameters.
az.summary(trace_pooled)

In [None]:
# Posterior density plots for each parameter of the pooled model.
az.plot_posterior(trace_pooled)

In [None]:
# Histogram (50 bins) of the posterior draws of the pooled `std`;
# the semicolon hides the returned bin arrays.
plt.hist(trace_pooled["std"], bins=50);

In [None]:
# WAIC for the pooled model, computed from its trace.
# NOTE(review): depending on the ArviZ/PyMC3 versions this call may need to
# run inside `with factory_pooled:` so the log-likelihood can be evaluated —
# confirm against the installed versions.
az.waic(trace_pooled)

## Separate model

\begin{align*}
y_{ij} &\sim \mathcal{N}(\mu_j, \sigma_j) \\
\mu_{j} &\sim \mathcal{N}(92.5, 100) \\
\sigma_{j} &\sim |\mathcal{N}|(0, 100)
\end{align*}



In [None]:
# `data` is laid out as (measurement, group): one row per measurement and
# one column per group.
meas, groups = data.shape

# Flatten row by row, so consecutive entries run across the groups of one
# measurement before moving on to the next measurement.
data_vec = data.reshape(-1)

# Group (column) index of every entry of `data_vec`, flattened in the same
# row-major order so the two vectors stay aligned element by element.
group_vec = np.indices(data.shape)[1].ravel()

In [None]:
# Show the flattened measurements.
data_vec

In [None]:
# Show the group index assigned to each flattened measurement.
group_vec

In [None]:
# Per-column (i.e. per-group) mean of the measurements.
data.mean(axis=0)

In [None]:
# Per-column (i.e. per-group) sample standard deviation (ddof=1).
data.std(axis=0, ddof=1)

In [None]:
# No pooling: each group gets its own mean and its own standard deviation,
# with independent priors that share no information between groups.
with pm.Model() as factory_separate:
    # One mean per group.
    group_mu = pm.Normal('group_mu', mu=92.5, sd=100, shape=groups)
    # One positive standard deviation per group.
    group_std = pm.HalfNormal('group_std', sd=100, shape=groups)
    # `group_vec` selects, for every observation, the parameters of the group
    # that observation belongs to.
    y = pm.Normal('y', mu=group_mu[group_vec], sd=group_std[group_vec], observed=data_vec)
    # Fixed seed so the posterior draws, and every summary or plot derived
    # from them, are reproducible across kernel restarts.
    trace_separate = pm.sample(10_000, random_seed=42)


In [None]:
# Posterior summary table for the separate model's parameters.
az.summary(trace_separate)

In [None]:
# Interactive histogram of the posterior draws of the standard deviation of
# the group at index 5 (the sixth column of `data`).
px.histogram(trace_separate["group_std"][:, 5])

In [None]:
# Posterior of the index-5 group's standard deviation, annotated with its mode.
az.plot_posterior(trace_separate["group_std"][:, 5], point_estimate='mode')

In [None]:
# Same posterior, annotated with its mean for comparison against the mode.
az.plot_posterior(trace_separate["group_std"][:, 5], point_estimate='mean')

In [None]:
# Posterior summary of the separate model.
# NOTE(review): repeats an earlier cell's summary of the same trace — confirm it is still wanted.
az.summary(trace_separate)

## Hierarchical model, common std

\begin{align*}
y_{ij} &\sim \mathcal{N}(\mu_j, \sigma) \\
\mu_{j} &\sim \mathcal{N}(\mu_0, \tau) \\
\mu_0 &\sim \mathcal{N}(92, 100) \\
\tau &\sim |\mathcal{N}|(0, 100) \\
\sigma &\sim |\mathcal{N}|(0, 100)
\end{align*}



In [None]:
# Per-column (i.e. per-group) mean of the measurements, for reference.
data.mean(axis=0)

In [None]:
# Standard deviation over all entries of `data` taken together (NumPy's default ddof=0).
data.std()

In [None]:
# Partial pooling: group means are drawn from a shared population
# distribution whose location and spread are themselves inferred, while all
# groups share a single observation standard deviation.
with pm.Model() as factory_hierarchical:

    # Hyper-priors: location and spread of the population of group means.
    hyper_mu_mu = pm.Normal('hyper_mu_mu', mu=92, sd=100)
    hyper_mu_std = pm.HalfNormal('hyper_mu_std', sd=100)

    # Priors: one mean per group, tied together through the hyper-parameters,
    # and one common positive standard deviation.
    group_mu = pm.Normal('group_mu', mu=hyper_mu_mu, sd=hyper_mu_std, shape=groups)
    std = pm.HalfNormal('std', sd=100)

    # Likelihood: `group_vec` selects each observation's group mean.
    y = pm.Normal('y', mu=group_mu[group_vec], sd=std, observed=data_vec)

    # Fixed seed so the posterior draws, and every summary or plot derived
    # from them, are reproducible across kernel restarts.
    trace_hierarchical = pm.sample(10_000, random_seed=42)

In [None]:
# Posterior summary table for the hierarchical model's parameters.
az.summary(trace_hierarchical)

In [None]:
# Labelled view of the raw measurements: one row per measurement and one
# column per factory, with column j of `data` labelled "F{j+1}".
# (`pd` is already imported in the notebook's first cell.)
df = pd.DataFrame(data, columns=[f"F{j + 1}" for j in range(data.shape[1])])
df.index.name = "measurement"
df.columns.name = "factory"
df

In [None]:
# Per-factory mean and standard deviation of the measurements.
df.agg(["mean", "std"])

In [None]:
# Per-column standard deviation with NumPy's default ddof=0, which is why
# these values are smaller than the pandas `std` row (pandas uses ddof=1).
data.std(axis=0)

In [None]:
# Mean and standard deviation (ddof=0) over all measurements taken together.
data.mean(), data.std()

In [27]:
# Posterior summary table for the hierarchical model's parameters.
az.summary(trace_hierarchical)



Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
hyper_mu_mu,92.949,7.726,78.869,107.661,0.072,0.052,12672.0,10383.0,1.0
group_mu[0],80.016,6.879,67.292,93.054,0.11,0.081,3969.0,2723.0,1.0
group_mu[1],103.099,6.469,90.586,114.951,0.082,0.058,6238.0,4847.0,1.0
group_mu[2],89.059,6.237,77.523,100.988,0.064,0.046,9351.0,8853.0,1.0
group_mu[3],107.203,6.92,94.5,120.46,0.109,0.077,3987.0,2251.0,1.0
group_mu[4],90.687,6.046,78.787,101.624,0.056,0.04,11495.0,9984.0,1.0
group_mu[5],87.682,6.238,76.241,99.787,0.074,0.053,6971.0,6006.0,1.0
hyper_mu_std,15.803,8.999,3.549,30.984,0.147,0.104,1837.0,797.0,1.0
std,15.226,2.323,11.208,19.677,0.034,0.026,5437.0,3495.0,1.0


In [28]:
# Labelled view of the raw measurements: one row per measurement and one
# column per factory, with column j of `data` labelled "F{j+1}".
# (`pd` is already imported in the notebook's first cell.)
df = pd.DataFrame(data, columns=[f"F{j + 1}" for j in range(data.shape[1])])
df.index.name = "measurement"
df.columns.name = "factory"
df

factory,F1,F2,F3,F4,F5,F6
measuremet,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,83.0,117.0,101.0,105.0,79.0,57.0
1,92.0,109.0,93.0,119.0,97.0,92.0
2,92.0,114.0,92.0,116.0,103.0,104.0
3,46.0,104.0,86.0,102.0,79.0,77.0
4,67.0,87.0,67.0,116.0,92.0,100.0


In [29]:
# Per-factory mean and standard deviation of the measurements.
df.agg(["mean", "std"])

factory,F1,F2,F3,F4,F5,F6
mean,76.0,106.2,87.8,111.6,90.0,86.0
std,19.634154,11.819475,12.79453,7.569676,10.77033,19.222383


In [30]:
# Per-column standard deviation with NumPy's default ddof=0, which is why
# these values are smaller than the pandas `std` row (pandas uses ddof=1).
data.std(axis=0)

array([17.56132113, 10.57166023, 11.4437756 ,  6.77052435,  9.63327566,
       17.19302184])

In [31]:
# Mean and standard deviation (ddof=0) over all measurements taken together.
data.mean(), data.std()

(92.93333333333334, 17.699215299617727)

In [32]:
# Per-column standard deviation (ddof=0).
# NOTE(review): identical to an earlier cell — confirm it is still wanted.
data.std(axis=0)

array([17.56132113, 10.57166023, 11.4437756 ,  6.77052435,  9.63327566,
       17.19302184])

In [33]:
# Overall mean and standard deviation (ddof=0).
# NOTE(review): identical to an earlier cell — confirm it is still wanted.
data.mean(), data.std()

(92.93333333333334, 17.699215299617727)