In [None]:
import numpy as np
import scipy
import scipy.special
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
import pymc3 as pm
import arviz as az
import pandas as pd
import plotly.express as px

In [None]:
# Apply seaborn's default theme to all matplotlib figures drawn below.
sns.set_theme()

## Factory data

In [None]:
# Load the raw table, index it by measurement number and label the column
# axis so that the columns are known to be machines.
df_wide = pd.read_csv("factory.csv").set_index("measurement")
df_wide.columns.name = "machine"
df_wide

A "long" (tidy) format is more convenient for the following analyses. Let us *melt* the dataframe!

In [None]:
# Reshape from wide (one column per machine) to long format: one row per
# (measurement, machine) pair, with the reading stored in `quality`.
df = pd.melt(
    df_wide.reset_index(),
    id_vars=["measurement"],
    value_vars=['M1', 'M2', 'M3', 'M4', 'M5', 'M6'],
    # Name the variable column explicitly instead of relying on
    # `df_wide.columns.name` having been set to "machine" beforehand.
    var_name="machine",
    value_name='quality',
)
# Categorical dtype gives each machine a stable integer code (used later as an index).
df["machine"] = df["machine"].astype("category")
df.head()

## Some traditional analysis

* Obtain the pooled (global) mean and standard deviation of quality measurement

In [None]:
# Pooled (global) sample mean and standard deviation over all machines.
quality = df["quality"]
quality.mean(), quality.std()

* Plot the histogram of the quality measurement

In [None]:
# Histogram of every quality reading, ignoring which machine produced it.
sns.histplot(data=df, x="quality");
#px.histogram(df["quality"]);

* Obtain the mean and standard deviation of quality measurement, for the different machines

In [None]:
# Sample mean and standard deviation of quality, computed separately per machine.
quality_by_machine = df.groupby("machine")[["quality"]]
quality_by_machine.agg(["mean", "std"])

* Draw boxplots of quality for the different machines

In [None]:
# One horizontal box per machine, so the spread of quality can be compared.
sns.boxplot(x="quality", y="machine", data=df);
#px.box(df, x="quality", y="machine")

Even from a classical analysis, there seems to be evidence that different machines have different quality.

## Pooled Bayesian model

\begin{align*}
y_{ij} &\sim \mathcal{N}(\mu, \sigma) \\
\mu &\sim \mathcal{N}(92.5, 10) \\
\sigma &\sim |\mathcal{N}|(0, 15) \\
\end{align*}


* Implement the pooled model in pymc3

In [None]:
# Prior sanity check: summarise draws from a half-normal with the candidate
# scale to see which range of standard deviations such a prior supports.
HALFNORMAL_PRIOR_SCALE = 15
# Fixed random_state so the summary table is identical on every run.
prior_std_draws = stats.halfnorm.rvs(size=1000, scale=HALFNORMAL_PRIOR_SCALE, random_state=0)
pd.DataFrame(prior_std_draws).describe()

In [None]:
# Pooled model: one mean and one standard deviation shared by all machines.
with pm.Model() as model_pooled:
    mu = pm.Normal('mu', mu=92.5, sigma=10)
    std = pm.HalfNormal('std', sigma=15)
    y = pm.Normal('y', mu=mu, sigma=std, observed=df["quality"])
    # Fixed seed: posterior draws (and every table/plot derived from them)
    # are reproducible across kernel restarts.
    trace_pooled = pm.sample(10_000, return_inferencedata=True, random_seed=42)

In [None]:
# Posterior summary of the pooled model. The trace is an InferenceData
# object, so no model context is needed to summarise it.
az.summary(trace_pooled)

In [None]:
# Marginal posterior densities of `mu` and `std` for the pooled model.
az.plot_posterior(trace_pooled);

In [None]:
#std_pooled_trace = np.array(trace_pooled.posterior["std"]).reshape(-1)
#plt.hist(std_pooled_trace, bins=50);

## Unpooled Bayesian Model

\begin{align*}
y_{ij} &\sim \mathcal{N}(\mu_j, \sigma_j) \\
\mu_{j} &\sim \mathcal{N}(92.5, 100) \\
\sigma_{j} &\sim |\mathcal{N}|(0, 100) \\
\end{align*}



* Implement the unpooled model in pymc3

In [None]:
# Encode each machine as an integer code so it can index per-machine parameters.
machine_cat = df["machine"].astype("category")
df["machine"] = machine_cat
df["machine_id"] = machine_cat.cat.codes
machines = machine_cat.cat.categories.tolist()  # machine labels, ordered by code
n_machines = len(machines)  # number of machines (6)
df.head()

In [None]:
# Unpooled model: every machine gets its own, independent mean and std.
with pm.Model() as model_unpooled:
    machine_idx = df["machine_id"]  # per-observation machine code
    group_mu = pm.Normal('group_mu', mu=92.5, sigma=100, shape=n_machines)
    group_std = pm.HalfNormal('group_std', sigma=100, shape=n_machines)
    # Indexing by machine code picks, for each observation, its machine's parameters.
    y = pm.Normal('y', mu=group_mu[machine_idx], sigma=group_std[machine_idx], observed=df["quality"])
    # Fixed seed so the posterior draws are reproducible.
    trace_unpooled = pm.sample(10_000, return_inferencedata=True, random_seed=42)


In [None]:
# Posterior summary of all per-machine means and standard deviations.
# No model context is needed to summarise an InferenceData trace.
az.summary(trace_unpooled)

In [None]:
# Ridge plot of the posterior of each machine's mean (95% HDI, chains combined).
axes = az.plot_forest(trace_unpooled, kind='ridgeplot', hdi_prob=0.95, var_names="group_mu", combined=True)
# Replace the default row labels with machine names. The list is reversed,
# presumably because tick labels are assigned bottom-up -- verify on the figure.
axes[0].set_yticklabels(machines[::-1]);

In [None]:
# Ridge plot of the posterior of each machine's std (95% HDI, chains combined).
axes = az.plot_forest(trace_unpooled, kind='ridgeplot', hdi_prob=0.95, var_names="group_std", combined=True)
# Replace the default row labels with machine names. The list is reversed,
# presumably because tick labels are assigned bottom-up -- verify on the figure.
axes[0].set_yticklabels(machines[::-1]);

* Compare the sample unpooled standard deviation with the Bayesian estimate. What do you observe?

We already computed the sample standard deviations before:

In [None]:
# Per-machine sample mean and std, repeated here for side-by-side comparison
# with the posterior estimates.
sample_stats = df.groupby("machine")[["quality"]].agg(["mean", "std"])
sample_stats

The posterior means of the Bayesian estimates are:

In [None]:
# Posterior summary restricted to the per-machine standard deviations.
# No model context is needed to summarise an InferenceData trace.
az.summary(trace_unpooled, var_names=["group_std"])

In [None]:
# Disabled alternative: histogram of the raw posterior draws of one machine's std.
#trace_std = np.array(trace_unpooled.posterior["group_std"]).reshape(-1, n_machines)
#plt.hist(trace_std[:, 5], bins=50);
#px.histogram(trace_std[:, 0])#, bins=50);

In [None]:
# Use the posterior *mode* as the point estimate for each machine's std.
# Author's observation on this data: the posterior mean is 29, the mode is 19!
az.plot_posterior(trace_unpooled, var_names=["group_std"], point_estimate='mode');

The sample std is rather different from the posterior mean of the std. It is actually much closer to the posterior mode (as expected from theory)! The posterior distributions of the std are heavily right-skewed!

## Hierarchical model, common std

\begin{align*}
\mu_\mu &\sim \mathcal{N}(92, 100) \\
\sigma_\mu &\sim |\mathcal{N}|(0, 100) \\
\mu_{j} &\sim \mathcal{N}(\mu_\mu, \sigma_\mu) \\
\sigma &\sim |\mathcal{N}|(0, 100) \\
y_{ij} &\sim \mathcal{N}(\mu_j, \sigma) \\
\end{align*}



* Implement the hierarchical model with common std in pymc3

In [None]:
# Hierarchical model with a common std: machine means are drawn from a shared
# population distribution whose mean and spread are themselves inferred.
with pm.Model() as model_hierarchical:

    # hyper-priors: population-level mean and spread of the machine means
    hyper_mu_mu = pm.Normal('hyper_mu_mu', mu=92, sigma=100)
    hyper_mu_std = pm.HalfNormal('hyper_mu_std', sigma=100)

    # priors: one mean per machine, a single std shared by all machines
    group_mu = pm.Normal('group_mu', mu=hyper_mu_mu, sigma=hyper_mu_std, shape=n_machines)
    std = pm.HalfNormal('std', sigma=100)
    y = pm.Normal('y', mu=group_mu[df["machine_id"]], sigma=std, observed=df["quality"])
    # Fixed seed so the posterior draws are reproducible.
    trace_hierarchical = pm.sample(10_000, return_inferencedata=True, random_seed=42)

In [None]:
# Posterior summary table for every parameter of the hierarchical model.
hierarchical_summary = az.summary(trace_hierarchical)
hierarchical_summary

In [None]:
# Compare the machine-mean posteriors of the unpooled and hierarchical models
# and mark the grand (pooled) sample mean with a vertical red line.
axes = az.plot_forest(
    [trace_unpooled, trace_hierarchical],
    model_names=['factory_unpooled', 'factory_hierarchical'],
    var_names='group_mu',
    kind='ridgeplot',
    combined=True,
    hdi_prob=0.95,
)

forest_ax = axes[0]
grand_mean = df["quality"].mean()
forest_ax.axvline(x=grand_mean, color="r", label="grand_mean")
forest_ax.set_yticklabels(machines[::-1]);

In the hierarchical model, the machine means are pulled (shrunk) towards the grand mean.

## Model selection 

In [None]:
# Rank the three models by WAIC; model weights use Bayesian-bootstrap pseudo-BMA.
comp_df = az.compare(
    {
        "model_pooled": trace_pooled,
        "model_unpooled": trace_unpooled,
        "model_hierarchical": trace_hierarchical,
    },
    ic="waic",
    method="BB-pseudo-BMA",
    seed=42,  # the bootstrap weights are random; fix them for reproducibility
)
comp_df

In [None]:
# Show the model-comparison table held in `comp_df` again.
comp_df

In [None]:
# Graphical view of the comparison table; the trailing `;` hides the Axes repr,
# as in the other plotting cells.
az.plot_compare(comp_df);

## Manual WAIC computations

Let us compare the arviz computation of WAIC with a manual implementation:

In [None]:
# Reference value: WAIC of the hierarchical model as computed by ArviZ.
# No model context is needed; the log-likelihood is read from the InferenceData.
az.waic(trace_hierarchical)

In [None]:
# Pointwise log-likelihood of `y`, stored as (chains, iters, obs); merge the
# chain and iteration axes so that each row is one MCMC sample.
log_lik = trace_hierarchical.log_likelihood["y"].values
n_obs = log_lik.shape[-1]
log_lik = log_lik.reshape(-1, n_obs)  # (mc samples, obs)
S, ny = log_lik.shape

In [None]:
# Effective number of parameters: variance of the log-likelihood across MCMC
# samples for each observation, summed over the observations.
pointwise_var = log_lik.var(axis=0)
p_waic = pointwise_var.sum()
p_waic

In [None]:
# Log pointwise predictive density. For each observation i,
#   log( (1/S) * sum_s p(y_i | theta_s) ) = logsumexp_s( log p(y_i | theta_s) ) - log(S),
# evaluated with logsumexp for numerical stability, then summed over observations.
# Requires `scipy.special` to be imported explicitly at the top of the notebook.
lppd_pointwise = scipy.special.logsumexp(log_lik, axis=0) - np.log(S)
lppd = np.sum(lppd_pointwise)
lppd

In [None]:
# Manual WAIC on the deviance scale: -2 * (lppd - p_waic).
waic_deviance = -2 * (lppd - p_waic)
waic_deviance

In [None]:
# Manual WAIC on the log scale (lppd - p_waic), to set against the az.waic output.
elpd_waic_manual = lppd - p_waic
elpd_waic_manual

## With predictive distribution

TODO

In [None]:
# Pooled model extended with an unobserved node `y_pred` that shares the
# likelihood's parameters, so it is sampled together with `mu` and `std`.
with pm.Model() as model_pooled_with_pred:
    mu = pm.Normal('mu', mu=92.5, sigma=10)
    std = pm.HalfNormal('std', sigma=15)
    y = pm.Normal('y', mu=mu, sigma=std, observed=df["quality"])

    # Prediction: a new, not-yet-observed quality reading
    y_pred = pm.Normal('y_pred', mu=mu, sigma=std)
    # Fixed seed so the posterior and predictive draws are reproducible.
    trace_pooled_with_pred = pm.sample(10_000, return_inferencedata=True, random_seed=42)

In [None]:
#trace_pooled_with_pred.posterior.y_pred

In [None]:
# Hierarchical model extended with predictive nodes for a machine that is not
# in the data: its mean is drawn from the population distribution, and a
# reading is drawn around that mean with the common std.
with pm.Model() as model_hierarchical_with_pred:

    # hyper-priors: population-level mean and spread of the machine means
    hyper_mu_mu = pm.Normal('hyper_mu_mu', mu=92, sigma=100)
    hyper_mu_std = pm.HalfNormal('hyper_mu_std', sigma=100)

    # priors: one mean per machine, a single std shared by all machines
    group_mu = pm.Normal('group_mu', mu=hyper_mu_mu, sigma=hyper_mu_std, shape=n_machines)
    std = pm.HalfNormal('std', sigma=100)
    y = pm.Normal('y', mu=group_mu[df["machine_id"]], sigma=std, observed=df["quality"])

    # prediction: mean of a new machine, then one reading from that machine
    pred_mu = pm.Normal('pred_mu', mu=hyper_mu_mu, sigma=hyper_mu_std)
    pred_y = pm.Normal('pred_y', mu=pred_mu, sigma=std)

    # Fixed seed so the posterior and predictive draws are reproducible.
    trace_hierarchical_with_pred = pm.sample(10_000, return_inferencedata=True, random_seed=42)

In [None]:
# `model_hierarchical_with_pred` and `trace_hierarchical_with_pred` are already
# built by the preceding cell. Declaring and sampling the identical model again
# would only repeat a long MCMC run, so this cell inspects the predictive nodes
# for a new machine instead.
az.summary(trace_hierarchical_with_pred, var_names=["pred_mu", "pred_y"])