# Normal-Normal Example

Let's assume we're conducting quality control in a factory. We have a machine that produces candies, and the weight of these candies follows a normal distribution. We know the standard deviation of the weight is 2 grams, but the machine has a control to adjust the average weight. The setting on the machine has been lost, so we don't know the average weight it is set for. We weigh 10 candies, and we want to infer the average weight from these measurements.

## Base Python (NumPy / SciPy)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from cmdstanpy import CmdStanModel, cmdstan_path
from scipy.stats import norm


# Hyperparameters of the Normal prior placed on the unknown mean weight
mu_prior = 100
sigma_prior = 50


## Visualize the prior via a histogram of Monte Carlo draws
prior_draws = norm.rvs(loc=mu_prior, scale=sigma_prior, size=1000000)
plt.hist(prior_draws)


# Central 95% prior credible interval for theta (2.5% and 97.5% quantiles)
norm.ppf([0.025, 0.975], loc=mu_prior, scale=sigma_prior)

In [None]:
# Simulate some Normal data
np.random.seed(123)  # for reproducibility (norm.rvs uses NumPy's global RNG by default)
N = 10
true_mu = 50  # true average weight of candies (in grams)
sigma = 2  # known standard deviation (in grams)
x = norm.rvs(true_mu, sigma, size=N)  # observed weights

# Conjugate Normal-Normal update with known sigma.
# Both posterior parameters share the same denominator.
posterior_denominator = sigma**2 + N * sigma_prior**2
mu_posterior = (sigma**2 * mu_prior + N * np.mean(x) * sigma_prior**2) / posterior_denominator
# sigma^2 * sigma_prior^2 / denominator is the posterior *variance*; take the
# square root because sigma_posterior is used below as a standard deviation
# (the `scale` argument of scipy.stats.norm).
sigma_posterior = np.sqrt(sigma**2 * sigma_prior**2 / posterior_denominator)

# Bare expressions in the middle of a cell are not displayed, so print them.
print(f"Posterior mean: {mu_posterior}")
print(f"Posterior standard deviation: {sigma_posterior}")

# Histogram of Monte Carlo draws from the posterior of the mean
plt.clf()
plt.hist(norm.rvs(mu_posterior, sigma_posterior, size=100000))
plt.show()

In [None]:
# Central 95% posterior credible interval for theta: the 2.5% and 97.5%
# quantiles of the Normal with loc mu_posterior and scale sigma_posterior
norm.ppf([0.025, 0.975], loc=mu_posterior, scale=sigma_posterior)

In [None]:
# Posterior predictive distribution of a NEW observation (one candy weight)

# Manually, by simulation
n_draws = 100000  # number of Monte Carlo draws used for both steps below
# Step 1: draw values of the mean from its posterior distribution
mu_draws = norm.rvs(loc=mu_posterior, scale=sigma_posterior, size=n_draws)
# Step 2: for each drawn mean, simulate one new weight from Normal(mean, sigma)
future_obs = norm.rvs(loc=mu_draws, scale=sigma, size=n_draws)

plt.clf()
plt.hist(future_obs)
plt.show()

In [None]:
# Closed-form counterpart of the simulation: draw directly from a Normal
# centred at mu_posterior whose scale is sqrt(sigma^2 + sigma_posterior^2)
predictive_scale = np.sqrt(sigma**2 + sigma_posterior**2)
closed_form_draws = norm.rvs(loc=mu_posterior, scale=predictive_scale, size=100000)

plt.clf()
plt.hist(closed_form_draws)
plt.show()

# Stan

In [None]:
# Import necessary libraries
from cmdstanpy import CmdStanModel
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm, mode

# Stan program: Normal likelihood with known sigma, Normal prior on mu, and
# one posterior predictive draw (y_pred) generated per iteration
stan_code = """
data {
  int<lower=0> N; // number of observations
  vector[N] y; // observed weights
  real<lower=0> sigma; // known standard deviation
  real mu_prior; // prior mean
  real<lower=0> sigma_prior; // prior standard deviation
}
parameters {
  real mu; // mean
}
model {
  mu ~ normal(mu_prior, sigma_prior); // prior
  y ~ normal(mu, sigma); // likelihood
}
generated quantities {
  real y_pred;
  y_pred = normal_rng(mu, sigma); // posterior predictive distribution
}
"""

# Save the program to disk; CmdStanModel is given a file path below
stan_path = "model.stan"
with open(stan_path, "w") as out:
    out.write(stan_code)

# Build the model object from the saved Stan file
model = CmdStanModel(stan_file=stan_path)

In [None]:
# Simulate some Normal data
np.random.seed(123)  # for reproducibility
N = 10
true_mu = 50  # true average weight of candies (in grams)
sigma = 2  # known standard deviation (in grams)
y = norm.rvs(loc=true_mu, scale=sigma, size=N)  # observed weights

# Prior hyperparameters
mu_prior = 100
sigma_prior = 50

# Data passed to Stan; keys must match the names in the model's data block
stan_data = {
    "N": N,
    "y": y,
    "sigma": sigma,
    "mu_prior": mu_prior,
    "sigma_prior": sigma_prior,
}

# Run Stan model: 4 chains with 1000 post-warmup draws each.
# The sampler is seeded too, so the MCMC draws (not only the simulated data)
# are reproducible from run to run.
fit = model.sample(data=stan_data, iter_sampling=1000, chains=4, seed=123)

In [None]:
# Collect the draws returned by the fit for every Stan variable
posterior_samples = fit.stan_variables()

# Tabulate the draws, one column per variable
posterior_samples_df = pd.DataFrame(data=posterior_samples)

# 95% credible interval: empirical 2.5% and 97.5% quantiles of the mu draws
mu_samples = posterior_samples_df["mu"]
ci = np.quantile(mu_samples, q=[0.025, 0.975])
print(f"95% credible interval for mu: ({ci[0]}, {ci[1]})")

In [None]:
# Point estimates of mu computed from the posterior draws.
#
# NOTE: a MAP estimate is not computed here. Taking the sample mode of the
# draws (e.g. with scipy.stats.mode) doesn't work for a continuous
# parameter, so only the mean and the median are reported.

# Expected value (posterior mean, estimated by the mean of the draws)
expected_value = np.mean(posterior_samples_df['mu'])
print(f"Expected value for mu: {expected_value}")


# Median (estimated by the median of the draws)
median = np.median(posterior_samples_df['mu'])
print(f"Median for mu: {median}")

In [None]:
# Draws of y_pred, the quantity generated by the Stan model on each iteration
y_pred_samples = posterior_samples_df["y_pred"]

# Density-normalised histogram of the posterior predictive draws
plt.hist(y_pred_samples, density=True)
plt.xlabel("y_pred")
plt.title("Posterior predictive distribution")
plt.show()