In [18]:
import arviz as az
import pandas as pd
import pymc3 as pm
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Data Extraction

In [19]:
# Load the dataset
data = pd.read_csv('concrete.csv', delimiter='\t')

# Predictors and output from the dataset
X_full = data[['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8']].values
X_without_x6 = data[['x1', 'x2', 'x3', 'x4', 'x5', 'x7', 'x8']].values
X_without_x7 = data[['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x8']].values
concrete_strengths = data['y'].values

Y_observed = concrete_strengths.repeat(3).reshape(-1, 3)

# Model

In [20]:
with pm.Model() as model:
    # Data
    X_shared_full = pm.Data('X_full', X_full)
    X_shared_without_x6 = pm.Data('X_without_x6', X_without_x6)
    X_shared_without_x7 = pm.Data('X_without_x7', X_without_x7)
    

    # Priors
    beta_full = pm.Normal("beta_full", mu=0, sigma=10, shape=8)
    beta_without_x6 = pm.Normal("beta_without_x6", mu=0, sigma=10, shape=7)
    beta_without_x7 = pm.Normal("beta_without_x7", mu=0, sigma=10, shape=7)
    intercept = pm.Normal('Intercept', mu=0, sigma=10)
    tau = pm.Gamma("tau", alpha=0.001, beta=0.001, shape=3)

    # Linear models
    mu_full = intercept + pm.math.dot(X_shared_full, beta_full)
    mu_without_x6 = intercept + pm.math.dot(X_shared_without_x6, beta_without_x6)
    mu_without_x7 = intercept + pm.math.dot(X_shared_without_x7, beta_without_x7)

    # Stack mus for the three models
    mu = pm.math.stack([mu_full, mu_without_x6, mu_without_x7], axis=1)

    # Likelihoods for the three models
    likelihood = pm.Normal('likelihood', mu=mu, tau=tau, observed=Y_observed)

    # Posterior sampling
    trace = pm.sample(3000, target_accept=0.95)

    # Posterior predictive checks
    ppc = pm.sample_posterior_predictive(trace)
    inference_data = az.from_pymc3(trace=trace, posterior_predictive=ppc)

  return wrapped_(*args_, **kwargs_)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [tau, Intercept, beta_without_x7, beta_without_x6, beta_full]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 61 seconds.


In [24]:
Y_new = az.summary(inference_data.posterior_predictive)["mean"].values.reshape(-1, 3)
D2 = (Y_observed - Y_new) ** 2
L = np.sqrt(np.sum(D2, axis=0) + np.std(Y_new, axis=0) ** 2)
print("L: ", L)

L:  [413.75312452 414.00727911 413.94675522]


# Consclusion

Based on the obtained L values, the full model is preferred as it has the lowest L value, implying that it has a better fit.