In [1]:
import arviz as az
import pandas as pd
import pymc3 as pm
import numpy as np

# Data Extraction

In [3]:
# Load the dataset
data = pd.read_csv('concrete.csv', delimiter='\t')

# Predictors and output from the dataset
X = data[['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8']].values
concrete_strengths = data['y'].values

# Model

In [21]:
with pm.Model() as model:
    # Data
    X_shared = pm.Data('X_data', X)

    # Priors
    intercept = pm.Normal('Intercept', mu=0, sigma=10)
    beta = pm.Normal('Beta', mu=0, sigma=10, shape=8)
    tau = pm.Gamma("tau", alpha=0.001, beta=0.001)

    # Linear model
    mu = intercept + pm.math.dot(X, beta)

    # Likelihood
    likelihood = pm.Normal('likelihood', mu=mu, tau=tau, observed=concrete_strengths)

    # Predicted Mu
    x_new = np.array([[2.5, 1, 0.5, 1.8, 0.6, 8, 7, 30]])
    mu_pred = pm.Deterministic("mu_pred", intercept + pm.math.dot(x_new, beta))  # Predictive mean

    # Posterior sampling
    trace = pm.sample(3000, target_accept=0.95)

  return wrapped_(*args_, **kwargs_)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [tau, Beta, Intercept]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 34 seconds.


# Show Statistics

In [22]:
az.summary(trace, hdi_prob=0.9)

Got error No model on context stack. trying to find log_likelihood in translation.


Unnamed: 0,mean,sd,hdi_5%,hdi_95%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
Intercept,5.989,8.807,-8.514,20.382,0.125,0.09,4966.0,5859.0,1.0
Beta[0],10.892,0.569,9.945,11.812,0.008,0.006,5369.0,6914.0,1.0
Beta[1],9.475,0.697,8.323,10.622,0.009,0.007,5491.0,7602.0,1.0
Beta[2],7.072,1.017,5.401,8.724,0.013,0.01,5728.0,7744.0,1.0
Beta[3],-15.785,2.293,-19.424,-11.927,0.027,0.019,7060.0,7782.0,1.0
Beta[4],3.134,1.071,1.404,4.937,0.014,0.01,6061.0,8126.0,1.0
Beta[5],0.471,0.465,-0.31,1.213,0.005,0.004,7283.0,7480.0,1.0
Beta[6],0.777,0.553,-0.136,1.671,0.007,0.005,6143.0,6435.0,1.0
Beta[7],0.11,0.007,0.098,0.12,0.0,0.0,9720.0,8371.0,1.0
tau,0.006,0.0,0.006,0.006,0.0,0.0,9787.0,8070.0,1.0


# Calculate R²

In [23]:
with model:
    post_pred = pm.sample_posterior_predictive(trace)
    # Calculate the Bayesian R^2
    R2 = az.r2_score(concrete_strengths, post_pred['likelihood'])
    print(f'Bayesian R^2:\n{R2}')

Bayesian R^2:
r2        0.498917
r2_std    0.013329
dtype: float64


# Prediction

In [24]:
with model:
    pm.set_data({"X_data": x_new})
    ppc = pm.sample_posterior_predictive(trace)

# Show Statistics

In [25]:
az.summary(ppc, hdi_prob=0.9)



Unnamed: 0,mean,sd,hdi_5%,hdi_95%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
likelihood,36.316,18.277,6.486,66.53,0.031,0.022,359622.0,1042722.0,1.04


# Conclusion

### R² Value = 0.498917

### 90% credible set for the mean response:
mean of 32.196 with 90% credible set of [30.584, 33.844]


### Bayesian 90% prediction interval:
mean of 36.316 with 90% prediction interval of [6.486, 66.53]