In [51]:
import arviz as az
import pandas as pd
import pymc3 as pm
import numpy as np
import matplotlib.pyplot as plt

# Extract Data

In [59]:
# Load the dataset
data = pd.read_csv('concrete.csv', delimiter='\t')

# Predictors and output from the dataset
X = data[['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8']].values
concrete_strengths = data['y'].values

In [65]:
concrete = pd.read_csv("concrete.csv", delimiter='\t')
concrete.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,y
0,5.4,0.0,0.0,1.62,0.25,10.4,6.76,28,134.43
1,5.4,0.0,0.0,1.62,0.25,10.55,6.76,28,61.89
2,3.33,1.43,0.0,2.28,0.0,9.32,5.94,270,40.27
3,3.33,1.43,0.0,2.28,0.0,9.32,5.94,365,41.05
4,1.99,1.32,0.0,1.92,0.0,9.78,8.26,360,44.3


# Model

In [60]:
with pm.Model() as model:
    # Data
    X_data = pm.Data('X_data', X)

    # Priors
    intercept = pm.Normal('Intercept', mu=0, sigma=10)
    beta = pm.Normal('Beta', mu=0, sigma=100, shape=8)
    sigma = pm.Exponential("sigma", 0.01)

    # Linear model
    mu = intercept + pm.math.dot(X_data, beta)

    # Likelihood
    likelihood = pm.Normal('likelihood', mu=mu, sigma=sigma, observed=concrete_strengths)

    # Predicted Mu
    x_new = np.array([[2.5, 1, 0.5, 1.8, 0.6, 8, 7, 30]])
    mu_pred = pm.Deterministic("mu_pred", intercept + pm.math.dot(x_new, beta))  # Predictive mean

    # Posterior sampling
    trace = pm.sample(3000, target_accept=0.95)

  return wrapped_(*args_, **kwargs_)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, Beta, Intercept]


  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
  return _boost._beta_ppf(q, a, b)
Sampling 4 chains for 1_000 tune and 3_000 draw iterations (4_000 + 12_000 draws total) took 28 seconds.


In [61]:
az.summary(trace)

Got error No model on context stack. trying to find log_likelihood in translation.


Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
Intercept,7.278,8.875,-9.423,23.734,0.128,0.091,4774.0,5847.0,1.0
Beta[0],10.968,0.579,9.876,12.035,0.008,0.006,5226.0,6808.0,1.0
Beta[1],9.583,0.691,8.277,10.875,0.01,0.007,5028.0,6762.0,1.0
Beta[2],7.226,1.025,5.261,9.091,0.014,0.01,5132.0,6523.0,1.0
Beta[3],-16.64,2.382,-21.187,-12.195,0.03,0.021,6493.0,7957.0,1.0
Beta[4],2.879,1.091,0.803,4.92,0.015,0.01,5574.0,7508.0,1.0
Beta[5],0.457,0.468,-0.451,1.288,0.005,0.004,8037.0,8365.0,1.0
Beta[6],0.795,0.553,-0.19,1.894,0.007,0.005,5702.0,7646.0,1.0
Beta[7],0.11,0.007,0.098,0.123,0.0,0.0,9853.0,8334.0,1.0
sigma,12.961,0.282,12.418,13.48,0.003,0.002,9847.0,7579.0,1.0


In [62]:
new_X = np.array([273, 11, 0, 185, 6.4, 968, 780, 25])[np.newaxis, :]

with model:
    pm.set_data({"X_data": new_X})
    ppc = pm.sample_posterior_predictive(trace)

az.summary(ppc, hdi_prob=0.95, kind="stats").mean()



mean         1112.625
sd            845.473
hdi_2.5%     -533.466
hdi_97.5%    2757.074
dtype: float64