# Linear Regression with Pyro - 2025-05-06
This notebook demonstrates how to perform simple linear regression using a fully Bayesian approach with **Pyro**.

In [None]:
# Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")

import torch
import pyro
import pyro.distributions as dist
from pyro.infer import MCMC, NUTS

import arviz as az
# Seed every random number generator this notebook draws from.
# The simulated noise comes from torch.normal, so seeding NumPy alone would
# leave the data (and anything else sampled through torch) different on each run.
np.random.seed(1234)
torch.manual_seed(1234)

## Simulate data

In [None]:
# Simulate observations from the line y = a + b * x with Gaussian noise.
x = torch.arange(1, 21)
a = 0.2      # true intercept
b = 0.3      # true slope
sigma = 0.5  # true noise standard deviation

# One noise draw per x value; the size follows x instead of a hard-coded 20.
epsilon = torch.normal(0.0, sigma, size=(len(x),))
y = a + b * x + epsilon

# Give pandas plain NumPy arrays so both columns get numeric dtypes rather than
# relying on how the installed pandas version coerces torch tensors.
data = pd.DataFrame({'x': x.numpy(), 'y': y.numpy()})
data.head()

In [None]:
# Show the simulated observations, with the known noise scale as error bars.
fig, ax = plt.subplots(figsize=(8, 5))

ax.errorbar(x, y, yerr=sigma, fmt="o", markersize=5)
ax.set(title='Simulated Data', xlabel='x')
ax.set_ylabel('y')


## Bayesian Linear Regression using Pyro

In [None]:
# Display the predictor tensor that is passed to the model below.
x

In [None]:
def model(x, y=None):
    """Bayesian simple linear regression: obs ~ Normal(alpha + beta * x, sigma).

    Sample sites:
        alpha: intercept, Normal(0, 10) prior.
        beta:  slope, Normal(0, 10) prior.
        sigma: observation noise scale, HalfNormal(1) prior.
        obs:   likelihood site inside the "data" plate of size ``len(x)``.

    Args:
        x: predictor values; must support ``len`` and arithmetic with the
            sampled intercept and slope.
        y: values forwarded as ``obs=`` to the "obs" site. Defaults to None.
    """
    intercept = pyro.sample("alpha", dist.Normal(0, 10))
    slope = pyro.sample("beta", dist.Normal(0, 10))
    noise_scale = pyro.sample("sigma", dist.HalfNormal(1))

    # Expected response for every element of x.
    expected = intercept + slope * x
    n_obs = len(x)
    with pyro.plate("data", n_obs):
        likelihood = dist.Normal(expected, noise_scale)
        pyro.sample("obs", likelihood, obs=y)

In [None]:
# Render the model's graph for the simulated data, annotated with distributions.
pyro.render_model(model, model_args=(x, y), render_params=True, render_distributions=True)

In [None]:
# Sample the posterior with NUTS: 1000 warmup steps, then 1000 retained draws.
# Only one chain is used: num_chains > 1 does not work in Jupyter notebooks
# (a Jupyter issue related to multi-processing). For more chains, copy the
# code into a Python file and run it as a script.
nuts_kernel = NUTS(model)
mcmc = MCMC(nuts_kernel, warmup_steps=1000, num_samples=1000, num_chains=1)
mcmc.run(x, y=y)
posterior = mcmc.get_samples()

# Convert each site's sample tensor to a NumPy array before handing it to ArviZ.
posterior_np = dict((site, draws.numpy()) for site, draws in posterior.items())
idata = az.from_dict(posterior=posterior_np)



We can make trace plots for our chain:

In [None]:
# Trace plots drawn by ArviZ directly from the fitted MCMC object.
az.plot_trace(mcmc)

In [None]:
# ArviZ summary table for the posterior samples stored in idata.
az.summary(idata)

## Posterior regression line

In [None]:
# Dense grid spanning the observed x range. float() passes plain Python
# numbers to torch.linspace instead of pandas/NumPy scalar types.
x_vals = torch.linspace(float(data['x'].min()), float(data['x'].max()), 100)

# Posterior draws of the intercept and slope (1D tensors, one entry per draw)
alpha_samples = posterior['alpha']
beta_samples = posterior['beta']

# One regression line per draw via broadcasting:
# rows index posterior draws, columns index x_vals.
y_preds = alpha_samples[:, None] + beta_samples[:, None] * x_vals

# Pointwise posterior mean and central 95% interval of the regression line
y_mean = y_preds.mean(dim=0)
y_lower = torch.quantile(y_preds, 0.025, dim=0)
y_upper = torch.quantile(y_preds, 0.975, dim=0)

# Plot the observations with the posterior mean line and its credible band
plt.figure(figsize=(10, 6))
sns.scatterplot(data=data, x='x', y='y', label='Observed Data')
plt.plot(x_vals, y_mean, color='blue', label='Posterior Mean')
plt.fill_between(x_vals, y_lower, y_upper, color='blue', alpha=0.2, label='95% Credible Interval')
# The model is fitted with Pyro, so the title names Pyro rather than PyMC.
plt.title('Bayesian Linear Regression with Pyro')
plt.xlabel('x')
plt.ylabel('y')
plt.grid(True)
plt.legend()
plt.show()


In [None]:
# Posterior summary rounded to two decimals. Summarises `idata`, the ArviZ
# object built from the MCMC samples; no `trace` variable is defined before
# this cell, so referring to it raised a NameError on a fresh run.
az.summary(idata, round_to=2)

To get posterior predictive samples, we need to uncondition the model on the data:

In [None]:
# Unconditioned version of the model, used below to draw posterior predictive samples.
unconditioned_model = pyro.poutine.uncondition(model)

In [None]:
# Build the predictive sampler from the posterior draws, then evaluate it on the data.
predictive = pyro.infer.Predictive(unconditioned_model, posterior_samples=posterior, num_samples=1000)
posterior_predictive = predictive(x, y)

In [None]:
# Display the posterior predictive output computed in the previous cell.
posterior_predictive

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(8,4))

ax.plot(