In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
import pymc3 as pm
import arviz as az

In [None]:
# Switch on seaborn's default plot styling for the figures drawn in later cells.
sns.set()

Probabilistic model:

\begin{align*}
y &\sim \mathcal{N}(\alpha + \beta x, \epsilon) \\
\alpha &\sim \mathcal{N}(\mu_\alpha, \sigma_\alpha)\\
\beta &\sim \mathcal{N}(\mu_\beta, \sigma_\beta)\\
\epsilon &\sim |\mathcal{N}(0, \sigma_\epsilon)|,
\end{align*}

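where $y$ is the observed variable, $x$ is an independent, deterministic variable, and the second argument of $\mathcal{N}$ is a standard deviation (so $\epsilon$ follows a half-normal distribution).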

Let us generate data:

In [None]:
# Synthetic data set: points on a known straight line, observed through Gaussian noise.
np.random.seed(1)  # fix NumPy's global RNG so the draws below are repeatable

N = 100            # number of observations
alpha_real = 2.5   # intercept of the generating line
beta_real = 0.9    # slope of the generating line

# The noise is drawn before x; this order determines which random values each one gets.
eps_real = np.random.normal(loc=0, scale=0.5, size=N)
x = np.random.normal(loc=10, scale=1, size=N)

y_real = beta_real * x + alpha_real  # noise-free response
y = eps_real + y_real                # observed response

Standard least-squares solution:

In [None]:
# Ordinary least-squares fit of y = alpha + beta * x, written from scratch with NumPy.
# The design matrix pairs a column of ones (for the intercept) with the regressor x;
# np.linalg.lstsq returns the coefficients that minimise the squared residuals.
design_matrix = np.column_stack([np.ones_like(x), x])
(alpha_ls, beta_ls), *_ = np.linalg.lstsq(design_matrix, y, rcond=None)
alpha_ls, beta_ls

In [None]:
# Noisy observations as blue dots, with the noise-free generating line drawn in black on top.
plt.plot(x, y, 'bo', x, y_real, 'k')
plt.xlabel('x')
plt.ylabel('y', rotation=0);

Let us implement the probabilistic model with PyMC3:

In [None]:
with pm.Model() as model_g:
    # Priors for the intercept, the slope and the noise standard deviation.
    alpha = pm.Normal("alpha", mu=0, sigma=10)
    beta = pm.Normal("beta", mu=0, sigma=1)
    epsilon = pm.HalfNormal("epsilon", sigma=5)

    # Linear predictor evaluated at every observed x; wrapping it in Deterministic
    # gives it the name "mu" in the model.
    mu = pm.Deterministic("mu", alpha + beta * x)
    # Gaussian likelihood linking the linear predictor to the observations y.
    y_pred = pm.Normal('y_pred', mu=mu, sigma=epsilon, observed=y)

    # An explicit random_seed keeps the posterior draws the same on every run,
    # independent of whatever global RNG state earlier cells left behind.
    idata_g = pm.sample(2000, tune=2000, random_seed=1, return_inferencedata=True)

In [None]:
# Trace plot for the three scalar parameters of the model.
az.plot_trace(
    idata_g,
    var_names=["alpha", "beta", "epsilon"],
);

In [None]:
# Pair plot of intercept against slope. The 'alpha' key inside scatter_kwargs is
# the marker opacity, not the model parameter of the same name.
az.plot_pair(
    idata_g,
    var_names=["alpha", "beta"],
    scatter_kwargs={"alpha": 0.1},
);

In [None]:
# NOTE(review): this cell draws the same trace plot as an earlier cell in this
# notebook; it looks like an accidental duplicate -- consider removing it.
az.plot_trace(
    idata_g,
    var_names=["alpha", "beta", "epsilon"],
);

In [None]:
# NOTE(review): this cell draws the same alpha/beta pair plot as an earlier cell
# in this notebook; it looks like an accidental duplicate -- consider removing it.
az.plot_pair(
    idata_g,
    var_names=["alpha", "beta"],
    scatter_kwargs={"alpha": 0.1},  # marker opacity, not the model parameter
);

In [None]:
# Posterior regression lines: the data, a thinned set of sampled lines, and the
# line given by the posterior means of alpha and beta.
plt.plot(x, y, 'C0.')

# Collapse (chain, draw) into a single "samples" dimension. The dimensions are
# passed as a tuple rather than a set: a set has no defined iteration order, so
# the stacking order -- and therefore which draws are picked below -- could
# differ from one run to the next.
posterior_g = idata_g.posterior.stack(samples=("chain", "draw"))
alpha_m = posterior_g['alpha'].mean().item()
beta_m = posterior_g['beta'].mean().item()

# Keep every 10th posterior sample so the bundle of lines stays readable.
draws = range(0, posterior_g.samples.size, 10)
alpha_draws = posterior_g['alpha'][draws].values
beta_draws = posterior_g['beta'][draws].values
# x[:, None] broadcasts against the draws: one column (one plotted line) per sample.
plt.plot(x, alpha_draws + beta_draws * x[:, None], c='gray', alpha=0.5)

plt.plot(x, alpha_m + beta_m * x, c='k',
         label=f'y = {alpha_m:.2f} + {beta_m:.2f} * x')

plt.xlabel('x')
plt.ylabel('y', rotation=0)
plt.legend();