Based on https://bambinos.github.io/bambi/notebooks/t_regression.html

In [None]:
import arviz as az
import bambi as bmb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Seed NumPy's global RNG so the simulated noise and samples drawn with
# np.random further down are repeatable, then switch to the ArviZ dark-grid
# plotting theme.
np.random.seed(1111)
az.style.use("arviz-darkgrid")

In [None]:
# Simulation settings: number of clean points and the coefficients of the
# generating line y = a + b*x.
size = 100
true_intercept = 1
true_slope = 2

# Evenly spaced predictor on [0, 1] and the noise-free response.
x = np.linspace(0.0, 1.0, num=size)
true_regression_line = true_slope * x + true_intercept

# Observed response: the true line plus Gaussian noise (sd = 0.5).
y = true_regression_line + np.random.normal(0.0, 0.5, size)

# Append three outliers: small x paired with unusually large y.
x_out = np.concatenate([x, [0.1, 0.15, 0.2]])
y_out = np.concatenate([y, [8, 6, 9]])

# Data frame handed to the models; columns match the "y ~ x" formula.
data = pd.DataFrame({"x": x_out, "y": y_out})

In [None]:
# Scatter the simulated observations (outliers included) and overlay the
# noise-free line they were generated from.
fig, ax = plt.subplots(figsize=(7, 7))
ax.set(xlabel="x", ylabel="y", title="Generated data and underlying model")
ax.plot(x_out, y_out, "x", label="sampled data")
ax.plot(x, true_regression_line, lw=2.0, label="true regression line")
ax.legend(loc=0);

In [None]:
# Baseline model: linear regression of y on x with a Gaussian likelihood.
# "gaussian" is already the default family; it is spelled out to be explicit.
gauss_model = bmb.Model(formula="y ~ x", data=data, family="gaussian")
# Ask for the pointwise log-likelihood to be stored with the draws; this fitted
# object is later passed to az.compare.
gauss_fitted = gauss_model.fit(
    draws=2000,
    idata_kwargs={"log_likelihood": True},
)
# Add posterior predictive samples to `gauss_fitted`; the HDI plot further down
# reads `gauss_fitted.posterior_predictive.y`.
gauss_model.predict(idata=gauss_fitted, kind="pps")

In [None]:
# Display ArviZ's summary of the Gaussian fit (cell output).
az.summary(gauss_fitted)

In [None]:
plt.figure(figsize=(7, 5))

# Observations, outliers included.
plt.plot(x_out, y_out, "x", label="data")

# Regression line implied by the posterior means of slope and intercept,
# evaluated on a dense grid spanning the observed x values.
x_range = np.linspace(x_out.min(), x_out.max(), num=2000)
y_pred = (
    gauss_fitted.posterior["Intercept"].mean().item()
    + gauss_fitted.posterior["x"].mean().item() * x_range
)
plt.plot(
    x_range,
    y_pred,
    label="Recovered regression line",
    linestyle="--",
    color="black",
)

# Posterior predictive HDI bands, one per probability level.
for interval in (0.38, 0.68):
    az.plot_hdi(
        x_out,
        gauss_fitted.posterior_predictive["y"],
        color="firebrick",
        hdi_prob=interval,
    )

# Line the data were generated from, for comparison with the recovered one.
plt.plot(x, true_regression_line, color="black", lw=2.0, label="True regression line")
plt.legend(loc=0);

In [None]:
# Draw equally sized samples from a standard normal and from a Student-t with
# one degree of freedom (normal first, then t, to keep the RNG stream order).
normal_data = np.random.normal(0, 1, 100_000)
t_data = np.random.standard_t(1, 100_000)

# Shared bin edges so the two histograms are directly comparable.
bins = np.arange(-8, 8, 0.15)
for sample_values, legend_label in ((normal_data, "Normal"), (t_data, "Student T")):
    plt.hist(sample_values, bins=bins, density=True, alpha=0.6, label=legend_label)

plt.xlim(-8, 8)
plt.xlabel("x")
plt.ylabel("Probability density")
plt.legend();

In [None]:
# Outline histograms of Student-t samples for several degrees of freedom,
# overlaid on the normal sample drawn earlier (`normal_data`).
bins = np.arange(-8, 8, 0.15)
step_hist_kwargs = {"bins": bins, "density": True, "histtype": "step"}

for ndof in (0.1, 1, 10):
    # A fresh sample per setting; `t_data` is overwritten on each pass.
    t_data = np.random.standard_t(ndof, 100_000)
    plt.hist(t_data, label=f"$\\nu = {ndof}$", **step_hist_kwargs)
plt.hist(normal_data, label="Normal", **step_hist_kwargs)

plt.xlim(-6, 6)
plt.xlabel("x")
plt.ylabel("Probability density")
plt.legend();

In [None]:
# Same formula and data as the Gaussian model, but with the "t" family.
t_model = bmb.Model(formula="y ~ x", data=data, family="t")
# Ask for the pointwise log-likelihood to be stored with the draws; this fitted
# object is later passed to az.compare.
t_fitted = t_model.fit(
    draws=2000,
    idata_kwargs={"log_likelihood": True},
)
# Add posterior predictive samples to `t_fitted`; the HDI plot further down
# reads `t_fitted.posterior_predictive.y`.
t_model.predict(idata=t_fitted, kind="pps")

In [None]:
# Display ArviZ's summary of the Student-t fit (cell output).
az.summary(t_fitted)

In [None]:
def get_slope_intercept(mod):
    """Return the posterior-mean slope and intercept of a fitted model.

    Parameters
    ----------
    mod
        Object exposing ``posterior.x`` and ``posterior.Intercept``; in this
        notebook, the results returned by ``Model.fit``.

    Returns
    -------
    tuple
        ``(slope, intercept)``: the mean of ``posterior.x`` and of
        ``posterior.Intercept``, each converted to a scalar with ``.item()``.
    """
    posterior = mod.posterior
    slope = posterior.x.mean().item()
    intercept = posterior.Intercept.mean().item()
    return slope, intercept


# Posterior-mean slope and intercept for each fitted model.
gauss_slope, gauss_int = get_slope_intercept(gauss_fitted)
t_slope, t_int = get_slope_intercept(t_fitted)

# Compare the estimates with the simulation's ground truth. The "True" entries
# read `true_slope` / `true_intercept` instead of repeating their literal
# values, so the table stays correct if the simulation settings are edited.
pd.DataFrame(
    {
        "Model": ["True", "Normal", "T"],
        "Slope": [true_slope, gauss_slope, t_slope],
        "Intercept": [true_intercept, gauss_int, t_int],
    }
).set_index("Model").T.round(decimals=2)

In [None]:
plt.figure(figsize=(7, 5))

# Observations, outliers included.
plt.plot(x_out, y_out, "x", label="data")

# Robust regression line implied by the posterior means of slope and
# intercept, evaluated on a dense grid spanning the observed x values.
x_range = np.linspace(x_out.min(), x_out.max(), num=2000)
y_pred = (
    t_fitted.posterior["Intercept"].mean().item()
    + t_fitted.posterior["x"].mean().item() * x_range
)
plt.plot(
    x_range,
    y_pred,
    label="Recovered regression line",
    linestyle="--",
    color="black",
)

# Posterior predictive HDI bands, one per probability level.
for interval in (0.05, 0.38, 0.68):
    az.plot_hdi(
        x_out,
        t_fitted.posterior_predictive["y"],
        color="firebrick",
        hdi_prob=interval,
    )

# Line the data were generated from, for comparison with the recovered one.
plt.plot(x, true_regression_line, color="black", lw=2.0, label="true regression line")
plt.legend(loc=0);

In [None]:
# Collect both fitted results under display names and hand them to ArviZ's
# model comparison; the last line shows the result as the cell output.
models = {
    "gaussian": gauss_fitted,
    "Student T": t_fitted,
}
df_compare = az.compare(compare_dict=models)
df_compare

In [None]:
# Plot the comparison computed above with the in-sample deviance turned off;
# the trailing semicolon suppresses the notebook's echo of the return value.
az.plot_compare(df_compare, insample_dev=False);