# Probabilistic Backpropagation Demo on Housing Benchmark
This is a demo of the Probabilistic Backpropagation algorithm, implemented in the `ProbabilisticBackpropagation` class in Bensemble.

In [None]:
!pip install scikit-learn pandas

In [13]:
import math
import pathlib
import sys

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

sys.path.append(str(pathlib.Path("../..").resolve()))
from bensemble.methods.probabilistic_backpropagation import ProbabilisticBackpropagation

# Make float64 the default floating-point dtype for tensors PyTorch creates.
torch.set_default_dtype(torch.float64)

# A single fixed seed is fed to both the NumPy and the PyTorch generators.
SEED = 7
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use CUDA when it is available (seeding every CUDA device too), otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
    torch.cuda.manual_seed_all(SEED)
else:
    DEVICE = torch.device("cpu")

print(f"Running on {DEVICE} with seed {SEED}")

Running on cuda with seed 7


## Testing

We will test the `ProbabilisticBackpropagation` implementation on the Boston housing-style dataset used in our [benchmark demo](https://github.com/intsystems/bensemble/blob/master/notebooks/benchmark.ipynb) (`benchmark.ipynb`).

In [14]:
# Candidate locations for housing.data, relative to the working directory.
# They are probed in order and the first existing path is used.
DATA_CANDIDATES = [
    pathlib.Path("data/housing.data"),
    pathlib.Path("../data/housing.data"),
    pathlib.Path("../../benchmark/data/housing.data"),
]
DATA_PATH = next((p for p in DATA_CANDIDATES if p.exists()), None)
if DATA_PATH is None:
    raise FileNotFoundError("housing.data not found in known locations")

# Column labels applied to the header-less file (read with header=None below).
COLUMN_NAMES = [
    "CRIM",
    "ZN",
    "INDUS",
    "CHAS",
    "NOX",
    "RM",
    "AGE",
    "DIS",
    "RAD",
    "TAX",
    "PTRATIO",
    "B",
    "LSTAT",
    "MEDV",
]

# The separator is a regex for "one or more whitespace characters". It must be a
# raw string: in a normal string literal "\s" is an invalid escape sequence and
# Python emits a warning for it.
df = pd.read_csv(DATA_PATH, sep=r"\s+", header=None, names=COLUMN_NAMES)
print(df.shape)

# --- Features / target -------------------------------------------------------
TARGET_COL = "MEDV"
TEST_SIZE = 0.2
feature_frame = df.drop(columns=[TARGET_COL])
X = feature_frame.values.astype(np.float32)
# Keep the target as a column vector so it can be passed to a scaler below.
y = df[TARGET_COL].values.astype(np.float32).reshape(-1, 1)

# --- Hold-out split (seeded) -------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=SEED,
)

# --- Standardisation ---------------------------------------------------------
# Both scalers are fitted on the training portion only and then applied to the
# training and test portions alike.
x_scaler = StandardScaler().fit(X_train)
y_scaler = StandardScaler().fit(y_train)
X_train_scaled = x_scaler.transform(X_train).astype(np.float32)
X_test_scaled = x_scaler.transform(X_test).astype(np.float32)
y_train_scaled = y_scaler.transform(y_train).astype(np.float32)
y_test_scaled = y_scaler.transform(y_test).astype(np.float32)

# --- Tensors and loaders -----------------------------------------------------
train_tensor_x, train_tensor_y, test_tensor_x, test_tensor_y = map(
    torch.from_numpy,
    (X_train_scaled, y_train_scaled, X_test_scaled, y_test_scaled),
)

BATCH_SIZE = 64
train_dataset = TensorDataset(train_tensor_x, train_tensor_y)
test_dataset = TensorDataset(test_tensor_x, test_tensor_y)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# The test loader yields the whole test set as one unshuffled batch.
test_loader = DataLoader(test_dataset, batch_size=len(test_tensor_x), shuffle=False)

# Target scaler statistics as plain floats, plus the unscaled test targets
# flattened to 1-D for later metric computations.
Y_SCALE = float(y_scaler.scale_[0])
Y_MEAN = float(y_scaler.mean_[0])
y_test_true = y_test.reshape(-1)
print(f"Train size: {len(train_tensor_x)}, Test size: {len(test_tensor_x)}")

(506, 14)
Train size: 404, Test size: 102


  df = pd.read_csv(DATA_PATH, sep='\s+', header=None, names=COLUMN_NAMES)


In [15]:
# Settings a reader is most likely to tune, named instead of inlined.
HIDDEN_UNITS = 64
NUM_EPOCHS = 80
n_features = train_tensor_x.shape[1]

# layer_sizes presumably lists the width of each layer from input to output
# (confirm against the ProbabilisticBackpropagation documentation).
pbp = ProbabilisticBackpropagation(
    layer_sizes=[n_features, HIDDEN_UNITS, 1],
    device=DEVICE,
)
history = pbp.fit(
    train_loader,
    num_epochs=NUM_EPOCHS,
    step_clip=2.0,
    prior_refresh=1,
)
# Show the last three entries of the recorded "train_rmse" history.
history["train_rmse"][-3:]

[0.3682809469879644, 0.36836619896285056, 0.36872475628900075]

In [16]:
with torch.no_grad():
    # test_loader was built with batch_size=len(test_tensor_x), so its first
    # batch is the full (scaled) test set.
    first_batch = next(iter(test_loader))
    test_batch = first_batch[0].to(DEVICE, dtype=torch.float64)
    mean_scaled, samples_scaled = pbp.predict(test_batch, n_samples=200)
    # NOTE(review): this reaches into a private method to obtain the predictive
    # variance; switch to a public accessor if the class offers one.
    _, var_scaled = pbp._predictive_mean_var(test_batch)

# Map predictions from the standardised target space back to the original one.
# A variance scales with the square of the target's standard deviation.
mean_np = y_scaler.inverse_transform(mean_scaled.cpu().numpy()).reshape(-1)
var_np = var_scaled.cpu().numpy().reshape(-1) * (Y_SCALE**2)
n_draws = samples_scaled.shape[0]
# NOTE(review): the scaler was fitted on a single column but is applied here to
# an (n_draws, -1) matrix; this relies on the scaler broadcasting its one
# scale/mean over every column - confirm this stays supported.
samples_np = y_scaler.inverse_transform(
    samples_scaled.cpu().numpy().reshape(n_draws, -1)
)

# Point-prediction error and Gaussian negative log predictive density.
residual = mean_np - y_test_true
squared_error = residual**2
rmse = float(np.sqrt(np.mean(squared_error)))
log_term = np.log(2 * math.pi * var_np)
nlpd = float(0.5 * np.mean(log_term + squared_error / var_np))

# Sanity checks: the error stays under a loose bound and the draws are not all equal.
assert rmse < 6.0, f"RMSE too high: {rmse}"
assert float(np.var(samples_np)) > 0.0
print(f"Test RMSE (original scale): {rmse:.4f}")
print(f"Test NLPD (original scale): {nlpd:.4f}")
print(f"Estimated noise variance (scaled): {pbp.noise_variance().item():.4f}")

Test RMSE (original scale): 5.5321
Test NLPD (original scale): 3.4671
Estimated noise variance (scaled): 0.1332


## Sampling concrete models
We can sample deterministic networks from the posterior approximation to inspect epistemic uncertainty.

In [17]:
with torch.no_grad():
    test_x = test_tensor_x.to(DEVICE, dtype=torch.float64)
    sampled_models = pbp.sample_models(n_models=20)
    # One forward pass per sampled network; outputs are still in the scaled space.
    sampled_preds = np.stack(
        [net.to(DEVICE)(test_x).cpu().numpy() for net in sampled_models],
        axis=0,
    )  # (S, N, 1)

# Back to original scale for metrics: each model's predictions are un-scaled
# separately and flattened to 1-D before being stacked again.
per_model_orig = [
    y_scaler.inverse_transform(pred.reshape(-1, 1)).reshape(-1)
    for pred in sampled_preds
]
sampled_preds_orig = np.stack(per_model_orig, axis=0)

# Across-model mean and variance for every test point.
mean_pred = sampled_preds_orig.mean(axis=0)
epistemic_var = sampled_preds_orig.var(axis=0)
rmse_samples = float(np.sqrt(np.mean((mean_pred - y_test_true) ** 2)))
mean_epistemic_var = float(epistemic_var.mean())
print(f"Sampled-model RMSE: {rmse_samples:.4f}")
print(f"Epistemic variance (mean over test): {mean_epistemic_var:.4f}")
# The sampled networks must not all give identical predictions.
assert mean_epistemic_var > 0.0

Sampled-model RMSE: 5.5416
Epistemic variance (mean over test): 0.0742
