# PSIS-Conformal with BART on Sine Data

This notebook fits a Bayesian Additive Regression Trees (BART) model once, computes leave-one-out (LOO) residuals via Pareto-smoothed importance sampling (PSIS) without refitting, and builds conformal prediction intervals with near-nominal coverage.

In [None]:
# Ensure repo root is importable when running from examples/
import sys
from pathlib import Path

repo_root = Path.cwd()
# The CWD is kept when it already looks like the repo root (it contains both
# examples/ and bartpy/). Otherwise, when the CWD is itself named examples/
# and its parent contains bartpy/, the parent is used instead. In any other
# layout the CWD is used unchanged.
cwd_is_root = (repo_root / 'examples').exists() and (repo_root / 'bartpy').exists()
cwd_is_examples = (repo_root.name == 'examples') and (repo_root.parent / 'bartpy').exists()
if cwd_is_examples and not cwd_is_root:
    repo_root = repo_root.parent

# Place repo_root at the front of sys.path exactly once: re-running this cell
# must not accumulate duplicate entries, but the repo should still take
# priority over any other location on the import path.
repo_root_str = str(repo_root)
sys.path[:] = [repo_root_str] + [entry for entry in sys.path if entry != repo_root_str]
print('Repo root:', repo_root)

In [None]:
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt

from bartpy.sklearnmodel import SklearnModel
from bartpy import (
    prepare_bart_loglik_and_draws,
    loo_residuals_via_psis,
    conformal_quantile,
    build_intervals,
)

In [None]:
# Generate synthetic sine data: y = sin(x) + Gaussian noise
rng = np.random.default_rng(42)
n = 250
# Draw order matters for reproducibility: covariates first, then the noise.
X = rng.uniform(low=-3.0, high=3.0, size=(n, 1))
f_true = np.sin(X[:, 0])
noise = rng.normal(loc=0.0, scale=0.25, size=n)
y = f_true + noise
# Show the first few rows as the cell output.
X[:3], y[:3]

In [None]:
# Fit BART with in-sample prediction samples stored
# Sampler settings are collected in one place so they are easy to scan and tune;
# see the SklearnModel documentation for the exact meaning of each option.
bart_settings = dict(
    n_trees=100,
    n_chains=2,
    n_samples=200,
    n_burn=200,
    thin=0.5,
    store_in_sample_predictions=True,
    store_acceptance_trace=True,
    n_jobs=1,
)
model = SklearnModel(**bart_settings)
model.fit(X, y)

# Sanity-check what the fitted model kept (these are private attributes of the model).
stored_predictions = np.asarray(model._prediction_samples)
print('Stored prediction samples shape (S,n):', stored_predictions.shape)
print('Stored model samples:', len(model._model_samples))

In [None]:
# Prepare per-observation draws and log-likelihoods
# The helper returns three objects, unpacked here as (y_obs, y_draws, log_lik);
# their shapes are printed as a quick consistency check.
prepared = prepare_bart_loglik_and_draws(model)
y_obs, y_draws, log_lik = prepared
print('y shape:', y_obs.shape, 'draws shape:', y_draws.shape, 'log_lik shape:', log_lik.shape)

In [None]:
# PSIS-LOO residuals and conformal interval width
psis_result = loo_residuals_via_psis(y_obs, y_draws, log_lik)
residuals, k_diag, loo_mean = psis_result

q = conformal_quantile(residuals, alpha=0.1)  # 90% coverage
lo, hi = build_intervals(loo_mean, q)

# Fraction of training targets that fall inside their own interval.
inside = (y_obs >= lo) & (y_obs <= hi)
coverage = np.mean(inside)
print(f'Empirical coverage (train, 90% nominal): {coverage:.3f}')

# NaN-aware summary of the Pareto k diagnostics.
k_min = np.nanmin(k_diag)
k_median = np.nanmedian(k_diag)
k_max = np.nanmax(k_diag)
print('Pareto k summary: min={:.3f}, median={:.3f}, max={:.3f}'.format(
    k_min, k_median, k_max))

In [None]:
# Plots: Pareto k histogram and intervals along X
fig, axes = plt.subplots(1, 2, figsize=(11, 4))

# Left panel: distribution of the finite Pareto k diagnostics.
axes[0].hist(k_diag[np.isfinite(k_diag)], bins=20, color='#4472c4')
axes[0].set_title('Pareto k (PSIS)')
axes[0].set_xlabel('Pareto k')
axes[0].set_ylabel('Count')
# NOTE(review): 0.5 and 0.7 look like the usual PSIS reliability thresholds for
# Pareto k — confirm against the PSIS-LOO reference the project follows.
axes[0].axvline(0.5, color='r', linestyle='--', label='0.5')
axes[0].axvline(0.7, color='orange', linestyle='--', label='0.7')
axes[0].legend()

# Right panel: sort by the single feature so the line and band are drawn left to right.
order = np.argsort(X[:, 0])
axes[1].plot(X[order, 0], y_obs[order], 'k.', alpha=0.5, label='y')
axes[1].plot(X[order, 0], loo_mean[order], color='#2ca02c', label='LOO mean')
axes[1].fill_between(X[order, 0], lo[order], hi[order], color='#9bd39b', alpha=0.4, label='Conformal 90%')
axes[1].set_title('PSIS-Conformal intervals (train)')
axes[1].set_xlabel('x')
axes[1].set_ylabel('y')
axes[1].legend()
plt.tight_layout()
plt.show()