In [None]:
from doubt import Boot
from doubt.datasets import (Airfoil, Blog, Concrete, CPU, 
                            FacebookComments, FishBioconcentration,
                            FishToxicity, ForestFire, NewTaipeiHousing,
                            PowerPlant, Protein, Servo,
                            SpaceShuttle)
from mapie.regression import MapieRegressor
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from tqdm.auto import tqdm
from collections import defaultdict

## Setting up

In [None]:
def evaluate_nasa(model, X_tr, X_te, y_tr, y_te, uncertainty=0.05):
    """Evaluate hand-rolled bootstrap prediction intervals for ``model``.

    The model is fitted on the full training set to obtain test predictions
    and training residuals, then refitted on ``int(sqrt(len(X_tr)))``
    bootstrap resamples of the training set. The centred bootstrap test
    predictions are added to every training residual, and the
    ``uncertainty / 2`` and ``1 - uncertainty / 2`` quantiles of those sums
    are added to the test predictions to form the intervals.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            refitted in place; when at least one resample is drawn it is
            left fitted on the last bootstrap resample.
        X_tr: Training features, indexed as ``X_tr[rows, :]`` (2-D array).
        X_te: Test features passed to ``model.predict``.
        y_tr: Training targets, indexed with an integer index array.
            Assumed to be a 1-D NumPy array.
        y_te: Test targets compared element-wise with the interval bounds.
        uncertainty: Total quantile mass left outside the interval, split
            evenly between the two tails.

    Returns:
        Tuple ``(coverage, mean_width)``: the fraction of test targets lying
        strictly inside their interval, and the mean interval width.
    """
    n_train = X_tr.shape[0]
    n_test = X_te.shape[0]
    n_boots = int(np.sqrt(len(X_tr)))

    # Fit on the full training set: point predictions and training residuals
    model.fit(X_tr, y_tr)
    tr_preds = model.predict(X_tr)
    te_preds = model.predict(X_te)
    tr_residuals = np.asarray(y_tr - tr_preds)

    # Fixed seed, re-created on every call, so each call draws the same resamples
    rng = np.random.default_rng(4242)

    # Refit on bootstrap resamples and record the test predictions of each fit
    bootstrap_preds = np.empty((n_boots, n_test))
    for boot_idx in range(n_boots):
        train_idxs = rng.choice(n_train, size=n_train, replace=True)
        model.fit(X_tr[train_idxs, :], y_tr[train_idxs])
        bootstrap_preds[boot_idx] = model.predict(X_te)

    # Centre the bootstrapped predictions across the bootstrap dimension
    bootstrap_preds = np.mean(bootstrap_preds, axis=0) - bootstrap_preds

    # Every (bootstrap deviation, training residual) pair summed per test
    # point. Broadcasting builds the same (n_boots * n_residuals, n_test)
    # matrix, in the same row order, as a nested Python loop would, without
    # creating one temporary array per pair.
    pairwise_sums = (
        bootstrap_preds[:, np.newaxis, :]
        + tr_residuals[np.newaxis, :, np.newaxis]
    ).reshape(n_boots * tr_residuals.shape[0], n_test)

    # Lower/upper quantiles per test point, shifted by the point prediction
    tails = [uncertainty / 2, 1 - uncertainty / 2]
    offsets = np.transpose(np.quantile(pairwise_sums, q=tails, axis=0))
    intervals = np.expand_dims(te_preds, -1) + offsets

    lower, upper = intervals[:, 0], intervals[:, 1]
    coverage = np.mean((y_te > lower) & (y_te < upper))
    mean_width = np.mean(upper - lower)
    return coverage, mean_width

In [None]:
def evaluate_doubt(model, X_tr, X_te, y_tr, y_te, uncertainty=0.05):
    """Evaluate the prediction intervals produced by Doubt's ``Boot`` wrapper.

    ``model`` is wrapped in ``Boot`` (seeded with 4242), fitted on the
    training data and asked for intervals on the test data. Both calls use
    ``int(sqrt(len(X_tr)))`` bootstrap samples.

    Args:
        model: Estimator handed to ``Boot``.
        X_tr: Training features.
        X_te: Test features.
        y_tr: Training targets.
        y_te: Test targets compared element-wise with the interval bounds.
        uncertainty: Uncertainty level forwarded to ``Boot.predict``.

    Returns:
        Tuple ``(coverage, mean_width)``: the fraction of test targets lying
        strictly inside their interval, and the mean interval width.
    """
    boot_count = int(np.sqrt(len(X_tr)))

    wrapped = Boot(model, random_seed=4242)
    wrapped.fit(X_tr, y_tr, n_boots=boot_count)
    _, bounds = wrapped.predict(X_te, uncertainty=uncertainty, n_boots=boot_count)

    # Column 0 holds the lower bounds, column 1 the upper bounds
    lower = bounds[:, 0]
    upper = bounds[:, 1]
    inside = (y_te > lower) & (y_te < upper)
    return np.mean(inside), np.mean(upper - lower)

In [None]:
def evaluate_mapie(model, X_tr, X_te, y_tr, y_te, uncertainty=0.05):
    """Evaluate the prediction intervals produced by ``MapieRegressor``.

    ``model`` is wrapped in ``MapieRegressor`` with its default settings,
    fitted on the training data and asked for intervals on the test data at
    ``alpha=uncertainty``.

    Args:
        model: Estimator handed to ``MapieRegressor``.
        X_tr: Training features.
        X_te: Test features.
        y_tr: Training targets.
        y_te: Test targets compared element-wise with the interval bounds.
        uncertainty: Value forwarded to ``predict`` as ``alpha``.

    Returns:
        Tuple ``(coverage, mean_width)``: the fraction of test targets lying
        strictly inside their interval, and the mean interval width.
    """
    bmodel = MapieRegressor(model)
    bmodel.fit(X_tr, y_tr)
    _, intervals = bmodel.predict(X_te, alpha=uncertainty)

    # The interval array is indexed as [sample, bound, alpha] - presumably
    # MAPIE's (n_samples, 2, n_alpha) layout; confirm against the installed
    # version. Coverage and width both read the first alpha slice, so the two
    # metrics always describe the same interval.
    lower = intervals[:, 0, 0]
    upper = intervals[:, 1, 0]

    coverage = np.mean((y_te > lower) & (y_te < upper))
    mean_width = np.mean(upper - lower)
    return coverage, mean_width

In [None]:
# Each entry is a (name, X_train, X_test, y_train, y_test) tuple
datasets = []
scaler = StandardScaler()

# Doubt datasets included in the benchmark
dataset_classes = [
    Airfoil, Concrete, FishToxicity, ForestFire,
    NewTaipeiHousing, PowerPlant, Protein, Servo,
]

for dataset_class in dataset_classes:
    dataset = dataset_class()

    # Keep at most 10,000 rows, drawn with a fixed seed
    n_rows = min(len(dataset), 10000)
    dataset._data = dataset._data.sample(n=n_rows, random_state=4242)

    # Hold out 10% of the rows for testing
    X_tr, X_te, y_tr, y_te = dataset.split(test_size=0.1, random_seed=4242)

    # The scaler is refitted on each dataset's training features only and
    # then applied to the matching test features
    X_tr, X_te = scaler.fit_transform(X_tr), scaler.transform(X_te)

    datasets.append((dataset_class.__name__, X_tr, X_te, y_tr, y_te))

len(datasets)

In [None]:
# One line per dataset: total row count (train + test) and feature count
for name, X_tr, X_te, y_tr, y_te in datasets:
    n_samples = len(X_tr) + len(X_te)
    n_features = X_tr.shape[-1]
    print(f'{name}: {n_samples:,} samples, {n_features:,} features')

## Linear Regression

In [None]:
# Base estimator shared by all three interval methods in this section;
# each evaluation function refits it (or a wrapper around it) itself.
model = LinearRegression()

In [None]:
# Uncertainty levels 0.01, 0.02, ..., 0.50. Dividing an integer range gives
# the floats closest to the intended decimals; a float-step `np.arange`
# accumulates rounding error (e.g. 0.060000000000000005), which would end up
# in the `uncertainty` index level of the result frame.
uncertainties = np.arange(1, 51) / 100

# Column name -> list of values, one entry per (dataset, uncertainty) pair
data_dict = defaultdict(list)
for dataset in tqdm(datasets):
    for uncertainty in tqdm(uncertainties, leave=False):
        # All three methods see the same model, data split and uncertainty
        nasa_coverage, nasa_mean_width = evaluate_nasa(model, *dataset[1:], uncertainty=uncertainty)
        doubt_coverage, doubt_mean_width = evaluate_doubt(model, *dataset[1:], uncertainty=uncertainty)
        mapie_coverage, mapie_mean_width = evaluate_mapie(model, *dataset[1:], uncertainty=uncertainty)

        # Coverage error = target coverage minus observed coverage, in
        # percentage points (positive means the intervals under-cover)
        row = {
            'dataset': dataset[0],
            'uncertainty': uncertainty,
            'nasa_coverage_error': 100 * (1 - uncertainty - nasa_coverage),
            'doubt_coverage_error': 100 * (1 - uncertainty - doubt_coverage),
            'mapie_coverage_error': 100 * (1 - uncertainty - mapie_coverage),
            'nasa_mean_width': nasa_mean_width,
            'doubt_mean_width': doubt_mean_width,
            'mapie_mean_width': mapie_mean_width,
        }
        for column, value in row.items():
            data_dict[column].append(value)

linreg_df = pd.DataFrame(data_dict).set_index(['dataset', 'uncertainty'])
linreg_df

In [None]:
# Mean and standard deviation of the absolute value of every metric column,
# ordered by ascending mean
linreg_summary = linreg_df.abs().describe().loc[['mean', 'std']].T
linreg_summary.sort_values(by='mean')

In [None]:
# Print the absolute coverage errors of each method, one value per line,
# with a blank line after each method
for method in ['mapie', 'doubt', 'nasa']:
    abs_errors = linreg_df[f'{method}_coverage_error'].abs()
    print(method.title())
    for abs_error in abs_errors:
        print(f'{abs_error:.4f}')
    print()

## Decision Tree

In [None]:
# Seeded so that tie-breaking between equally good splits, and therefore the
# results of this section, are the same on every run of the notebook.
model = DecisionTreeRegressor(random_state=4242)

In [None]:
# Column name -> list of values, one entry per (dataset, uncertainty) pair
data_dict = defaultdict(list)
for dataset in tqdm(datasets):
    for uncertainty in tqdm([0.01, 0.05, 0.1], leave=False):
        # Same model, data split and uncertainty for all three methods
        nasa_coverage, nasa_mean_width = evaluate_nasa(model, *dataset[1:], uncertainty=uncertainty)
        doubt_coverage, doubt_mean_width = evaluate_doubt(model, *dataset[1:], uncertainty=uncertainty)
        mapie_coverage, mapie_mean_width = evaluate_mapie(model, *dataset[1:], uncertainty=uncertainty)

        # Coverage error = target coverage minus observed coverage, in
        # percentage points
        row = {
            'dataset': dataset[0],
            'uncertainty': uncertainty,
            'nasa_coverage_error': 100 * (1 - uncertainty - nasa_coverage),
            'doubt_coverage_error': 100 * (1 - uncertainty - doubt_coverage),
            'mapie_coverage_error': 100 * (1 - uncertainty - mapie_coverage),
            'nasa_mean_width': nasa_mean_width,
            'doubt_mean_width': doubt_mean_width,
            'mapie_mean_width': mapie_mean_width,
        }
        for column, value in row.items():
            data_dict[column].append(value)

tree_df = pd.DataFrame(data_dict).set_index(['dataset', 'uncertainty'])
tree_df

In [None]:
# Mean and standard deviation of the absolute value of every metric column,
# ordered by ascending mean
tree_summary = tree_df.abs().describe().loc[['mean', 'std']].T
tree_summary.sort_values(by='mean')

In [None]:
# Print the absolute coverage errors of each method, one value per line,
# with a blank line after each method
for method in ['mapie', 'doubt', 'nasa']:
    abs_errors = tree_df[f'{method}_coverage_error'].abs()
    print(method.title())
    for abs_error in abs_errors:
        print(f'{abs_error:.4f}')
    print()

## Random Forest

In [None]:
# Seeded so that the forest's internal randomness, and therefore the results
# of this section, are the same on every run of the notebook.
model = RandomForestRegressor(random_state=4242)

In [None]:
# Column name -> list of values, one entry per (dataset, uncertainty) pair
data_dict = defaultdict(list)
for dataset in tqdm(datasets):
    for uncertainty in tqdm([0.01, 0.05, 0.1], leave=False):
        # Same model, data split and uncertainty for all three methods
        nasa_coverage, nasa_mean_width = evaluate_nasa(model, *dataset[1:], uncertainty=uncertainty)
        doubt_coverage, doubt_mean_width = evaluate_doubt(model, *dataset[1:], uncertainty=uncertainty)
        mapie_coverage, mapie_mean_width = evaluate_mapie(model, *dataset[1:], uncertainty=uncertainty)

        # Coverage error = target coverage minus observed coverage, in
        # percentage points
        row = {
            'dataset': dataset[0],
            'uncertainty': uncertainty,
            'nasa_coverage_error': 100 * (1 - uncertainty - nasa_coverage),
            'doubt_coverage_error': 100 * (1 - uncertainty - doubt_coverage),
            'mapie_coverage_error': 100 * (1 - uncertainty - mapie_coverage),
            'nasa_mean_width': nasa_mean_width,
            'doubt_mean_width': doubt_mean_width,
            'mapie_mean_width': mapie_mean_width,
        }
        for column, value in row.items():
            data_dict[column].append(value)

forest_df = pd.DataFrame(data_dict).set_index(['dataset', 'uncertainty'])
forest_df

In [None]:
# Mean and standard deviation of the absolute value of every metric column,
# ordered by ascending mean
forest_summary = forest_df.abs().describe().loc[['mean', 'std']].T
forest_summary.sort_values(by='mean')

In [None]:
# Print the absolute coverage errors of each method, one value per line,
# with a blank line after each method
for method in ['mapie', 'doubt', 'nasa']:
    abs_errors = forest_df[f'{method}_coverage_error'].abs()
    print(method.title())
    for abs_error in abs_errors:
        print(f'{abs_error:.4f}')
    print()