# An Overview of Conformal Prediction (CP)

## (1) Quantile adjustment

In [68]:
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
import plotnine as pn

# Example of probability adjustment for quantile
n = 10
alpha = 0.2
p = 1 - alpha


def get_adjusted_level(alpha, n):
    """Return the finite-sample adjusted quantile level for conformal prediction.

    The level is ceil((n + 1) * (1 - alpha)) / n, capped at 1.0.

    Args:
        alpha: Target miscoverage rate (the nominal coverage is 1 - alpha).
        n: Number of calibration scores the quantile will be taken over.

    Returns:
        The adjusted level, never larger than 1.0.
    """
    level = np.ceil((n + 1) * (1 - alpha)) / n
    # For small n the raw value exceeds 1 (e.g. n=5, alpha=0.1 gives 1.2),
    # which np.quantile rejects. Capping at 1.0 selects the largest
    # calibration score; note that in this regime n is too small for the
    # nominal 1 - alpha guarantee to hold with a finite threshold.
    return np.minimum(level, 1.0)


p_adj = get_adjusted_level(alpha, n)
print(f'For n={n}, original ub={p:.2f}, adjusted={p_adj:.2f}')

For n=10, original ub=0.80, adjusted=0.90


## (2) Multiclass (scikit-learn digits)

In [None]:
from sklearn.exceptions import ConvergenceWarning
import warnings
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Conformal settings: target miscoverage and calibration-set size
alpha = 0.1
n_calib = 500
level_adj = get_adjusted_level(alpha=alpha, n=n_calib)

# Load the digits data and add uniform noise so the problem is a bit harder
raw_X, raw_y = datasets.load_digits(return_X_y=True)
np.random.seed(1)
raw_X += 5 * np.random.rand(*raw_X.shape)

# Reserve n_calib rows plus one extra row; the remainder is used for training
size = len(raw_X)
n_train = size - n_calib - 1

def run_digits_cpsets(seed):
    """Run one split-conformal simulation and test it on a single held-out row.

    Reads the module-level ``raw_X``, ``raw_y``, ``size``, ``n_train``,
    ``n_calib`` and ``level_adj``.

    Args:
        seed: Value passed to ``np.random.seed`` before shuffling the rows.

    Returns:
        A ``(coverage, tau_sets)`` tuple. ``tau_sets`` lists the
        ``predict_proba`` column indices whose score ``1 - p`` is at most the
        calibrated threshold for the held-out row; ``coverage`` is True when
        the held-out label is in that list.
    """
    # Shuffle and split the data
    np.random.seed(seed)
    idx_shuffled = np.argsort(np.random.rand(size))
    idx_train = idx_shuffled[:n_train]
    idx_calib = idx_shuffled[n_train:-1]
    # The last shuffled index belongs to neither split above, so it is the
    # only row the model and the calibration scores have never seen.
    idx_oos = idx_shuffled[-1]
    X_train, y_train = raw_X[idx_train], raw_y[idx_train]
    X_calib, y_calib = raw_X[idx_calib], raw_y[idx_calib]
    # Double brackets keep X_oos two-dimensional for predict_proba
    X_oos, y_oos = raw_X[[idx_oos]], raw_y[idx_oos]
    # Fit model on training data
    f_theta = LogisticRegression(penalty=None, max_iter=1000)
    f_theta.fit(X=X_train, y=y_train)
    # Generate softmax probs on calibration
    phat = f_theta.predict_proba(X_calib)
    # Score = 1 - probability in the column indexed by the label
    # (assumes label k sits in column k of predict_proba -- TODO confirm)
    scores = 1 - phat[np.arange(n_calib), y_calib]
    # Get the adjusted quantile value
    qhat = np.quantile(scores, level_adj, method='higher')
    # Predict the held out sample and generate confidence sets
    tau_sets = list(np.where(1 - f_theta.predict_proba(X_oos) <= qhat)[1])
    coverage = bool(np.isin(y_oos, tau_sets))
    return coverage, tau_sets

# Repeat the experiment over nsim seeds, collecting coverage flags and sets
nsim = 100
holder_cov, holder_sets = [], []
for seed in range(nsim):
    # Progress report on every fifth simulation
    if not (seed + 1) % 5:
        print(seed + 1)
    coverage, tau_sets = run_digits_cpsets(seed)
    holder_cov += [coverage]
    holder_sets += [tau_sets]
