# TabPFN Installation

In [None]:
## Base library Installation
# Install Baselines for model comparison
!uv pip install catboost xgboost

# Install the datasets library for loading example data
!uv pip install datasets

# Install rich for better and more readable printing
!uv pip install rich


## TabPFN Installation optimized for Google Colab
# Install the TabPFN Client library
!uv pip install tabpfn-client

# Install tabpfn from source
# Clone the repository: shallow for speedup
!git clone --depth 1 https://github.com/PriorLabs/tabpfn

# Speeding up installation in this notebook:
# Remove torch dependency as it is already installed on colab (do not run this in your local setup)
!sed -i "/torch/d" tabpfn/pyproject.toml

# Step 3: Install using the correct directory name 'tabpfn'
!uv pip install -e "tabpfn"

# Install TabPFN extensions for additional functionalities
!git clone https://github.com/PriorLabs/tabpfn-extensions

# Speeding up installation in this notebook:
# Remove torch dependency as it is already installed on colab (do not run this in your local setup)
!sed -i "/torch/d" tabpfn-extensions/pyproject.toml

!uv pip install -e tabpfn-extensions[all]

[2mUsing Python 3.12.11 environment at: /usr[0m
[2K[2mResolved [1m21 packages[0m [2min 481ms[0m[0m
[2K[2mPrepared [1m1 package[0m [2min 4.52s[0m[0m
[2K[2mInstalled [1m1 package[0m [2min 10ms[0m[0m
 [32m+[39m [1mcatboost[0m[2m==1.2.8[0m
[2mUsing Python 3.12.11 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 381ms[0m[0m
[2mUsing Python 3.12.11 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 273ms[0m[0m
[2mUsing Python 3.12.11 environment at: /usr[0m
[2K[2mResolved [1m26 packages[0m [2min 277ms[0m[0m
[2K[2mPrepared [1m4 packages[0m [2min 113ms[0m[0m
[2mUninstalled [1m1 package[0m [2min 2ms[0m[0m
[2K[2mInstalled [1m4 packages[0m [2min 17ms[0m[0m
 [32m+[39m [1mpassword-strength[0m[2m==0.0.3.post2[0m
 [32m+[39m [1msseclient-py[0m[2m==1.8.0[0m
 [32m+[39m [1mtabpfn-client[0m[2m==0.1.10[0m
 [31m-[39m [1mtyping-extensions[0m[2m==4.15.0[0m
 [32m+[39m [1mtyping-extensions[0m[2m=

Mounting datasets:

In [None]:
# Mount Google Drive inside the Colab runtime so the dataset files can be read.
from google.colab import drive
# The mount point is /content/MyDrive; the DATA_DIR paths used later in this notebook
# start with '/content/MyDrive/MyDrive/', i.e. they are resolved below this mount point.
drive.mount('/content/MyDrive')

Mounted at /content/MyDrive


# Dataset hill-valley (OpenML ID = 1479)

## TabPFN:

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
import numpy as np
import torch

from tabpfn_client import TabPFNClassifier, init

# Start the tabpfn_client session. In this notebook's recorded output the call is
# followed by an interactive account-creation / login prompt.
init()

# Pick the best available device.
# NOTE(review): `device` is only printed below; it is never passed to TabPFNClassifier(),
# so it does not affect how the classifier is constructed — confirm whether it is still needed.
device = "cuda" if torch.cuda.is_available() else (
    "mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else "cpu"
)
print("Using device:", device)

# Load the classification dataset with OpenML ID 1479 as pandas objects.
df = fetch_openml(data_id=1479, as_frame=True)
X = df.data.to_numpy().astype(np.float32)   # cast every feature column to float32
y = df.target.to_numpy()                    # target is used as loaded, without re-encoding

# 50/50 train/test split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42, stratify=y
)

# Fit the classifier with its default settings (no device argument is supplied).
clf = TabPFNClassifier()
clf.fit(X_train, y_train)

proba = clf.predict_proba(X_test)
# Binary scoring: column 1 of `proba` is used as the positive-class score.
# assumes exactly two classes, with column 1 corresponding to the larger label — TODO confirm
# (multi-class alternative: roc_auc_score(y_test, proba, multi_class="ovr"))
print("ROC AUC:", roc_auc_score(y_test.ravel(), proba[:, 1]))

pred = clf.predict(X_test)
# The true labels are cast to int before the comparison; presumably `pred` holds integer labels — verify.
print("Accuracy:", accuracy_score(y_test.astype(int), pred))


  Welcome to TabPFN!

  TabPFN is still under active development, and we are working hard to make it better.
  Please bear with us if you encounter any issues.


Opening browser for login. Please complete the login/registration process in your browser and return here.


Could not open browser automatically. Falling back to command-line login...

  Please choose one of the following options:
  (1) Create a TabPFN account
  (2) Login to your TabPFN account

  Please enter your choice: 1

  Please refer to our terms and conditions at: https://www.priorlabs.ai/terms By using TabPFN, you agree to the following terms and conditions:
  Do you agree to the above terms and conditions? (y/n): y
  Please enter your email: <redacted>

  Password requirements (minimum):
  . Length(8)
  . Uppercase(1)
  . Numbers(1)
  . Special(1)

  Please enter your password: ··········
  Password requirements not satisfied.

  Please enter your password: ··········
  Please confirm your password

Processing: 100%|██████████| [00:03<00:00]


ROC AUC: 0.9963729046171944


Processing: 100%|██████████| [00:02<00:00]

Accuracy: 0.9735973597359736





# Dataset Rain in Australia

# TabPFN

Without pre-processing of categorical features:

In [None]:
import os, json
import numpy as np
import torch
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, roc_auc_score
from tabpfn_client import TabPFNClassifier, init

# Start the tabpfn_client session (an earlier cell's recorded output shows this can prompt for login).
init()

# ---- paths ----
# Folder holding the pre-split .npy arrays; it lives below the '/content/MyDrive' Drive mount point.
DATA_DIR = '/content/MyDrive/MyDrive/Datasets/Rain_in_Australia'

# ---- device ----
# NOTE(review): `device` is only printed; nothing later in this cell passes it to the classifier.
device = "cuda" if torch.cuda.is_available() else (
    "mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else "cpu"
)
print("Using device:", device)

# ---- utils ----
def load(name):
    """Read the array file `name` from DATA_DIR, or return None when it does not exist.

    Pickled object arrays are accepted (allow_pickle=True). Unpickling can execute
    arbitrary code, so DATA_DIR should only contain files you trust.
    """
    path = os.path.join(DATA_DIR, name)
    if not os.path.exists(path):
        return None
    return np.load(path, allow_pickle=True)

def ravel1d(a):
    """Flatten an array that has more than one dimension; return None and 0-D/1-D arrays unchanged."""
    if a is None or a.ndim <= 1:
        return a
    return a.ravel()

# ---- load arrays ----
# One C_*, N_* and y_* array per split; load() returns None for any file that is missing.
# presumably C_* holds categorical features and N_* numerical features — verify against the dataset export.
C_train, N_train, y_train = load('C_train.npy'), load('N_train.npy'), load('y_train.npy')
C_val,   N_val,   y_val   = load('C_val.npy'),   load('N_val.npy'),   load('y_val.npy')
C_test,  N_test,  y_test  = load('C_test.npy'),  load('N_test.npy'),  load('y_test.npy')

# Sanity print; raises AttributeError if C_train.npy was missing (load() returned None).
print("C_train:", C_train.shape, C_train.dtype)
# ---- build X by concatenating [C | N] ----
def concat_features(C_part, N_part):
    """Join the available feature parts column-wise, in the order [C_part | N_part].

    A part that is None is skipped. With a single available part that same array
    object is returned; with none, ValueError is raised.
    """
    available = []
    for part in (C_part, N_part):
        if part is not None:
            available.append(part)

    if len(available) == 0:
        raise ValueError("No features found (need at least C_* or N_*).")
    if len(available) == 1:
        return available[0]
    return np.concatenate(available, axis=1)

# ---- assemble the feature matrices as [C | N] ----
X_train = concat_features(C_train, N_train)
# Validation features are optional: built only when at least one part was found on disk.
X_val   = concat_features(C_val,   N_val) if (C_val is not None or N_val is not None) else None
X_test  = concat_features(C_test,  N_test)

# ---- reduce the size of both splits ----
# Upper bound on the number of rows kept per split. The request is capped at the
# split size because sampling without replacement raises ValueError when more rows
# are requested than exist.
N_SUBSAMPLE = 10000

# Fixed seed for reproducibility
rng = np.random.default_rng(seed=42)

# Random row indices (train first, then test, so the draws stay reproducible)
idx = rng.choice(X_train.shape[0], size=min(N_SUBSAMPLE, X_train.shape[0]), replace=False)
idx2 = rng.choice(X_test.shape[0], size=min(N_SUBSAMPLE, X_test.shape[0]), replace=False)

# Subsample features and labels with the same indices so rows stay aligned
X_train = X_train[idx]
y_train = y_train[idx]
X_test = X_test[idx2]
y_test = y_test[idx2]


print("X_train:", X_train.shape, X_train.dtype)
print("X_test :", X_test.shape,  X_test.dtype)
print("y_train:", y_train.shape,  y_train.dtype)
print("y_test:", y_test.shape,  y_test.dtype)
print("y_test  unique:", np.unique(y_test))

# ---- train TabPFN ----
# The features are passed as loaded (this section intentionally skips categorical pre-processing).
clf = TabPFNClassifier()
clf.fit(X_train, y_train)

# ---- predict & metrics ----
# One-vs-rest ROC AUC over the full probability matrix (the recorded run shows labels 0, 1, 2).
proba = clf.predict_proba(X_test)
print("ROC AUC (OvR):", roc_auc_score(y_test, proba, multi_class="ovr"))
pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, pred))


Using device: cuda
C_train: (93094, 6) object
X_train: (10000, 18) object
X_test : (10000, 18) object
y_train: (10000,) int64
y_test: (10000,) int64
y_test  unique: [0 1 2]


Processing: 100%|██████████| [00:29<00:00]


ROC AUC (OvR): 0.8812873050146299


Processing: 100%|██████████| [00:29<00:00]


Accuracy: 0.835


# Dataset Eye movements

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score
import numpy as np
import torch

from tabpfn_client import TabPFNClassifier, init

# Start the tabpfn_client session (an earlier cell's recorded output shows this can prompt for login).
init()

# Pick the best available device.
# NOTE(review): `device` is only printed below; it is never passed to TabPFNClassifier().
device = "cuda" if torch.cuda.is_available() else (
    "mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else "cpu"
)
print("Using device:", device)

# Load the classification dataset with OpenML ID 43946 as pandas objects.
df = fetch_openml(data_id=43946, as_frame=True)
X = df.data.to_numpy().astype(np.float32)   # cast every feature column to float32
y = df.target.to_numpy()                    # target is used as loaded, without re-encoding

# 50/50 train/test split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42, stratify=y
)

# Shapes, dtypes and the label set, printed as a sanity check before fitting.
print("X_train:", X_train.shape, X_train.dtype)
print("X_test :", X_test.shape,  X_test.dtype)
print("y_train:", y_train.shape,  y_train.dtype)
print("y_test:", y_test.shape,  y_test.dtype)
print("y_test  unique:", np.unique(y_test))

# Fit the classifier with its default settings (no device argument is supplied).
clf = TabPFNClassifier()
clf.fit(X_train, y_train)

proba = clf.predict_proba(X_test)
# Binary scoring: column 1 of `proba` is used as the positive-class score
# (the recorded run prints the label set [0 1]).
# (multi-class alternative: roc_auc_score(y_test, proba, multi_class="ovr"))
print("ROC AUC:", roc_auc_score(y_test.ravel(), proba[:, 1]))

pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, pred))


Using device: cpu
X_train: (3804, 23) float32
X_test : (3804, 23) float32
y_train: (3804,) int64
y_test: (3804,) int64
y_test  unique: [0 1]


Processing: 100%|██████████| [00:06<00:00]


ROC AUC: 0.667914868515183


Processing: 100%|██████████| [00:05<00:00]


Accuracy: 0.6230283911671924


# Dataset Bank note authentication

In [None]:
def LAC_conformal_score(proba, true_labels):
    """LAC conformal score: one minus the probability assigned to each sample's true class.

    Parameters:
    - proba: 2D array indexed as [sample, class]
    - true_labels: integer class index per sample

    Returns:
    - 1D float array with one score per entry of `true_labels`
    """
    row_idx = np.arange(len(true_labels))
    p_true = proba[row_idx, true_labels]
    return np.ones(row_idx.size) - p_true

def aps_conformal_score(proba, true_labels):
    """APS conformal score: total probability of all classes at least as likely as the true class.

    Parameters:
    - proba: 2D array indexed as [sample, class]
    - true_labels: integer class index per sample

    Returns:
    - 1D array with one score per sample (the true class itself is always included in the sum)
    """
    sample_idx = np.arange(proba.shape[0])
    p_true = proba[sample_idx, true_labels]
    # Column vector so the comparison broadcasts across the class axis.
    at_least_as_likely = proba >= p_true.reshape(-1, 1)
    return (proba * at_least_as_likely).sum(axis=1)

def conformal_quantile(scores, alpha):
    """Finite-sample conformal quantile of the calibration scores.

    The quantile level is ceil((n + 1) * (1 - alpha)) / n, evaluated with NumPy's
    "higher" method (never interpolates below an observed score).

    Parameters:
    - scores: 1D array-like of calibration conformal scores (n = len(scores))
    - alpha: target miscoverage level, e.g. 0.1 for 90% coverage

    Returns:
    - the score threshold q_hat; `np.inf` when the calibration set is too small for
      the requested level (ceil((n + 1) * (1 - alpha)) > n), which makes every
      prediction set contain all classes instead of failing inside np.quantile

    Raises:
    - ValueError: if `scores` is empty
    """
    n = len(scores)
    if n == 0:
        raise ValueError("conformal_quantile needs at least one calibration score")

    # Rank of the calibration score that serves as threshold (1-based).
    rank = np.ceil((n + 1) * (1 - alpha))
    if rank > n:
        # The level would exceed 1; no finite threshold guarantees coverage.
        return np.inf
    # `method=` is the current spelling of the deprecated `interpolation=` keyword.
    return np.quantile(scores, rank / n, method="higher")

def prediction_set(model, X_test, q_hat):
    """Build APS prediction sets for `X_test`.

    Parameters:
    - model: object exposing predict_proba(X) that returns a 2D [sample, class] array
    - X_test: inputs forwarded to model.predict_proba
    - q_hat: score threshold obtained from the calibration set

    Returns:
    - boolean array of shape (n_samples, n_classes); True where the class's
      APS score is less than or equal to q_hat
    """
    class_probs = model.predict_proba(X_test)
    n_rows, n_labels = class_probs.shape

    # Score every candidate label as if it were the true label of every sample.
    candidate_scores = np.empty((n_rows, n_labels), dtype=float)
    for label in range(n_labels):
        hypothesised = np.full(n_rows, label, dtype=int)
        candidate_scores[:, label] = aps_conformal_score(class_probs, hypothesised)

    return candidate_scores <= q_hat

def set_size(mask):
    """Average number of classes per prediction set.

    `mask` must be a 2D boolean array indexed as [sample, class]; the total
    count of True entries is divided by the number of rows.
    """
    # Unpacking two values keeps the requirement that `mask` is 2D.
    n_rows, _ = mask.shape
    return mask.sum() / n_rows

def coverage_rate(mask, true_labels):
    """Fraction of samples whose prediction set contains the true label.

    Parameters:
    - mask: 2D boolean array indexed as [sample, class]
    - true_labels: integer class index per sample
    """
    rows = np.arange(len(true_labels))
    covered = mask[rows, true_labels]
    return np.mean(covered)





In [None]:
import os, json
import numpy as np
import torch
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, roc_auc_score
from tabpfn_client import TabPFNClassifier, init

# Start the tabpfn_client session (an earlier cell's recorded output shows this can prompt for login).
init()

# ---- paths ----
# Folder holding the pre-split .npy arrays; it lives below the '/content/MyDrive' Drive mount point.
DATA_DIR = '/content/MyDrive/MyDrive/Datasets/banknote_authentication'

# ---- device ----
# NOTE(review): `device` is only printed; nothing later in this cell passes it to the classifier.
device = "cuda" if torch.cuda.is_available() else (
    "mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else "cpu"
)
print("Using device:", device)

# ---- utils ----
def load(name):
    """Read the array file `name` from DATA_DIR, or return None when it does not exist.

    Pickled object arrays are accepted (allow_pickle=True). Unpickling can execute
    arbitrary code, so DATA_DIR should only contain files you trust.
    """
    target = os.path.join(DATA_DIR, name)
    if not os.path.exists(target):
        return None
    return np.load(target, allow_pickle=True)

def ravel1d(a):
    """Flatten an array that has more than one dimension; return None and 0-D/1-D arrays unchanged."""
    if a is None or a.ndim <= 1:
        return a
    return a.ravel()

# ---- load arrays ----
# Only N_* feature arrays and y_* labels are read for this dataset (no C_* files).
# load() returns None for any file that is missing, which would surface later as an AttributeError.
N_train, y_train = load('N_train.npy'), load('y_train.npy')
N_val,   y_val   = load('N_val.npy'),   load('y_val.npy')
N_test,  y_test  = load('N_test.npy'),  load('y_test.npy')

# ---- build X by concatenating [C | N] ----
def concat_features(C_part, N_part):
    """Join the available feature parts column-wise, in the order [C_part | N_part].

    A part that is None is skipped. With a single available part that same array
    object is returned; with none, ValueError is raised.
    """
    available = []
    for part in (C_part, N_part):
        if part is not None:
            available.append(part)

    if len(available) == 0:
        raise ValueError("No features found (need at least C_* or N_*).")
    if len(available) == 1:
        return available[0]
    return np.concatenate(available, axis=1)

# Only N_* features exist for this dataset, so they are used directly as the feature matrices.
X_train = N_train
X_val   = N_val
X_test  = N_test


# Shapes, dtypes and the label set, printed as a sanity check before fitting.
print("X_train:", X_train.shape, X_train.dtype)
print("X_test :", X_test.shape,  X_test.dtype)
print("y_train:", y_train.shape,  y_train.dtype)
print("y_test:", y_test.shape,  y_test.dtype)
print("y_test  unique:", np.unique(y_test))

# ---- train TabPFN ----
clf = TabPFNClassifier()
clf.fit(X_train, y_train)

# ---- predict & metrics (binary task: the recorded run prints the label set [0 1]) ----
# ROC AUC is computed from column 1 of `proba` as the positive-class score; despite the
# "(OvR)" text in the printed label, no one-vs-rest averaging is involved here.
# NOTE(review): the recorded output shows ROC AUC ~0.48 and accuracy ~0.56, i.e. close to
# chance — verify that the N_* and y_* arrays are row-aligned.
proba = clf.predict_proba(X_test)
print("ROC AUC (OvR):", roc_auc_score(y_test.ravel(), proba[:, 1]))
pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, pred))

# ----- Uncertainty Quantification -------
# Split conformal prediction: the validation split serves as the calibration set.
proba_cal = clf.predict_proba(X_val) # class probabilities on the calibration set
calibration_scores = aps_conformal_score(proba_cal, y_val) # APS score of the true label per calibration sample
q_hat = conformal_quantile(calibration_scores, alpha=0.1) # score threshold for alpha = 0.1

print("Quantile value: ", q_hat)

# prediction_set() calls clf.predict_proba(X_test) again internally.
# Note: `C_test` here is the boolean prediction-set mask, not a categorical feature array.
C_test = prediction_set(clf, X_test, q_hat) # boolean [sample, class] membership mask
cover_rate = coverage_rate(C_test, y_test) # fraction of test samples whose set contains the true label

print("Coverage rate: ", cover_rate)

u_q = set_size(C_test) # average number of classes per prediction set

print("Set Size: ", u_q)




Using device: cuda
X_train: (877, 4) float64
X_test : (275, 4) float64
y_train: (877,) int64
y_test: (275,) int64
y_test  unique: [0 1]


Processing: 100%|██████████| [00:01<00:00]


ROC AUC (OvR): 0.4795885567341691


Processing: 100%|██████████| [00:00<00:00]


Accuracy: 0.5563636363636364


Processing: 100%|██████████| [00:00<00:00]
Users of the modes 'nearest', 'lower', 'higher', or 'midpoint' are encouraged to review the method they used. (Deprecated NumPy 1.22)
  q_hat = conformal_quantile(calibration_scores, alpha=0.1) # computes the quantile


Quantile:  1.0000000298023224


Processing: 100%|██████████| [00:01<00:00]


Coverage rate:  0.96
Set Size:  1.9163636363636363


In [None]:
import numpy as np
from numpy._typing import NDArray

def confidence_score(probs: NDArray):
    """Confidence-based uncertainty score: the negated top class probability per sample.

    Follows the same orientation as `margin_score` (larger value = less confident
    prediction). The previous form, np.max(-probs, axis=1), returned the negated
    *minimum* probability, which does not reflect the confidence of the prediction.

    Parameters:
    - probs: 2D array indexed as [sample, class]

    Returns:
    - 1D array with one score per sample, each equal to -max(probs[i])
    """
    return -np.max(probs, axis=1)

def margin_score(probs: NDArray):
    """Negated margin between the two largest class probabilities of each sample.

    Returns, per row, (second-largest probability) - (largest probability), so the
    value is <= 0 and closer to 0 when the top two classes are nearly tied.
    Requires at least two classes.
    """
    # Ascending sort puts the runner-up and the winner in the last two columns.
    top_two = np.sort(probs, axis=1)[:, -2:]
    runner_up = top_two[:, 0]
    winner = top_two[:, 1]
    return runner_up - winner

def entropy_score(probs: NDArray, eps = 1e-9):
    """Shannon entropy (natural log) of each row of `probs`.

    `eps` is added inside the logarithm so zero probabilities do not produce -inf.
    """
    log_probs = np.log(probs + eps)
    weighted = probs * log_probs
    return -weighted.sum(axis=1)

def nnl_score(probs: NDArray, true_labels: NDArray, eps = 1e-9):
    """Negative log of the probability assigned to each sample's true class.

    `eps` is added to the probability before taking the log to avoid log(0).
    """
    sample_idx = np.arange(probs.shape[0])
    p_true = probs[sample_idx, true_labels]
    return -np.log(p_true + eps)

def ri_score(probs: NDArray, eps = 1e-9):
    """Negated sum of the log-probabilities over all classes, per sample.

    `eps` is added to every probability before taking the log to avoid log(0).
    """
    log_probs = np.log(probs + eps)
    return -log_probs.sum(axis=1)


def lac_conformal_score(probs: NDArray, true_labels: NDArray):
    """
    LAC conformal score: one minus the probability of the true class.

    Parameters:
    - probs: 2D numpy array of shape (n_samples, num_classes), class probabilities per sample
    - true_labels: 1D numpy array of shape (n_samples,), integer class label per sample

    Returns:
    - 1D numpy array of shape (n_samples,), the LAC conformal score of each sample
    """
    sample_idx = np.arange(probs.shape[0])
    p_true = probs[sample_idx, true_labels]
    return 1 - p_true

def aps_conformal_score(probs: NDArray, true_labels: NDArray):
    """
    APS conformal score: summed probability of every class at least as likely as the true class.

    Parameters:
    - probs: 2D numpy array of shape (n_samples, num_classes), class probabilities per sample
    - true_labels: 1D numpy array of shape (n_samples,), integer class label per sample

    Returns:
    - 1D numpy array of shape (n_samples,), the APS conformal score of each sample
      (the true class itself always contributes to the sum)
    """
    sample_idx = np.arange(probs.shape[0])
    p_true = probs[sample_idx, true_labels]
    # Column vector so the comparison broadcasts across the class axis.
    keep = probs >= p_true.reshape(-1, 1)
    return (probs * keep).sum(axis=1)

def compute_quantile(probs: NDArray, calibration_labels, n: int, type = "lac", alpha = 0.1):
    """Conformal score threshold (q_hat) for a calibration set.

    Parameters:
    - probs: 2D array of calibration class probabilities, indexed as [sample, class]
    - calibration_labels: integer true label per calibration sample
    - n: calibration set size used in the finite-sample correction
    - type: which conformal score to use, "lac" or "aps"
    - alpha: target miscoverage level

    Returns:
    - the ceil((n + 1) * (1 - alpha)) / n quantile of the scores ("higher" method);
      `np.inf` when that level would exceed 1 (calibration set too small for the
      requested alpha), where np.quantile would otherwise raise

    Raises:
    - AttributeError: if `type` is neither "lac" nor "aps"
    """
    if type == "lac":
        scores = lac_conformal_score(probs, calibration_labels)
    elif type == "aps":
        scores = aps_conformal_score(probs, calibration_labels)
    else:
        raise AttributeError(f"type {type} is not supported. Use 'lac' or 'aps'")

    # Rank of the calibration score that serves as threshold (1-based).
    rank = np.ceil((n + 1) * (1 - alpha))
    if rank > n:
        # No finite threshold guarantees coverage; an infinite one keeps every class.
        return np.inf
    return np.quantile(scores, rank / n, method="higher")

def lac_prediction_set(calibration_probs: NDArray, probs: NDArray, calibration_labels: NDArray, alpha = 0.1):
    """LAC prediction sets for `probs`, calibrated on a held-out set.

    Parameters:
    - calibration_probs: 2D array of calibration class probabilities, [sample, class]
    - probs: 2D array of class probabilities for the samples to predict
    - calibration_labels: 1D integer array with the true label of each calibration sample
    - alpha: target miscoverage level

    Returns:
    - boolean array shaped like `probs`; True where the class probability is at
      least 1 - q_hat. When the calibration set is too small for the requested
      alpha (quantile level above 1) every class is included rather than letting
      np.quantile raise.
    """
    n = calibration_labels.shape[0]
    # LAC score of the true label for each calibration sample.
    cal_scores = 1 - calibration_probs[np.arange(calibration_probs.shape[0]), calibration_labels]

    # Finite-sample corrected quantile; the rank is 1-based.
    rank = np.ceil((n + 1) * (1 - alpha))
    if rank > n:
        qhat = np.inf  # no finite threshold guarantees coverage
    else:
        qhat = np.quantile(cal_scores, rank / n, method='higher')

    prediction_sets = probs >= (1 - qhat)
    return prediction_sets

def aps_prediction_set(calibration_probs: NDArray, probs: NDArray, calibration_labels: NDArray, alpha = 0.1):
    """APS prediction sets for `probs`, calibrated on a held-out set.

    Parameters:
    - calibration_probs: 2D array of calibration class probabilities, [sample, class]
    - probs: 2D array of class probabilities for the samples to predict
    - calibration_labels: 1D integer array with the true label of each calibration sample
    - alpha: target miscoverage level

    Returns:
    - boolean array shaped like `probs`; True for each class whose cumulative
      probability (classes taken in descending order) is <= q_hat. When the
      calibration set is too small for the requested alpha (quantile level above 1)
      every class is included rather than letting np.quantile raise.
    """
    n = calibration_labels.shape[0]
    # Classes ordered from most to least probable, per calibration sample.
    cal_order = calibration_probs.argsort(1)[:,::-1]
    cal_sum = np.take_along_axis(calibration_probs, cal_order, axis=1).cumsum(axis=1)
    # Undo the sort, then read the cumulative mass at the true label.
    cal_scores = np.take_along_axis(cal_sum, cal_order.argsort(axis=1), axis=1)[range(n),calibration_labels]

    # Finite-sample corrected quantile; the rank is 1-based.
    rank = np.ceil((n + 1) * (1 - alpha))
    if rank > n:
        qhat = np.inf  # no finite threshold guarantees coverage
    else:
        qhat = np.quantile(cal_scores, rank / n, method='higher')

    # Same cumulative-mass construction for the samples to predict, mapped back to class order.
    test_order = probs.argsort(1)[:,::-1]
    test_sum = np.take_along_axis(probs,test_order,axis=1).cumsum(axis=1)
    prediction_sets = np.take_along_axis(test_sum <= qhat, test_order.argsort(axis=1), axis=1)
    return prediction_sets

def raps_prediction_set(calibration_probs: NDArray, test_probs: NDArray, calibration_labels: NDArray, alpha = 0.1, lam_reg=0.01, k_reg = 5, disallow_zero_sets = False, rand = True):
    """RAPS (regularized adaptive) prediction sets, calibrated on a held-out set.

    Parameters:
    - calibration_probs: 2D array of calibration class probabilities, [sample, class]
    - test_probs: 2D array of class probabilities for the samples to predict
    - calibration_labels: 1D integer array with the true label of each calibration sample
    - alpha: target miscoverage level
    - lam_reg: penalty added to every class ranked beyond the first `k_reg`
    - k_reg: number of top-ranked classes left unpenalized (capped at num_classes - 1)
    - disallow_zero_sets: if True, the most probable class is always included
    - rand: if True, test-time scores are randomized; calibration scores are
      always randomized

    Returns:
    - boolean array shaped like `test_probs` marking the classes in each set.
      When the calibration set is too small for the requested alpha (quantile
      level above 1) every class is included rather than letting np.quantile raise.

    Raises:
    - ValueError: if the two probability arrays have different numbers of classes

    Note: randomness comes from the global `np.random` state; call
    np.random.seed(...) beforehand for reproducible sets.
    """
    # Only the class count is needed here, so the arrays are not concatenated.
    n_classes = calibration_probs.shape[1]
    if test_probs.shape[1] != n_classes:
        raise ValueError("calibration_probs and test_probs must have the same number of classes")
    k_reg = min(k_reg, n_classes - 1)
    # Penalty per rank: 0 for the first k_reg ranks, lam_reg afterwards.
    reg_vec = np.array(k_reg * [0,] + (n_classes - k_reg) * [lam_reg,])[None, :]

    n = calibration_labels.shape[0]
    cal_order = calibration_probs.argsort(axis=1)[:,::-1]
    cal_sort = np.take_along_axis(calibration_probs, cal_order, axis=1)
    cal_sort_reg = cal_sort + reg_vec
    # Rank position of the true label within each sorted row.
    cal_true_labels = np.where(cal_order == calibration_labels[:,None])[1]
    cal_scores = cal_sort_reg.cumsum(axis=1)[np.arange(n), cal_true_labels] - np.random.rand(n) * cal_sort_reg[np.arange(n), cal_true_labels]

    # Finite-sample corrected quantile; the rank is 1-based.
    rank = np.ceil((n + 1) * (1 - alpha))
    if rank > n:
        qhat = np.inf  # no finite threshold guarantees coverage
    else:
        qhat = np.quantile(cal_scores, rank / n, method='higher')

    n_test = test_probs.shape[0]
    test_order = test_probs.argsort(1)[:,::-1]
    test_sort = np.take_along_axis(test_probs, test_order, axis=1)
    test_sort_reg = test_sort + reg_vec
    test_srt_reg_cumsum = test_sort_reg.cumsum(axis=1)
    indicators = (test_srt_reg_cumsum - np.random.rand(n_test, 1) * test_sort_reg) <= qhat if rand else test_srt_reg_cumsum - test_sort_reg <= qhat

    # Column 0 is the top-ranked class in sorted order.
    if disallow_zero_sets: indicators[:,0] = True
    # Map the per-rank decisions back to the original class order.
    prediction_sets = np.take_along_axis(indicators, test_order.argsort(axis=1), axis=1)
    return prediction_sets

def accuracy(y_true, y_pred):
    """Mean of the element-wise equality `y_true == y_pred` (fraction of matching labels for arrays)."""
    matches = y_true == y_pred
    return np.mean(matches)

def set_size(pred_set):
    """Average prediction-set size: the mean, over samples, of the number of included classes.

    Note: an earlier cell of this notebook defines a function with the same name;
    whichever definition ran last is the one in effect.
    """
    sizes = []
    for membership in pred_set:
        sizes.append(np.sum(membership))
    return np.mean(sizes)

def coverage_rate(y_true, pred_set):
    """Fraction of samples whose prediction set contains the true label.

    Parameters:
    - y_true: integer class label per sample
    - pred_set: 2D boolean array indexed as [sample, class]

    Note: an earlier cell of this notebook defines coverage_rate(mask, true_labels)
    with the two arguments in the opposite order; whichever definition ran last is
    the one in effect.
    """
    sample_idx = np.arange(pred_set.shape[0])
    covered = pred_set[sample_idx, y_true]
    return covered.mean()

In [None]:
import os, json
import numpy as np
import torch
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, roc_auc_score
from tabpfn_client import TabPFNClassifier, init

# Start the tabpfn_client session; in this cell's recorded output the call is followed
# by an interactive account-creation / login prompt.
init()

# ---- paths ----
# Folder holding the pre-split .npy arrays; it lives below the '/content/MyDrive' Drive mount point.
DATA_DIR = '/content/MyDrive/MyDrive/Datasets/banknote_authentication'

# ---- device ----
# NOTE(review): `device` is only printed; nothing later in this cell passes it to the classifier.
device = "cuda" if torch.cuda.is_available() else (
    "mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else "cpu"
)
print("Using device:", device)

# ---- utils ----
def load(name):
    """Read the array file `name` from DATA_DIR, or return None when it does not exist.

    Pickled object arrays are accepted (allow_pickle=True). Unpickling can execute
    arbitrary code, so DATA_DIR should only contain files you trust.
    """
    file_path = os.path.join(DATA_DIR, name)
    if not os.path.exists(file_path):
        return None
    return np.load(file_path, allow_pickle=True)

def ravel1d(a):
    """Flatten an array that has more than one dimension; return None and 0-D/1-D arrays unchanged."""
    if a is None or a.ndim <= 1:
        return a
    return a.ravel()

# ---- load arrays ----
# Only N_* feature arrays and y_* labels are read for this dataset (no C_* files).
# load() returns None for any file that is missing, which would surface later as an AttributeError.
N_train, y_train = load('N_train.npy'), load('y_train.npy')
N_val,   y_val   = load('N_val.npy'),   load('y_val.npy')
N_test,  y_test  = load('N_test.npy'),  load('y_test.npy')

# ---- build X by concatenating [C | N] ----
def concat_features(C_part, N_part):
    """Join the available feature parts column-wise, in the order [C_part | N_part].

    A part that is None is skipped. With a single available part that same array
    object is returned; with none, ValueError is raised.
    """
    available = []
    for part in (C_part, N_part):
        if part is not None:
            available.append(part)

    if len(available) == 0:
        raise ValueError("No features found (need at least C_* or N_*).")
    if len(available) == 1:
        return available[0]
    return np.concatenate(available, axis=1)

# Only N_* features exist for this dataset, so they are used directly as the feature matrices.
X_train = N_train
X_val   = N_val
X_test  = N_test


# Shapes, dtypes and the label set, printed as a sanity check before fitting.
print("X_train:", X_train.shape, X_train.dtype)
print("X_test :", X_test.shape,  X_test.dtype)
print("y_train:", y_train.shape,  y_train.dtype)
print("y_test:", y_test.shape,  y_test.dtype)
print("y_test  unique:", np.unique(y_test))

# ---- train TabPFN ----
clf = TabPFNClassifier()
clf.fit(X_train, y_train)

# ---- predict class probabilities ----
# `proba` (test split) and `proba_cal` (validation split, used as the calibration set)
# are left in the kernel namespace; no metrics are computed in this cell.
proba = clf.predict_proba(X_test)
proba_cal = clf.predict_proba(X_val) # class probabilities on the calibration set

  Welcome to TabPFN!

  TabPFN is still under active development, and we are working hard to make it better.
  Please bear with us if you encounter any issues.


Opening browser for login. Please complete the login/registration process in your browser and return here.


Could not open browser automatically. Falling back to command-line login...

  Please choose one of the following options:
  (1) Create a TabPFN account
  (2) Login to your TabPFN account

  Please enter your choice: 1

  Please refer to our terms and conditions at: https://www.priorlabs.ai/terms By using TabPFN, you agree to the following terms and conditions:
  Do you agree to the above terms and conditions? (y/n): y
  Please enter your email: <redacted>

  Password requirements (minimum):
  . Length(8)
  . Uppercase(1)
  . Numbers(1)
  . Special(1)

  Please enter your password: ··········
  Please confirm your password: ··········
  Entered password and confirmation password do not match, please try agai

Processing: 100%|██████████| [00:01<00:00]
Processing: 100%|██████████| [00:01<00:00]


NameError: name 'cal_proba' is not defined

In [None]:
# Build the three kinds of prediction sets from the calibration probabilities (`proba_cal`, `y_val`)
# and the test probabilities (`proba`); all use the functions' default alpha of 0.1.
# This cell relies on `clf`, `proba`, `proba_cal`, `y_val` and `y_test` already existing in the kernel.
lac_pred_set = lac_prediction_set(proba_cal, proba, y_val)
aps_pred_set = aps_prediction_set(proba_cal, proba, y_val)
# raps_prediction_set draws from the global np.random state and no seed is set here,
# so the RAPS numbers can change between runs.
raps_pred_set = raps_prediction_set(proba_cal, proba, y_val)

# Binary ROC AUC from column 1 of `proba` as the positive-class score.
print("ROC AUC:", roc_auc_score(y_test, proba[:,1]))
# Hard predictions are the arg-max column index of `proba`.
y_pred = np.argmax(proba, axis=1)
# If the classifier exposes a fitted label encoder, map column indices back to the original labels.
if hasattr(clf, "label_encoder_") and clf.label_encoder_ is not None:
    y_pred = clf.label_encoder_.inverse_transform(y_pred)
print("Accuracy:", accuracy_score(y_test, y_pred))
# SS = average prediction-set size, CR = coverage rate (true label inside the set).
# coverage_rate is called as (y_true, pred_set), i.e. the argument order of the definition
# that sits next to lac_prediction_set / aps_prediction_set / raps_prediction_set.
print("SS (LAC):", set_size(lac_pred_set))
print("SS (APS):", set_size(aps_pred_set))
print("SS (RAPS):", set_size(raps_pred_set))
print("CR (LAC):", coverage_rate(y_test, lac_pred_set))
print("CR (APS):", coverage_rate(y_test, aps_pred_set))
print("CR (RAPS):", coverage_rate(y_test, raps_pred_set))

ROC AUC: 0.4915354119789993
Accuracy: 0.5563636363636364
SS (LAC): 1.6181818181818182
SS (APS): 1.9272727272727272
SS (RAPS): 1.8109090909090908
CR (LAC): 0.8290909090909091
CR (APS): 0.9709090909090909
CR (RAPS): 0.9163636363636364
