# Conformal prediction 

In [30]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from ast import literal_eval

import warnings
warnings.filterwarnings('ignore')

Execute a célula abaixo para utilizar o `cifar10_probs.csv`, que contém as probabilidades (saídas softmax) da ResNet110 no conjunto de teste do CIFAR-10:

In [31]:
# cell to run on cifar10_probs.csv (from resnet110)

# Build a frame with two columns: "label" (column 10 of the CSV) and "output"
# (the first 10 columns of each row collected into a list of floats).
data_cifar10_probs = pd.read_csv('../data-files/cifar10_probs.csv', sep=',', header=None)
data_cifar10_probs = data_cifar10_probs.rename(columns={10: "label"})
data_cifar10_probs["output"] = data_cifar10_probs.iloc[:, 0:10].values.tolist()

# organize df as conformal requires: drop the 10 raw columns and store each
# "output" list as its string representation (it is parsed back with
# `literal_eval` further down)
data_cifar10_probs = data_cifar10_probs.drop(data_cifar10_probs.columns[0:10], axis=1)
data_cifar10_probs["output"] = data_cifar10_probs["output"].astype(str)

data_cifar10_probs

Unnamed: 0,label,output
0,3,"[2.3661317e-09, 1.266867e-08, 8.9752067e-10, 0..."
1,8,"[2.0486328e-11, 8.667231e-08, 4.1953889e-16, 1..."
2,8,"[1.9216972e-07, 0.002469865, 3.1885104e-12, 3...."
3,0,"[0.999424, 1.3174234e-05, 4.9130163e-06, 2.804..."
4,6,"[3.4858956e-09, 1.7338067e-06, 1.1511221e-06, ..."
...,...,...
9995,8,"[0.20628296, 1.48559475e-05, 1.4134733e-06, 2...."
9996,3,"[4.3346837e-14, 1.0565218e-13, 8.082701e-10, 1..."
9997,5,"[8.1107293e-10, 9.9917585e-09, 7.729877e-08, 1..."
9998,1,"[1.5138195e-08, 0.99875677, 1.074252e-05, 3.15..."


Execute a célula abaixo para utilizar o `labels_convolutional_cifar10.csv`, que contém logits de uma rede convolucional própria no conjunto de teste do CIFAR-10:

In [32]:
# cell to run on labels_convolutional_cifar10.csv
# NOTE: this loader is currently disabled (everything below is commented out).

# # we have to remember to turn " " into ","
# data1 = pd.read_csv('labels (2).csv', sep=';')
# # get types of columns
# type(data1["output"][0])

# we have to remember to turn " " into "," (presumably inside the "output" strings, so they can be parsed as lists -- TODO confirm)

In [33]:
# Parse the stringified per-class probabilities back into a float matrix.
smx = data_cifar10_probs['output'] # softmax probabilities (still stored as strings)
labels = data_cifar10_probs['label'].astype(int) # true labels

# Each "output" entry is the text of a Python list, so it is decoded with
# `literal_eval`. All rows are collected first and converted in one step:
# growing an array with `np.append` inside a loop copies the whole array on
# every iteration. Iterating over the values (instead of `smx[i]`) also avoids
# depending on the frame having a 0..N-1 index.
smx = np.array([literal_eval(row) for row in smx], dtype=float).reshape(len(smx), 10)
# print(smx.shape) # (10000, 10)

#### Classical conformal prediction

In [34]:
# Problem setup for the classical conformal procedure below
n=500 # number of calibration points
alpha = 0.02 # 1-alpha is the desired coverage

In [35]:
# Split the softmax scores into calibration and validation sets.
# The shuffle uses a locally seeded generator so that the split (and every
# number computed from it) is the same after a kernel restart.

# boolean mask with n Trues (calibration) and (smx.shape[0]-n) Falses (validation)
idx = np.array([1] * n + [0] * (smx.shape[0]-n)) > 0

# shuffle the Trues and Falses in place to pick the calibration rows at random
np.random.default_rng(0).shuffle(idx)

# rows where the mask is True go to calibration, the remaining rows to validation
cal_smx, val_smx = smx[idx,:], smx[~idx, :]

# split the labels with the same mask so they stay aligned with the rows
cal_labels, val_labels = np.array(labels[idx]), np.array(labels[~idx])

In [36]:
# sanity check: shapes of the calibration / validation scores and labels
print(cal_smx.shape, val_smx.shape)
print(cal_labels.shape, val_labels.shape)

(500, 10) (9500, 10)
(500,) (9500,)


Here we have the actual conformal prediction:

In [37]:
# 1: get conformal scores. n = calib_Y.shape[0]

# cal_scores holds 1 - (probability the model assigned to the true label of
# each calibration image); the lower the score, the better
cal_scores = 1-cal_smx[np.arange(n),cal_labels]

# 2: get adjusted quantile
# qhat is the calibration score that bounds the (1-alpha) fraction of smallest
# (i.e. best) scores, using the (n+1)/n finite-sample correction.
# The level is capped at 1: for small n the correction can push it above 1,
# which np.quantile rejects.
q_level = min(np.ceil((n+1)*(1-alpha))/n, 1.0)
# `method=` replaces the deprecated `interpolation=` keyword (NumPy >= 1.22)
qhat = np.quantile(cal_scores, q_level, method='higher')

prediction_sets = val_smx >= (1-qhat) # 3: form prediction sets

In [38]:
# one row of per-class membership flags for each validation image
prediction_sets.shape

(9500, 10)

In [39]:
# fraction of validation images whose true label is inside their prediction set
empirical_coverage = prediction_sets[np.arange(prediction_sets.shape[0]),val_labels].mean()
print(f"The empirical coverage is: {empirical_coverage}")

The empirical coverage is: 0.9868421052631579


Here we again do conformal prediction, but we want exactly one label in each prediction set, so for every image we search for the smallest alpha at which its prediction set shrinks to a single label:

In [None]:
# For every validation image, find the smallest alpha on the grid whose
# prediction set contains exactly one label. Images for which no alpha on the
# grid gives a single-label set are skipped, which is why `actual_labels`
# records the labels of the images that were kept.
cal_scores = 1 - cal_smx[np.arange(n), cal_labels]

# The threshold (1 - qhat) depends only on alpha, not on the image, so it is
# computed once for the whole grid instead of once per (image, alpha) pair.
# A separate name is used for the grid so the global `alpha` is not overwritten.
alpha_grid = np.arange(0.01, 1, 0.0001)
# quantile level with the (n+1)/n correction, capped at 1 (np.quantile rejects levels > 1)
q_levels = np.minimum(np.ceil((n + 1) * (1 - alpha_grid)) / n, 1.0)
# `method=` replaces the deprecated `interpolation=` keyword (NumPy >= 1.22)
thresholds = 1 - np.quantile(cal_scores, q_levels, method='higher')

alphas = []
prediction_sets = []
actual_labels = []
for i, image in enumerate(val_smx):
    # number of classes in the prediction set for each alpha on the grid
    set_sizes = (image[np.newaxis, :] >= thresholds[:, np.newaxis]).sum(axis=1)
    single = np.flatnonzero(set_sizes == 1)
    if single.size:
        k = single[0]  # first (smallest) alpha giving exactly one class
        prediction_sets.append(image >= thresholds[k])  # 3: form prediction sets
        alphas.append(alpha_grid[k])
        # put val_labels[i] in actual_labels
        actual_labels.append(val_labels[i])

In [None]:
# convert the list of per-image prediction sets into a single array
prediction_sets = np.array(prediction_sets)
# convert the list of kept true labels into an array
actual_labels = np.array(actual_labels)

In [None]:
# fraction of kept images whose true label is inside their prediction set
empirical_coverage = prediction_sets[np.arange(prediction_sets.shape[0]),actual_labels].mean()
print(f"The empirical coverage is: {empirical_coverage}")

The empirical coverage is: 0.9368732186213449


In [None]:
# create fake logits list (1-alphas): one value per kept image, computed from
# the alpha that was recorded for it
fake_logits = 1 - np.array(alphas)
fake_logits

array([0.99 , 0.99 , 0.982, ..., 0.99 , 0.988, 0.99 ])

#### Predicting one image

In [None]:
# Problem setup for the single-image example
n=500 # number of calibration points
alpha = 0.02 # 1-alpha is the desired coverage

In [None]:
# Split the softmax scores into calibration and validation sets, then keep a
# single validation image (position 2) to predict.
# The shuffle uses a locally seeded generator so the split is the same after a
# kernel restart.

# boolean mask with n Trues (calibration) and (smx.shape[0]-n) Falses (validation)
idx = np.array([1] * n + [0] * (smx.shape[0]-n)) > 0

# shuffle the Trues and Falses in place to pick the calibration rows at random
np.random.default_rng(0).shuffle(idx)

# rows where the mask is True go to calibration, the remaining rows to validation
cal_smx, val_smx = smx[idx,:], smx[~idx, :]
# keep one image as a 2-D array with a single row (no hard-coded class count)
val_smx = val_smx[2].reshape(1, -1) #use to have 1 img to predict

# split the labels with the same mask; only the label of the kept image is
# retained, so val_labels is a single value here
cal_labels, val_labels = np.array(labels[idx]), np.array(labels[~idx])[2] #use to have 1 img to predict

### Functions:
The functions below assume the CIFAR-10 dataset (10 classes) by default, but they work with any dataset: just remember to set `n_classes` accordingly.

In [53]:
def datafile_to_df(datafile_directory, n_classes=10):
    """Read a header-less CSV of per-class values plus a label into a frame.

    Each row of the file must be laid out as
    ``value_1, value_2, ..., value_n_classes, label``.

    Parameters
    ----------
    datafile_directory : str or file-like
        Passed straight to ``pd.read_csv``.
    n_classes : int, default 10
        Number of leading columns that hold the per-class values.

    Returns
    -------
    pandas.DataFrame
        The column at position ``n_classes`` renamed to "label", plus an
        "output" column holding the string form of the list of the first
        ``n_classes`` values of each row. The leading columns are dropped.
    """
    frame = pd.read_csv(datafile_directory, sep=',', header=None).rename(
        columns={n_classes: "label"}
    )
    # gather the per-class values of every row into one list per row
    frame["output"] = frame.iloc[:, :n_classes].values.tolist()
    # the individual value columns are no longer needed
    frame = frame.drop(columns=frame.columns[:n_classes])
    # store each list as text
    frame["output"] = frame["output"].astype(str)
    return frame

In [54]:
def get_smx_and_labels(data, n_classes=10):
    """Turn the frame produced by `datafile_to_df` into a score matrix and labels.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs an "output" column whose entries are the text of a Python list
        of ``n_classes`` numbers, and a "label" column that can be cast to int.
    n_classes : int, default 10
        Number of values in every "output" entry.

    Returns
    -------
    smx : numpy.ndarray
        Float array of shape ``(len(data), n_classes)`` with the parsed values.
    labels : pandas.Series
        The "label" column cast to int.

    Raises
    ------
    ValueError
        If the parsed values cannot be arranged as ``(len(data), n_classes)``.
    """
    labels = data['label'].astype(int)

    # Iterate over the values instead of looking rows up with `smx[i]`, so a
    # frame whose index is not 0..N-1 (e.g. after filtering) works as well.
    # The rows are collected in a list and converted once; appending to a
    # NumPy array inside the loop would copy the whole array on every step.
    rows = [literal_eval(row) for row in data['output']]
    smx = np.array(rows, dtype=float).reshape(len(rows), n_classes)
    return smx, labels

In [55]:
def generate_cal_val_sets(smx, labels, size_cal_set, seed=None):
    """Randomly split scores and labels into calibration and validation sets.

    Parameters
    ----------
    smx : numpy.ndarray
        2-D array with one row of per-class scores per sample.
    labels : array-like
        One label per row of ``smx``.
    size_cal_set : int
        Number of rows to put in the calibration set.
    seed : int or None, default None
        When given, the split is drawn from ``np.random.default_rng(seed)``
        and is therefore reproducible. When None, NumPy's global random state
        is used (it can still be controlled with ``np.random.seed``).

    Returns
    -------
    cal_smx, val_smx, cal_labels, val_labels : numpy.ndarray
        Calibration rows, validation rows, and their labels in the same order.

    Raises
    ------
    ValueError
        If ``size_cal_set`` is negative or larger than the number of rows.
    """
    n_samples = smx.shape[0]
    if not 0 <= size_cal_set <= n_samples:
        raise ValueError(
            f"size_cal_set must be between 0 and {n_samples}, got {size_cal_set}"
        )

    # boolean mask with size_cal_set Trues followed by Falses, shuffled in place
    idx = np.arange(n_samples) < size_cal_set
    if seed is None:
        np.random.shuffle(idx)
    else:
        np.random.default_rng(seed).shuffle(idx)

    # index labels positionally so lists and Series are handled the same way
    labels = np.asarray(labels)
    cal_smx, val_smx = smx[idx, :], smx[~idx, :]
    cal_labels, val_labels = labels[idx], labels[~idx]

    return cal_smx, val_smx, cal_labels, val_labels

In [56]:
def conformal(cal_smx, cal_labels, val_labels, val_smx, size_cal_set):
    """Find, for each validation image, the smallest alpha giving a one-label set.

    The conformal score of a calibration point is 1 minus the value the model
    assigned to its true label. For every alpha on the grid
    ``np.arange(0.01, 1, 0.0001)`` the adjusted quantile ``qhat`` of those
    scores is computed and a class is kept in an image's prediction set when
    its value is at least ``1 - qhat``. For each image the first alpha whose
    set has exactly one class is recorded; if none is found before alpha
    reaches 0.999, the set obtained at that alpha is recorded instead (it may
    then hold zero or several classes).

    Parameters
    ----------
    cal_smx : numpy.ndarray
        Calibration scores, one row per calibration point.
    cal_labels : array-like of int
        True class index of every calibration point.
    val_labels : array-like
        True class of every validation image (indexed by position).
    val_smx : array-like
        Validation scores, one row per image.
    size_cal_set : int
        Number of calibration points (rows of ``cal_smx`` that are used).

    Returns
    -------
    prediction_sets : list of numpy.ndarray
        One boolean mask over the classes per validation image.
    alphas : list of float
        The alpha recorded for each image.
    actual_labels : list
        ``val_labels[i]`` for each image, in order.
    """
    cal_scores = 1 - cal_smx[np.arange(size_cal_set), cal_labels]

    # Alphas are tried in increasing order and the search stops at the first
    # one that is >= 0.999, so the grid is cut there.
    alpha_grid = np.arange(0.01, 1, 0.0001)
    last = int(np.argmax(alpha_grid >= 0.999))
    alpha_grid = alpha_grid[:last + 1]

    # The threshold depends only on alpha, not on the image, so it is computed
    # once for the whole grid instead of once per (image, alpha) pair.
    q_levels = np.ceil((size_cal_set + 1) * (1 - alpha_grid)) / size_cal_set
    # With few calibration points the (n+1)/n correction pushes the level
    # above 1, which np.quantile rejects; cap it so the largest score is used.
    q_levels = np.minimum(q_levels, 1.0)
    # `method=` replaces the deprecated `interpolation=` keyword (NumPy >= 1.22)
    thresholds = 1 - np.quantile(cal_scores, q_levels, method='higher')

    prediction_sets = []
    alphas = []
    actual_labels = []

    for i, image in enumerate(np.asarray(val_smx)):
        # size of the prediction set for every alpha on the grid
        set_sizes = (image[np.newaxis, :] >= thresholds[:, np.newaxis]).sum(axis=1)
        single = np.flatnonzero(set_sizes == 1)
        # first alpha with exactly one class, otherwise the fallback alpha
        k = single[0] if single.size else last

        prediction_sets.append(image >= thresholds[k])
        alphas.append(alpha_grid[k])
        actual_labels.append(val_labels[i])

    return prediction_sets, alphas, actual_labels

In [57]:
def evaluate_conformal(prediction_sets, actual_labels):
    """Compute and print the empirical coverage of a list of prediction sets.

    Parameters
    ----------
    prediction_sets : sequence of boolean arrays
        One mask over the classes per image.
    actual_labels : sequence of int
        True class index of each image, in the same order.

    Returns
    -------
    The mean, over all images, of whether the true class is flagged in the
    image's prediction set.
    """
    sets_matrix = np.array(prediction_sets)
    true_classes = np.array(actual_labels)
    # pick, for every row, the flag stored at the position of its true class
    row_ids = np.arange(sets_matrix.shape[0])
    hits = sets_matrix[row_ids, true_classes]
    empirical_coverage = hits.mean()
    print(f"The empirical coverage is: {empirical_coverage}")
    return empirical_coverage

In [58]:
def create_fake_logits(alphas):
    """Return ``1 - alpha`` for every entry of `alphas` as a NumPy array."""
    alpha_values = np.array(alphas)
    return 1 - alpha_values

In [59]:
# test our functions: run the whole pipeline end to end on the ResNet110 file,
# using 500 calibration points (the same value is passed to the split and to
# the conformal step)
data = datafile_to_df("../data-files/cifar10_probs.csv")
smx, labels = get_smx_and_labels(data)
cal_smx, val_smx, cal_labels, val_labels = generate_cal_val_sets(smx, labels, 500)
prediction_sets, alphas, actual_labels = conformal(cal_smx, cal_labels, val_labels, val_smx, 500)
empirical_coverage = evaluate_conformal(prediction_sets, actual_labels)
fake_logits = create_fake_logits(alphas)

The empirical coverage is: 0.9375593291846852


In [64]:
# inspect the values returned by create_fake_logits
fake_logits

array([0.99 , 0.99 , 0.978, ..., 0.99 , 0.98 , 0.99 ])

In [49]:
# inspect the calibration scores
cal_smx

array([[5.32159560e-09, 2.90598440e-08, 6.05252570e-12, ...,
        2.43733570e-11, 1.00000000e+00, 6.95829800e-09],
       [6.39322760e-07, 5.89543530e-08, 2.75454200e-06, ...,
        9.70621600e-07, 3.20337360e-08, 3.29595660e-08],
       [1.20873450e-06, 1.06236540e-04, 5.98267930e-06, ...,
        6.13088900e-08, 1.97542830e-05, 2.48818740e-05],
       ...,
       [1.10834820e-04, 1.19870450e-05, 1.84343880e-03, ...,
        6.89586800e-03, 2.81216460e-04, 9.82191300e-07],
       [5.02782260e-10, 1.10962555e-08, 6.88817600e-10, ...,
        2.96725700e-09, 3.79553940e-10, 3.04901400e-08],
       [2.27233250e-09, 1.51230020e-09, 8.39111400e-08, ...,
        1.15076500e-07, 1.00618200e-09, 2.20164620e-10]])

In [50]:
# shape of the calibration scores
cal_smx.shape

(500, 10)

In [52]:
# shape of the calibration labels
cal_labels.shape

(500,)