In [7]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time
from tqdm import tqdm
from scipy.special import softmax
import torch.utils.data as tdata
import matplotlib.pyplot as plt 
import torch.backends.cudnn as cudnn
import random

In [8]:
import os
import shutil
from google.colab import drive
drive.mount('/content/drive')
checkpoints = '/content/drive/My Drive/Colab Notebooks/cse559a/'
if not os.path.exists(checkpoints):
  os.makedirs(checkpoints)
  
if not os.path.exists('imagenet_val'):
  if not os.path.exists(checkpoints + 'imagenet_val.tar'):
    print("Downloading archive...")
    os.chdir(checkpoints)
    !wget -nv -O imagenet_val.tar -L https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar
    os.chdir('/content/')
  print("Copying to local runtime...")
  shutil.copy(checkpoints + 'imagenet_val.tar', './imagenet_val.tar')
  print("Uncompressing...")
  !mkdir imagenet_val
  !tar -xf imagenet_val.tar -C ./imagenet_val/
  !rm imagenet_val.tar
  os.chdir('./imagenet_val') 
  !wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash
  os.chdir('/content/')
print("Data ready!")

if not os.path.exists('exp_v0.py'):
  shutil.copy(checkpoints + 'exp_v0.py', './exp_v0.py')
  print("File imported")
from exp_v0 import *

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Data ready!


In [9]:
class ConformalModel(nn.Module):
    def __init__(self, model, calib_loader, alpha, kreg=None, lamda=None, randomized=True, batch_size=32):
        super(ConformalModel, self).__init__()
        self.model = model 
        self.alpha = alpha
        self.T = torch.Tensor([1.3]) #initialize (1.3 is usually a good value)
        self.T, calib_logits = platt(self, calib_loader)
        self.randomized=randomized
        self.num_classes = len(calib_loader.dataset.dataset.classes)
        self.penalties = np.zeros((1, self.num_classes))
        self.penalties[:, kreg:] += lamda 

        calib_loader = tdata.DataLoader(calib_logits, batch_size = batch_size, shuffle=False, pin_memory=True)

        self.Qhat = conformal_calibration_logits(self, calib_loader)

    def forward(self, *args, randomized=None,  **kwargs):
        if randomized == None:
            randomized = self.randomized
        logits = self.model(*args, **kwargs)
        
        with torch.no_grad():
            logits_numpy = logits.detach().cpu().numpy()
            scores = softmax(logits_numpy/self.T.item(), axis=1)

            I, ordered, cumsum = sort_sum(scores)

            S = gcq(scores, self.Qhat, I=I, ordered=ordered, cumsum=cumsum, penalties=self.penalties, randomized=randomized)

        return logits, S


# Temperature scaling
def platt(cmodel, calib_loader, max_iters=10, lr=0.01, epsilon=0.01):
    print("Begin Platt scaling.")
    # Save logits so don't need to double compute them
    logits_dataset = get_logits_targets(cmodel.model, calib_loader)
    logits_loader = torch.utils.data.DataLoader(logits_dataset, batch_size = calib_loader.batch_size, shuffle=False, pin_memory=True)

    nll_criterion = nn.CrossEntropyLoss().cuda()

    T = nn.Parameter(torch.Tensor([1.3]).cuda())

    optimizer = optim.SGD([T], lr=lr)
    for iter in range(max_iters):
        T_old = T.item()
        for x, targets in logits_loader:
            optimizer.zero_grad()
            x = x.cuda()
            x.requires_grad = True
            out = x/T
            loss = nll_criterion(out, targets.long().cuda())
            loss.backward()
            optimizer.step()
        if abs(T_old - T.item()) < epsilon:
            break 

    print(f"Optimal T={T.item()}")
    return T, logits_dataset 

In [10]:
transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std= [0.229, 0.224, 0.225])
            ])

modelName = 'ResNet152'
cudnn.benchmark = True
batch_size = 128
KREG_VAL = 5
LAMBDA_VAL = 0.01
NUM_TRIALS = 10

df_res = pd.DataFrame(columns=['calib_size','top1_coverage','top5_coverage','raps_coverage', 'raps_size'])

for i in range(NUM_TRIALS):
  for calib_size in [500, 2000, 5000, 10000, 20000]:
    cal_data, val_data, _ = torch.utils.data.random_split(torchvision.datasets.ImageFolder('./imagenet_val/', transform), [calib_size, 20000, 30000-calib_size])
    calib_loader = torch.utils.data.DataLoader(cal_data, batch_size=batch_size, shuffle=True, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=True, pin_memory=True)
    model = create_model(modelName)
    print(f"Size of the calibration dataset: {calib_size}")
    cmodel = ConformalModel(model, calib_loader, alpha=0.1, kreg=KREG_VAL, lamda=LAMBDA_VAL)
    top1_coverage, top5_coverage, raps_coverage, raps_size = validate(val_loader, cmodel, print_bool=True)
    temp_res = {'calib_size': calib_size,'top1_coverage': top1_coverage,'top5_coverage': top5_coverage,'raps_coverage': raps_coverage, 'raps_size': raps_size}
    df_res = df_res.append(temp_res, ignore_index = True)

Size of the calibration dataset: 500
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 4/4 [00:05<00:00,  1.27s/it]


Optimal T=1.296091079711914
N: 20000 | Time: 0.346 (1.270) | Cvg@1: 0.750 (0.783) | Cvg@5: 1.000 (0.941) | Cvg@RAPS: 0.812 (0.886) | Size@RAPS: 3.625 (4.105)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 2000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 16/16 [00:20<00:00,  1.27s/it]


Optimal T=1.2335331439971924
N: 20000 | Time: 0.308 (1.277) | Cvg@1: 0.688 (0.784) | Cvg@5: 0.938 (0.943) | Cvg@RAPS: 0.844 (0.892) | Size@RAPS: 3.625 (3.705)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 5000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 40/40 [00:50<00:00,  1.26s/it]


Optimal T=1.2526161670684814
N: 20000 | Time: 0.306 (1.283) | Cvg@1: 0.875 (0.782) | Cvg@5: 0.906 (0.941) | Cvg@RAPS: 0.938 (0.913) | Size@RAPS: 3.844 (4.337)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 10000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 79/79 [01:40<00:00,  1.27s/it]


Optimal T=1.2308167219161987
N: 20000 | Time: 0.323 (1.290) | Cvg@1: 0.906 (0.785) | Cvg@5: 1.000 (0.940) | Cvg@RAPS: 0.938 (0.904) | Size@RAPS: 2.281 (4.039)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 20000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 157/157 [03:18<00:00,  1.27s/it]


Optimal T=1.2087279558181763
N: 20000 | Time: 0.305 (1.274) | Cvg@1: 0.688 (0.779) | Cvg@5: 0.938 (0.940) | Cvg@RAPS: 0.906 (0.898) | Size@RAPS: 4.375 (3.853)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 500
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 4/4 [00:05<00:00,  1.28s/it]


Optimal T=1.2993371486663818
N: 20000 | Time: 0.511 (1.287) | Cvg@1: 0.719 (0.785) | Cvg@5: 0.938 (0.942) | Cvg@RAPS: 0.906 (0.929) | Size@RAPS: 4.281 (5.025)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 2000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 16/16 [00:19<00:00,  1.24s/it]


Optimal T=1.2553291320800781
N: 20000 | Time: 0.428 (1.284) | Cvg@1: 0.719 (0.780) | Cvg@5: 1.000 (0.940) | Cvg@RAPS: 0.938 (0.893) | Size@RAPS: 5.031 (4.053)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 5000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 40/40 [00:49<00:00,  1.23s/it]


Optimal T=1.2265888452529907
N: 20000 | Time: 0.344 (1.278) | Cvg@1: 0.688 (0.779) | Cvg@5: 0.938 (0.940) | Cvg@RAPS: 0.875 (0.897) | Size@RAPS: 5.656 (3.948)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 10000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 79/79 [01:38<00:00,  1.25s/it]


Optimal T=1.1917288303375244
N: 20000 | Time: 0.318 (1.277) | Cvg@1: 0.688 (0.783) | Cvg@5: 0.938 (0.939) | Cvg@RAPS: 0.812 (0.892) | Size@RAPS: 3.344 (3.661)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 20000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 157/157 [03:18<00:00,  1.26s/it]


Optimal T=1.2162615060806274
N: 20000 | Time: 0.291 (1.271) | Cvg@1: 0.688 (0.785) | Cvg@5: 0.906 (0.941) | Cvg@RAPS: 0.906 (0.901) | Size@RAPS: 4.281 (3.865)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 500
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 4/4 [00:04<00:00,  1.20s/it]


Optimal T=1.2941702604293823
N: 20000 | Time: 0.320 (1.285) | Cvg@1: 0.812 (0.778) | Cvg@5: 0.938 (0.940) | Cvg@RAPS: 0.875 (0.882) | Size@RAPS: 4.188 (3.965)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 2000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 16/16 [00:20<00:00,  1.26s/it]


Optimal T=1.2919514179229736
N: 20000 | Time: 0.300 (1.274) | Cvg@1: 0.719 (0.784) | Cvg@5: 0.938 (0.939) | Cvg@RAPS: 0.906 (0.910) | Size@RAPS: 5.062 (4.500)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 5000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 40/40 [00:49<00:00,  1.24s/it]


Optimal T=1.2191635370254517
N: 20000 | Time: 0.299 (1.275) | Cvg@1: 0.812 (0.786) | Cvg@5: 0.969 (0.942) | Cvg@RAPS: 0.875 (0.902) | Size@RAPS: 2.938 (3.919)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 10000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 79/79 [01:39<00:00,  1.26s/it]


Optimal T=1.210644245147705
N: 20000 | Time: 0.299 (1.279) | Cvg@1: 0.750 (0.779) | Cvg@5: 0.844 (0.937) | Cvg@RAPS: 0.875 (0.897) | Size@RAPS: 2.906 (3.950)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 20000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 157/157 [03:19<00:00,  1.27s/it]


Optimal T=1.19896399974823
N: 20000 | Time: 0.307 (1.274) | Cvg@1: 0.688 (0.780) | Cvg@5: 0.906 (0.939) | Cvg@RAPS: 0.875 (0.899) | Size@RAPS: 4.250 (3.837)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 500
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 4/4 [00:05<00:00,  1.25s/it]


Optimal T=1.2983821630477905
N: 20000 | Time: 0.438 (1.278) | Cvg@1: 0.781 (0.782) | Cvg@5: 1.000 (0.942) | Cvg@RAPS: 0.844 (0.902) | Size@RAPS: 3.281 (4.373)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 2000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 16/16 [00:19<00:00,  1.23s/it]


Optimal T=1.2646682262420654
N: 20000 | Time: 0.297 (1.297) | Cvg@1: 0.781 (0.785) | Cvg@5: 0.906 (0.942) | Cvg@RAPS: 0.812 (0.908) | Size@RAPS: 5.062 (4.304)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 5000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 40/40 [00:50<00:00,  1.26s/it]


Optimal T=1.2024998664855957
N: 20000 | Time: 0.306 (1.299) | Cvg@1: 0.906 (0.782) | Cvg@5: 1.000 (0.939) | Cvg@RAPS: 0.938 (0.896) | Size@RAPS: 2.812 (3.755)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 10000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 79/79 [01:39<00:00,  1.26s/it]


Optimal T=1.241711139678955
N: 20000 | Time: 0.354 (1.298) | Cvg@1: 0.812 (0.783) | Cvg@5: 1.000 (0.941) | Cvg@RAPS: 0.969 (0.903) | Size@RAPS: 3.250 (4.078)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 20000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 157/157 [03:18<00:00,  1.27s/it]


Optimal T=1.2075508832931519
N: 20000 | Time: 0.320 (1.295) | Cvg@1: 0.781 (0.779) | Cvg@5: 0.969 (0.939) | Cvg@RAPS: 0.969 (0.898) | Size@RAPS: 3.688 (3.896)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 500
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 4/4 [00:04<00:00,  1.24s/it]


Optimal T=1.295803427696228
N: 20000 | Time: 0.346 (1.292) | Cvg@1: 0.781 (0.780) | Cvg@5: 0.938 (0.940) | Cvg@RAPS: 0.938 (0.895) | Size@RAPS: 4.000 (4.299)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 2000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 16/16 [00:19<00:00,  1.25s/it]


Optimal T=1.2640138864517212
N: 20000 | Time: 0.298 (1.296) | Cvg@1: 0.750 (0.780) | Cvg@5: 0.906 (0.940) | Cvg@RAPS: 0.969 (0.899) | Size@RAPS: 6.031 (4.146)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 5000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 40/40 [00:49<00:00,  1.23s/it]


Optimal T=1.230911135673523
N: 20000 | Time: 0.316 (1.291) | Cvg@1: 0.812 (0.784) | Cvg@5: 0.969 (0.941) | Cvg@RAPS: 0.938 (0.896) | Size@RAPS: 4.406 (3.878)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 10000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 79/79 [01:38<00:00,  1.25s/it]


Optimal T=1.2223831415176392
N: 20000 | Time: 0.330 (1.289) | Cvg@1: 0.719 (0.780) | Cvg@5: 0.969 (0.938) | Cvg@RAPS: 0.875 (0.898) | Size@RAPS: 3.594 (3.996)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 20000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 157/157 [03:18<00:00,  1.26s/it]


Optimal T=1.1953890323638916
N: 20000 | Time: 0.310 (1.285) | Cvg@1: 0.812 (0.784) | Cvg@5: 0.906 (0.939) | Cvg@RAPS: 0.875 (0.896) | Size@RAPS: 2.562 (3.731)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 500
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 4/4 [00:05<00:00,  1.31s/it]


Optimal T=1.2946786880493164
N: 20000 | Time: 0.307 (1.283) | Cvg@1: 0.781 (0.782) | Cvg@5: 0.969 (0.939) | Cvg@RAPS: 0.875 (0.876) | Size@RAPS: 2.906 (3.857)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 2000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 16/16 [00:19<00:00,  1.24s/it]


Optimal T=1.218214750289917
N: 20000 | Time: 0.314 (1.278) | Cvg@1: 0.719 (0.778) | Cvg@5: 0.875 (0.940) | Cvg@RAPS: 0.844 (0.889) | Size@RAPS: 3.906 (3.704)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 5000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 40/40 [00:49<00:00,  1.23s/it]


Optimal T=1.2291799783706665
N: 20000 | Time: 0.349 (1.278) | Cvg@1: 0.781 (0.785) | Cvg@5: 0.906 (0.940) | Cvg@RAPS: 0.875 (0.899) | Size@RAPS: 6.781 (3.925)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 10000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 79/79 [01:38<00:00,  1.24s/it]


Optimal T=1.211180567741394
N: 20000 | Time: 0.666 (1.278) | Cvg@1: 0.812 (0.783) | Cvg@5: 0.938 (0.939) | Cvg@RAPS: 0.906 (0.894) | Size@RAPS: 4.062 (3.784)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 20000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 157/157 [03:18<00:00,  1.26s/it]


Optimal T=1.2085883617401123
N: 20000 | Time: 0.308 (1.277) | Cvg@1: 0.938 (0.785) | Cvg@5: 1.000 (0.940) | Cvg@RAPS: 0.938 (0.900) | Size@RAPS: 3.656 (3.805)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 500
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 4/4 [00:05<00:00,  1.30s/it]


Optimal T=1.2989470958709717
N: 20000 | Time: 0.378 (1.281) | Cvg@1: 0.906 (0.781) | Cvg@5: 0.969 (0.939) | Cvg@RAPS: 1.000 (0.903) | Size@RAPS: 4.719 (4.412)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 2000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 16/16 [00:19<00:00,  1.20s/it]


Optimal T=1.2519090175628662
N: 20000 | Time: 0.310 (1.274) | Cvg@1: 0.875 (0.784) | Cvg@5: 1.000 (0.940) | Cvg@RAPS: 0.906 (0.892) | Size@RAPS: 1.719 (3.960)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 5000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 40/40 [00:50<00:00,  1.26s/it]


Optimal T=1.2278245687484741
N: 20000 | Time: 0.287 (1.292) | Cvg@1: 0.906 (0.780) | Cvg@5: 0.969 (0.941) | Cvg@RAPS: 0.938 (0.898) | Size@RAPS: 2.094 (3.951)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 10000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 79/79 [01:38<00:00,  1.25s/it]


Optimal T=1.2126215696334839
N: 20000 | Time: 0.303 (1.283) | Cvg@1: 0.906 (0.786) | Cvg@5: 0.938 (0.940) | Cvg@RAPS: 0.844 (0.900) | Size@RAPS: 4.219 (3.880)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 20000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 157/157 [03:16<00:00,  1.25s/it]


Optimal T=1.2093346118927002
N: 20000 | Time: 0.325 (1.277) | Cvg@1: 0.656 (0.781) | Cvg@5: 0.875 (0.940) | Cvg@RAPS: 0.875 (0.900) | Size@RAPS: 5.750 (3.883)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 500
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 4/4 [00:04<00:00,  1.21s/it]


Optimal T=1.2986458539962769
N: 20000 | Time: 0.302 (1.285) | Cvg@1: 0.719 (0.784) | Cvg@5: 0.906 (0.939) | Cvg@RAPS: 0.969 (0.924) | Size@RAPS: 5.688 (4.958)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 2000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 16/16 [00:19<00:00,  1.25s/it]


Optimal T=1.2385576963424683
N: 20000 | Time: 0.311 (1.286) | Cvg@1: 0.781 (0.784) | Cvg@5: 0.969 (0.941) | Cvg@RAPS: 0.875 (0.899) | Size@RAPS: 4.094 (3.998)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 5000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 40/40 [00:49<00:00,  1.23s/it]


Optimal T=1.212593674659729
N: 20000 | Time: 0.317 (1.276) | Cvg@1: 0.781 (0.787) | Cvg@5: 0.938 (0.941) | Cvg@RAPS: 0.906 (0.903) | Size@RAPS: 4.156 (3.902)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 10000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 79/79 [01:37<00:00,  1.23s/it]


Optimal T=1.2358102798461914
N: 20000 | Time: 0.447 (1.283) | Cvg@1: 0.844 (0.786) | Cvg@5: 0.969 (0.941) | Cvg@RAPS: 1.000 (0.899) | Size@RAPS: 3.156 (3.979)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 20000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 157/157 [03:18<00:00,  1.26s/it]


Optimal T=1.2054104804992676
N: 20000 | Time: 0.305 (1.261) | Cvg@1: 0.812 (0.784) | Cvg@5: 0.906 (0.941) | Cvg@RAPS: 0.969 (0.901) | Size@RAPS: 5.219 (3.794)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 500
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 4/4 [00:05<00:00,  1.25s/it]


Optimal T=1.2952290773391724
N: 20000 | Time: 0.305 (1.275) | Cvg@1: 0.844 (0.778) | Cvg@5: 0.906 (0.939) | Cvg@RAPS: 0.969 (0.886) | Size@RAPS: 4.938 (4.088)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 2000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 16/16 [00:19<00:00,  1.22s/it]


Optimal T=1.237162709236145
N: 20000 | Time: 0.323 (1.271) | Cvg@1: 0.844 (0.784) | Cvg@5: 0.875 (0.941) | Cvg@RAPS: 0.812 (0.891) | Size@RAPS: 3.469 (3.816)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 5000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 40/40 [00:48<00:00,  1.22s/it]


Optimal T=1.2482736110687256
N: 20000 | Time: 0.291 (1.266) | Cvg@1: 0.812 (0.783) | Cvg@5: 0.938 (0.941) | Cvg@RAPS: 0.875 (0.903) | Size@RAPS: 4.719 (4.068)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 10000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 79/79 [01:40<00:00,  1.28s/it]


Optimal T=1.1928552389144897
N: 20000 | Time: 0.323 (1.257) | Cvg@1: 0.844 (0.782) | Cvg@5: 1.000 (0.942) | Cvg@RAPS: 0.969 (0.899) | Size@RAPS: 2.375 (3.666)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 20000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 157/157 [03:17<00:00,  1.26s/it]


Optimal T=1.203143835067749
N: 20000 | Time: 0.295 (1.267) | Cvg@1: 0.875 (0.782) | Cvg@5: 0.969 (0.941) | Cvg@RAPS: 0.906 (0.903) | Size@RAPS: 2.844 (3.866)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 500
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 4/4 [00:04<00:00,  1.21s/it]


Optimal T=1.2968050241470337
N: 20000 | Time: 0.439 (1.271) | Cvg@1: 0.750 (0.787) | Cvg@5: 0.938 (0.941) | Cvg@RAPS: 0.969 (0.912) | Size@RAPS: 5.000 (4.599)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 2000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 16/16 [00:20<00:00,  1.27s/it]


Optimal T=1.2502871751785278
N: 20000 | Time: 0.350 (1.283) | Cvg@1: 0.688 (0.780) | Cvg@5: 0.844 (0.939) | Cvg@RAPS: 0.812 (0.894) | Size@RAPS: 4.406 (3.982)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 5000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 40/40 [00:49<00:00,  1.24s/it]


Optimal T=1.2280153036117554
N: 20000 | Time: 0.304 (1.271) | Cvg@1: 0.719 (0.786) | Cvg@5: 0.969 (0.942) | Cvg@RAPS: 0.906 (0.897) | Size@RAPS: 4.938 (3.920)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 10000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 79/79 [01:39<00:00,  1.26s/it]


Optimal T=1.2208868265151978
N: 20000 | Time: 0.296 (1.276) | Cvg@1: 0.812 (0.785) | Cvg@5: 0.938 (0.942) | Cvg@RAPS: 0.844 (0.902) | Size@RAPS: 3.844 (3.952)


  df_res = df_res.append(temp_res, ignore_index = True)


Size of the calibration dataset: 20000
Begin Platt scaling.
Computing logits for model (only happens once).


100%|██████████| 157/157 [03:17<00:00,  1.26s/it]


Optimal T=1.193920612335205
N: 20000 | Time: 0.300 (1.271) | Cvg@1: 0.875 (0.781) | Cvg@5: 0.969 (0.939) | Cvg@RAPS: 0.875 (0.899) | Size@RAPS: 3.594 (3.760)


  df_res = df_res.append(temp_res, ignore_index = True)


In [11]:
print(df_res)

    calib_size  top1_coverage  top5_coverage  raps_coverage  raps_size
0        500.0        0.78335        0.94070        0.88630    4.10520
1       2000.0        0.78370        0.94340        0.89210    3.70490
2       5000.0        0.78200        0.94125        0.91280    4.33665
3      10000.0        0.78490        0.94005        0.90355    4.03925
4      20000.0        0.77915        0.93975        0.89835    3.85250
5        500.0        0.78490        0.94210        0.92890    5.02480
6       2000.0        0.77975        0.94005        0.89330    4.05325
7       5000.0        0.77900        0.94005        0.89705    3.94760
8      10000.0        0.78290        0.93880        0.89155    3.66110
9      20000.0        0.78505        0.94140        0.90085    3.86545
10       500.0        0.77835        0.93980        0.88155    3.96495
11      2000.0        0.78380        0.93920        0.90995    4.50045
12      5000.0        0.78625        0.94200        0.90200    3.91860
13    

In [12]:
mean_res = df_res.groupby('calib_size')[['top1_coverage', 'top5_coverage','raps_coverage', 'raps_size']].mean()
print(mean_res)

            top1_coverage  top5_coverage  raps_coverage  raps_size
calib_size                                                        
500.0            0.782000       0.940270       0.899500   4.368020
2000.0           0.782285       0.940550       0.896815   4.016885
5000.0           0.783390       0.940905       0.900435   3.960175
10000.0          0.783155       0.939965       0.898710   3.898550
20000.0          0.782125       0.939780       0.899435   3.829120
