In [6]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time
from tqdm import tqdm
from scipy.special import softmax 
import torch.backends.cudnn as cudnn
import random
import pickle
import itertools

In [7]:
# Colab environment setup: mount Google Drive, make sure the ImageNet validation
# images are available in the local runtime, and import the experiment helpers.
# NOTE(review): the relative paths below ('imagenet_val', './exp_v0.py') presumably
# rely on the working directory being /content — confirm before running elsewhere.
import os
import shutil
from google.colab import drive
drive.mount('/content/drive')
# Drive folder used as a persistent cache for the dataset archive and exp_v0.py.
checkpoints = '/content/drive/My Drive/Colab Notebooks/cse559a/'
if not os.path.exists(checkpoints):
  os.makedirs(checkpoints)
  
# Only rebuild the local dataset directory when it is missing from this runtime.
if not os.path.exists('imagenet_val'):
  # Download the archive into the Drive folder only if it is not cached there yet.
  if not os.path.exists(checkpoints + 'imagenet_val.tar'):
    print("Downloading archive...")
    # wget writes to the current directory, so temporarily switch into the Drive folder.
    os.chdir(checkpoints)
    !wget -nv -O imagenet_val.tar -L https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar
    os.chdir('/content/')
  print("Copying to local runtime...")
  shutil.copy(checkpoints + 'imagenet_val.tar', './imagenet_val.tar')
  print("Uncompressing...")
  !mkdir imagenet_val
  !tar -xf imagenet_val.tar -C ./imagenet_val/
  # The local copy of the archive is no longer needed once extracted.
  !rm imagenet_val.tar
  # Run the remote valprep.sh script from inside the extracted directory.
  # NOTE(review): presumably this sorts the validation images into per-class
  # sub-folders — confirm against the script. It pipes a remote script straight
  # into bash, so the script's content is trusted as-is.
  os.chdir('./imagenet_val') 
  !wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash
  os.chdir('/content/')
print("Data ready!")

# Fetch the helper module from Drive if it is not already in the working directory.
if not os.path.exists('exp_v0.py'):
  shutil.copy(checkpoints + 'exp_v0.py', './exp_v0.py')
  print("File imported")
# NOTE(review): the star import hides which names come from exp_v0; helpers used
# later in this notebook that are not defined here presumably originate from it.
from exp_v0 import *

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Data ready!


In [8]:
def _fix_randomness(seed=0):
    ### Fix randomness 
    np.random.seed(seed=seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    random.seed(seed)

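# Returns a dataframe with one row per validation example, containing:
# 1) size: the size of the prediction set produced for that example.
# 2) topk: the 1-based position of the true label in the ordering returned by
#    sort_sum (i.e. which ranked score was the correct one).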
def sizes_topk(modelname, datasetname, datasetpath, alpha, kreg, lamda, randomized, n_data_conf, n_data_val, bsz, predictor):
    """Run one conformal-prediction trial and collect per-example results.

    The logits dataset is split into a calibration part and a validation part,
    a ``ConformalModelLogits`` is calibrated on the former, and every batch of
    the latter is pushed through it. The empirical coverage is printed.

    NOTE(review): ``get_logits_dataset``, ``split2``, ``create_model``,
    ``ConformalModelLogits`` and ``sort_sum`` are not defined in this notebook;
    presumably they come from ``from exp_v0 import *`` — confirm.

    Args:
        modelname: Model identifier passed to ``get_logits_dataset`` and ``create_model``.
        datasetname: Dataset identifier passed to ``get_logits_dataset``.
        datasetpath: Dataset location passed to ``get_logits_dataset``.
        alpha: Passed to ``ConformalModelLogits`` as ``alpha``.
        kreg: Passed to ``ConformalModelLogits`` as ``kreg``.
        lamda: Regularization weight. It is replaced by 0 for the 'Naive' and
            'APS' predictors, but the requested value is what gets recorded in
            the output frame and in the printed message.
        randomized: Passed to ``ConformalModelLogits`` as ``randomized``.
        n_data_conf: Calibration split size passed to ``split2``.
        n_data_val: Validation split size passed to ``split2``.
        bsz: Batch size of both data loaders.
        predictor: Predictor name; 'Naive' enables the ``naive`` flag, and
            'Naive'/'APS' disable regularization.

    Returns:
        pandas.DataFrame with columns ``model``, ``predictor``, ``size``,
        ``topk`` and ``lamda``, one row per validation example. An empty frame
        with those columns is returned when the validation loader yields nothing.
    """
    _fix_randomness()
    ### Experiment logic
    naive_bool = predictor == 'Naive'
    # 'Naive' and 'APS' run without regularization.
    lamda_predictor = 0 if predictor in ['Naive', 'APS'] else lamda

    ### Data Loading
    logits = get_logits_dataset(modelname, datasetname, datasetpath)
    # NOTE(review): _fix_randomness() reseeds with the same value on every call,
    # so this split is presumably identical across calls — confirm how split2
    # draws its randomness.
    logits_cal, logits_val = split2(logits, n_data_conf, n_data_val)
    # Prepare the loaders
    loader_cal = torch.utils.data.DataLoader(logits_cal, batch_size = bsz, shuffle=False, pin_memory=True)
    loader_val = torch.utils.data.DataLoader(logits_val, batch_size = bsz, shuffle=False, pin_memory=True)

    ### Instantiate and wrap model
    model = create_model(modelname)
    # Conformalize the model
    conformal_model = ConformalModelLogits(model, loader_cal, alpha=alpha, kreg=kreg, lamda=lamda_predictor, randomized=randomized, naive=naive_bool)

    columns = ['model','predictor','size','topk','lamda']
    # Collect one frame per batch and concatenate once at the end: growing a
    # frame with pd.concat inside the loop re-copies all previous rows each time.
    batch_frames = []
    corrects = 0
    denom = 0
    ### Perform experiment
    for logit, target in tqdm(loader_val):
        # compute output
        output, S = conformal_model(logit) # This is a 'dummy model' which takes logits, for efficiency.
        # Number of labels in each example's prediction set.
        size = np.array([x.size for x in S])
        I, _, _ = sort_sum(logit.numpy())
        # 1-based column at which each row of I equals that example's target label.
        topk = np.where((I - target.view(-1,1).numpy())==0)[1]+1
        batch_frames.append(pd.DataFrame({'model': modelname, 'predictor': predictor, 'size': size, 'topk': topk, 'lamda': lamda}))

        # An example counts as covered when its true-label position is within the set size.
        corrects += int(np.sum(topk <= size))
        denom += output.shape[0]

    if batch_frames:
        df = pd.concat(batch_frames, ignore_index=True)
    else:
        df = pd.DataFrame(columns=columns)

    if denom:
        print(f"Empirical coverage: {corrects/denom} with lambda: {lamda}")
    else:
        # Avoid a ZeroDivisionError when there is nothing to evaluate.
        print(f"Empirical coverage: undefined (no validation examples) with lambda: {lamda}")
    return df

In [9]:
# Table-generation code (builds a LaTeX table as a string; nothing is plotted here)
def adaptiveness_table(df_big, sizes=None):
    """Build a LaTeX table of coverage stratified by prediction-set size.

    For every size bin and every distinct ``lamda`` value in ``df_big``, the
    table reports the number of rows whose ``size`` falls in the bin ("cnt")
    and the fraction of those rows with ``topk <= size`` ("cvg"). A bin with
    no rows for a given ``lamda`` is rendered as a count of 0 with an empty
    coverage cell.

    As a sanity check, a dict mapping each ``lamda`` to its overall coverage
    (covered rows inside the listed bins divided by all rows with that
    ``lamda``) is printed.

    Args:
        df_big: DataFrame with at least the columns ``size``, ``topk`` and
            ``lamda``.
        sizes: Optional list of inclusive ``[low, high]`` bins on ``size``.
            Defaults to ``[[0,1],[2,3],[4,6],[7,10],[11,100],[101,1000]]``.

    Returns:
        str: The complete LaTeX ``table`` environment.
    """
    if sizes is None:
        sizes = [[0,1],[2,3],[4,6],[7,10],[11,100],[101,1000]]

    tbl = ""
    tbl += "\\begin{table}[t]\n"
    tbl += "\\centering\n"
    tbl += "\\small\n"
    tbl += "\\begin{tabular}{l"

    lamdaunique = df_big.lamda.unique()

    multicol_line = "        "
    midrule_line = "        "
    label_line = "size "

    # Each lamda owns two columns (cnt, cvg); column 1 is the size label, so the
    # pair belonging to the i-th lamda spans columns 2*i+2 .. 2*i+3.
    # Backslashes are escaped explicitly: "\m", "\l" and "\c" are invalid escape
    # sequences in ordinary string literals.
    for i, lamda in enumerate(lamdaunique):
        j = 2*i
        tbl += "cc"
        multicol_line += (" & \\multicolumn{2}{c}{$\\lambda={" + str(lamda) + "}$}    ")
        midrule_line += (" \\cmidrule(r){" + str(j+1+1) + "-" + str(j+2+1) + "}    ")
        label_line += "&cnt & cvg     "

    tbl += "} \n"
    tbl += "\\toprule\n"
    multicol_line += "\\\\ \n"
    midrule_line += "\n"
    label_line += "\\\\ \n"

    tbl = tbl + multicol_line + midrule_line + label_line
    tbl += "\\midrule \n"

    # Covered rows per lamda, accumulated over the bins for the sanity-check print.
    covered_counts = {lamda: 0 for lamda in lamdaunique}
    for sz in sizes:
        if sz[0] == sz[1]:
            tbl += str(sz[0]) + "     "
        else:
            tbl += str(sz[0]) + " to " + str(sz[1]) + "     "
        df = df_big[(df_big['size'] >= sz[0]) & (df_big['size'] <= sz[1])]

        for lamda in lamdaunique:
            df_small = df[df.lamda == lamda]
            if len(df_small) == 0:
                tbl += " & 0 & "
                continue
            n_covered = len(df_small[df_small.topk <= df_small['size']])
            cvg = n_covered/len(df_small)
            covered_counts[lamda] += n_covered
            tbl +=  f" & {len(df_small)} & {cvg:.2f} "

        tbl += "\\\\ \n"
    tbl += "\\bottomrule\n"
    tbl += "\\end{tabular}\n"
    tbl += "\\caption{\\textbf{Coverage conditional on set size.} We report average coverage of images stratified by the size of the set output by a conformalized ResNet-152 for $k_{reg}=5$ and varying $\\lambda$.}\n"
    tbl += "\\label{table:adaptiveness}\n"
    tbl += "\\end{table}\n"

    # Normalize by the actual number of rows per lamda rather than assuming that
    # every lamda contributes len(df_big)/len(lamdaunique) rows.
    total_coverages = {}
    for lamda in lamdaunique:
        n_lamda = int((df_big.lamda == lamda).sum())
        total_coverages[lamda] = covered_counts[lamda]/n_lamda if n_lamda else float('nan')
    print(total_coverages)

    return tbl


In [10]:
# Experiment grid: every combination of model, alpha, predictor and lambda is run once.
modelnames = ['ResNet152']
alphas = [0.1]
predictors = ['RAPS']
lamdas = [0, 0.001, 0.01, 0.1, 1]
params = list(itertools.product(modelnames, alphas, predictors, lamdas))
m = len(params)

# Settings shared by every run.
datasetname, datasetpath = 'ImagenetVal', './imagenet_val/'
kreg = 5
randomized = True
n_data_conf = n_data_val = 20000
bsz = 64
cudnn.benchmark = True

### Perform the experiment
# Results of all runs are stacked into a single frame, one row per validation example.
df = pd.DataFrame(columns = ["model","predictor","size","topk","lamda"])
for i, (modelname, alpha, predictor, lamda) in enumerate(params):
    print(f'Model: {modelname} | Desired coverage: {1-alpha} | Predictor: {predictor} | Lambda = {lamda}')
    out = sizes_topk(
        modelname=modelname,
        datasetname=datasetname,
        datasetpath=datasetpath,
        alpha=alpha,
        kreg=kreg,
        lamda=lamda,
        randomized=randomized,
        n_data_conf=n_data_conf,
        n_data_val=n_data_val,
        bsz=bsz,
        predictor=predictor,
    )
    df = pd.concat([df, out], ignore_index=True)


# Render the size-stratified coverage table as LaTeX and show it.
tbl = adaptiveness_table(df)
print(tbl)

Model: ResNet152 | Desired coverage: 0.9 | Predictor: RAPS | Lambda = 0


Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth
100%|██████████| 230M/230M [00:00<00:00, 317MB/s]


Computing logits for model (only happens once).


100%|██████████| 1563/1563 [08:27<00:00,  3.08it/s]
313it [00:05, 56.80it/s]


Empirical coverage: 0.9018 with lambda: 0
Model: ResNet152 | Desired coverage: 0.9 | Predictor: RAPS | Lambda = 0.001


313it [00:05, 57.02it/s]


Empirical coverage: 0.8984 with lambda: 0.001
Model: ResNet152 | Desired coverage: 0.9 | Predictor: RAPS | Lambda = 0.01


313it [00:05, 57.63it/s]


Empirical coverage: 0.8978 with lambda: 0.01
Model: ResNet152 | Desired coverage: 0.9 | Predictor: RAPS | Lambda = 0.1


313it [00:05, 57.04it/s]


Empirical coverage: 0.8991 with lambda: 0.1
Model: ResNet152 | Desired coverage: 0.9 | Predictor: RAPS | Lambda = 1


313it [00:05, 57.21it/s]


Empirical coverage: 0.8995 with lambda: 1
{0: 0.9018, 0.001: 0.8983999999999999, 0.01: 0.8977999999999999, 0.1: 0.8991, 1: 0.8995000000000001}
\begin{table}[t]
\centering
\small
\begin{tabular}{lcccccccccc} 
\toprule
         & \multicolumn{2}{c}{$\lambda={0}$}     & \multicolumn{2}{c}{$\lambda={0.001}$}     & \multicolumn{2}{c}{$\lambda={0.01}$}     & \multicolumn{2}{c}{$\lambda={0.1}$}     & \multicolumn{2}{c}{$\lambda={1}$}    \\ 
         \cmidrule(r){2-3}     \cmidrule(r){4-5}     \cmidrule(r){6-7}     \cmidrule(r){8-9}     \cmidrule(r){10-11}    
size &cnt & cvg     &cnt & cvg     &cnt & cvg     &cnt & cvg     &cnt & cvg     \\ 
\midrule 
0 to 1      & 11638 & 0.88  & 11543 & 0.88  & 11230 & 0.89  & 10476 & 0.92  & 10024 & 0.93 \\ 
2 to 3      & 3725 & 0.92  & 3698 & 0.91  & 3740 & 0.92  & 3847 & 0.93  & 3925 & 0.94 \\ 
4 to 6      & 1215 & 0.90  & 1289 & 0.91  & 1705 & 0.92  & 4220 & 0.89  & 6051 & 0.83 \\ 
7 to 10      & 694 & 0.94  & 768 & 0.92  & 1311 & 0.91  & 1435 & 0.71  &