In [1]:
import os
import shutil
import numpy as np
import pandas as pd
import torchvision
import torchvision.transforms as transforms
import torch
import torch.nn.functional as F
import torchvision.transforms.functional as TF
import random
import json
from tqdm import tqdm
from torch.utils.data import random_split, DataLoader
from matplotlib import pyplot as plt
from google.colab import drive
# Mount Google Drive so the dataset archive and helper files can be read,
# and so results can be written back to it.
drive.mount('/content/drive')
# Drive folder the notebook reads its inputs from (dataset tar, helper modules, cached artifacts).
simfolder = '/content/drive/My Drive/Colab Notebooks/simfolder/'
# Drive folder the generated stimulus archives and summary files are copied to.
stimuli_folder = '/content/drive/My Drive/Colab Notebooks/stimuli/'


# One-time dataset setup: skipped entirely when ./imagenet_val already exists.
imagenet_dir = 'imagenet_val'
if not os.path.exists(imagenet_dir):
  #!cp -r "{simfolder}"* /content/
  # Copy the archive off Drive first, then unpack it into ./imagenet_val and delete the local tar.
  shutil.copy(simfolder + 'imagenet_val.tar', './imagenet_val.tar')
  print("Uncompressing...")
  !mkdir imagenet_val
  !tar -xf imagenet_val.tar -C ./imagenet_val/
  !rm imagenet_val.tar
  # valprep.sh is run from inside the image directory.
  # NOTE(review): presumably it sorts the validation images into per-class subfolders
  # (the layout ImageFolder expects) - confirm against the script.
  # NOTE(review): this pipes a script fetched from the `master` branch straight into bash;
  # consider pinning it to a commit or vendoring a copy.
  os.chdir('./imagenet_val')
  !wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash
  # Return to the working directory so the relative paths used afterwards resolve.
  os.chdir('/content/')
  print("Files imported")

# Helper modules, the label file and cached artifacts kept on Drive.
# Each one is copied into the working directory only when it exists on Drive
# and is not already present locally.
files_utils = [
    'exp_functions.py',
    'similarity_utils.py',
    'imagenet-simple-labels.json',
    'class_proxies.pt',
    'cmodel02.pt',
    'cmodel005.pt',
    'stimuli_list.json',
]
for fpath in files_utils:
  drive_copy = simfolder + fpath
  if not os.path.exists(drive_copy) or os.path.exists(fpath):
    continue
  shutil.copy(drive_copy, f'./{fpath}')

from exp_functions import *
from similarity_utils import *

Mounted at /content/drive
Uncompressing...
Files imported


In [2]:
# Build 'labeldict': position in the label file -> label name
labels_json = load_json_file('imagenet-simple-labels.json')
labeldict = dict(enumerate(labels_json))

In [3]:
# Pretrained classifier, moved to the GPU and switched to eval mode
model = torchvision.models.wide_resnet101_2(weights='Wide_ResNet101_2_Weights.IMAGENET1K_V2', progress=True)
model = model.cuda()
_ = model.eval()

# Feature extractor: every child module of the classifier except the last one
feature_modules = list(model.children())[:-1]
penultimate_layer = torch.nn.Sequential(*feature_modules)
_ = penultimate_layer.eval()

# Normalization from torchvision repo
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)

image_folder = torchvision.datasets.ImageFolder('./imagenet_val/', transform)

# Lookup table that stores each label's representative image (proxy).
# It is computed once and cached on disk; the table is always read back from the cache file.
proxy_cache = 'class_proxies.pt'
if not os.path.exists(proxy_cache):
  class_proxies = get_class_proxies(penultimate_layer, image_folder, num_classes = 1000, images_per_class =50)
  torch.save(class_proxies, proxy_cache)

proxy_table = torch.load(proxy_cache)

Downloading: "https://download.pytorch.org/models/wide_resnet101_2-d733dc28.pth" to /root/.cache/torch/hub/checkpoints/wide_resnet101_2-d733dc28.pth
100%|██████████| 485M/485M [00:02<00:00, 197MB/s]


In [4]:
# Seed every random number generator in use before the dataset is split
seed = 0
random.seed(seed)
np.random.seed(seed=seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

batch_size = 128
num_calib = 25000

# Split the image folder into a calibration part and a validation part
split_sizes = [num_calib, 50000-num_calib]
imagenet_calib_data, imagenet_val_data = random_split(image_folder, split_sizes)
loader_options = dict(batch_size = batch_size, pin_memory = True)
calib_loader = DataLoader(imagenet_calib_data, shuffle = True, **loader_options)
val_loader = DataLoader(imagenet_val_data, shuffle = False, **loader_options)

# Conformal predictor with alpha=0.2: reuse the saved object when there is one,
# otherwise calibrate it now and save it for later runs.
cmodel02_path = 'cmodel02.pt'
if os.path.exists(cmodel02_path):
  # weights_only=False unpickles the whole object - only load files you created yourself
  cmodel02 = torch.load(cmodel02_path, weights_only=False)
else:
  cmodel02 = ConformalModel(model, calib_loader, alpha=0.2, randomized=True, allow_zero_sets=True, lamda_criterion='adaptiveness')
  torch.save(cmodel02, cmodel02_path)

# Optional check on the validation loader (left disabled):
#_, _, _, _ = validate(val_loader, cmodel02, print_bool=True)
_ = cmodel02.eval()

In [5]:
# Conformal predictor with alpha=0.05: reuse the saved object when there is one,
# otherwise calibrate it on calib_loader and save it for later runs.
cmodel005_path = 'cmodel005.pt'
if os.path.exists(cmodel005_path):
  # weights_only=False unpickles the whole object - only load files you created yourself
  cmodel005 = torch.load(cmodel005_path, weights_only=False)
else:
  cmodel005 = ConformalModel(model, calib_loader, alpha=0.05, randomized=True, allow_zero_sets=True, lamda_criterion='adaptiveness')
  torch.save(cmodel005, cmodel005_path)

# Optional check on the validation loader (left disabled):
#_, _, _, _ = validate(val_loader, cmodel005, print_bool=True)
_ = cmodel005.eval()

In [6]:
# First-pass selection: instances with cmodel02-generated sets of size in range [2, 10].
# The selection is written to disk once; later runs skip this step when the file is present.
stimuli_list_path = 'stimuli_list.json'
if not os.path.exists(stimuli_list_path):
  val_indices = imagenet_val_data.indices
  stimuli_list = select_stimuli(
      cmodel02,
      val_loader,
      val_indices,
      min_size = 2,
      max_size = 10,
  )
  with open(stimuli_list_path, 'w') as f:
    json.dump(stimuli_list, f, indent=2)

In [7]:
def _export_stimulus(file_index, predset, set_size, variant, acc_tag):
  """Build, zip and upload the stimulus folder for one prediction set.

  The steps are the same for both conformal predictors; only the folder
  suffix and the accuracy tag in the archive name differ.

  Args:
    file_index: index of the image in `image_folder`.
    predset: prediction set, passed through to `compute_set_similarity`
      and `curate_proxy_images`.
    set_size: set size written into the archive name.
    variant: suffix of the working folder ('A' or 'B').
    acc_tag: tag written into the archive name ('80acc' or '95acc').

  Returns:
    The three values returned by `compute_set_similarity`, unpacked by the
    caller as average, median and minimum similarity.
  """
  image_save_dir = f"./Stimulus{file_index}_{variant}"
  os.makedirs(image_save_dir, exist_ok=True)

  avgsim, medsim, minsim = compute_set_similarity(predset, proxy_table)
  curate_proxy_images(file_index, predset, proxy_table, labeldict, image_folder, image_save_dir)

  # The archive name carries the set size and similarity statistics
  zip_name = f"{file_index}_({set_size})_{acc_tag}_avg{avgsim:.4f}_med{medsim:.4f}_min{minsim:.6f}"
  shutil.make_archive(zip_name, 'zip', image_save_dir)
  shutil.copy(f"{zip_name}.zip", stimuli_folder)
  return avgsim, medsim, minsim


# randomly pick 100 stimuli (the seed is reset here so this cell draws the same subset each time it runs)
random.seed(0)
stimuli_list_json = load_json_file('stimuli_list.json')
subset100 = random.sample(stimuli_list_json, 100)
with open('selected100_cmodel02.json', 'w') as f:
  json.dump(subset100, f, indent=2)

# Prediction sets produced by cmodel005 for the same 100 images
stimuli_cmodel005 = []

# One summary row per image, written to stimuli_info.csv below
stimuli_info = []

with torch.no_grad():
  for instance in subset100:
    file_index = instance['file_index']
    set_size_A = instance['set_size']
    predset_A = instance['prediction_set']

    # stimulus for conformal predictor cmodel02 (80% acc): set taken from the first-pass selection
    avgsim_A, medsim_A, minsim_A = _export_stimulus(file_index, predset_A, set_size_A, 'A', '80acc')

    # stimulus for conformal predictor cmodel005 (95% acc): get its prediction set first
    # NOTE(review): cmodel005 was built with randomized=True and only `random` is reseeded in
    # this cell; if its forward pass draws random numbers, predset_B may differ between runs - confirm.
    img, class_id = image_folder[file_index]
    class_name = labeldict[class_id]
    _, predset_B = cmodel005(img.view(1,3,224,224).cuda())
    predset_B = predset_B[0].tolist()
    set_size_B = len(predset_B)
    stimuli_cmodel005.append({'file_index': file_index, 'set_size': set_size_B, 'prediction_set': predset_B})

    avgsim_B, medsim_B, minsim_B = _export_stimulus(file_index, predset_B, set_size_B, 'B', '95acc')

    stimuli_info.append({
        'file_index': file_index,
        # Fixed: the key used to be 'label_id"' (stray double quote), which became the CSV column name
        'label_id': class_id,
        'label_name': class_name,
        'setsize_80': set_size_A,
        'setsize_95': set_size_B,
        'avgsim_80': avgsim_A,
        'medsim_80': medsim_A,
        'minsim_80': minsim_A,
        'avgsim_95': avgsim_B,
        'medsim_95': medsim_B,
        'minsim_95': minsim_B
        })

with open('selected100_cmodel005.json', 'w') as f:
  json.dump(stimuli_cmodel005, f, indent=2)

df = pd.DataFrame(stimuli_info)
df.to_csv('stimuli_info.csv', index=False)

# Back up cached artifacts to Drive, but never overwrite a copy that is already there
for fpath in ['class_proxies.pt', 'cmodel02.pt', 'cmodel005.pt', 'stimuli_list.json']:
  if not os.path.exists(simfolder + fpath):
    shutil.copy(fpath, simfolder)

# Summary files are always copied to the stimuli folder
for fpath in ['stimuli_info.csv', 'selected100_cmodel02.json', 'selected100_cmodel005.json']:
  shutil.copy(fpath, stimuli_folder)