In [1]:
import os
import torch
import numpy as np
import pandas as pd
from collections import defaultdict
from tqdm.notebook import tqdm

# Seed torch's global random number generator so torch-based randomness is repeatable
torch.manual_seed(42)

#### Load files

In [2]:
# Directory holding one '<model>-<eval_setting>' folder per evaluated model
path_to_imagenet = '/scratch/users/vision/saarim/imagenet-testbed/outputs'

# One model name per line. strip() (instead of line[:-1]) keeps the last name intact
# when the file has no trailing newline; blank lines are skipped.
with open('model_names.txt', 'r') as filehandle:
    model_names = [line.strip() for line in filehandle if line.strip()]

# logits[eval_setting][model] -> loaded logits; targets[eval_setting] -> loaded targets
logits = defaultdict(dict)
targets = {}
# A set gives constant-time membership checks in the loop below
output_folders = set(os.listdir(path_to_imagenet))

for model in tqdm(model_names):
    for eval_setting in ['val', 'imagenetv2-matched-frequency']:
        output_folder = model + '-' + eval_setting
        if output_folder not in output_folders:
            continue

        model_targets = os.path.join(path_to_imagenet, output_folder, 'targets.pt')
        model_logits = os.path.join(path_to_imagenet, output_folder, 'logits.pt')

        # NOTE: torch.load unpickles the file; only point this at trusted outputs.
        if os.path.exists(model_logits):
            logits[eval_setting][model] = torch.load(model_logits)
        # Targets are stored once per eval setting (taken from the first model that has them)
        if eval_setting not in targets and os.path.exists(model_targets):
            targets[eval_setting] = torch.load(model_targets)

HBox(children=(FloatProgress(value=0.0, max=206.0), HTML(value='')))




#### Helper Functions

In [3]:
def accuracy_topk(logits, targets, topk=(1, 5)):
    """Compute top-k accuracy, in percent, for every k in ``topk``.

    Args:
        logits: 2-D tensor of per-class scores with one row per sample.
        targets: tensor of ground-truth class indices, one per sample
            (``targets.size(0)`` is used as the batch size).
        topk: iterable of k values to evaluate.

    Returns:
        dict mapping ``'top{k}'`` to the percentage of samples whose target
        is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = targets.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch)
    # so that row i holds every sample's rank-i guess.
    _, pred = logits.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(targets.view(1, -1).expand_as(pred))

    res = {}
    for k in topk:
        # reshape rather than view: the transposed mask is not contiguous
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res[f'top{k}'] = correct_k.mul_(100.0 / batch_size).item()
    return res

def find_correct(logits, targets, topk=1):
    """Returns a boolean tensor of shape (1, N) showing correct predictions.

    Args:
        logits: 2-D tensor of per-class scores with one row per sample.
        targets: tensor of ground-truth class indices, one per sample.
        topk: a sample counts as correct when its target is among the
            ``topk`` highest-scoring classes (default 1, i.e. the arg-max).

    Returns:
        Boolean tensor with a single row; column i is True when sample i
        was classified correctly under the top-k criterion.
    """
    # (N, topk) indices of the best classes, transposed to (topk, N)
    _, pred = logits.topk(topk, 1, True, True)
    pred = pred.t()
    hits = pred.eq(targets.view(1, -1).expand_as(pred))
    # Collapse the rank axis: a hit at any of the top-k ranks is a correct prediction.
    # For topk=1 this leaves the (1, N) mask unchanged.
    return hits.any(dim=0, keepdim=True)

def num_pairwise_errors(x_correct, y_correct):
    """Counts the column positions marked incorrect (False) in both x and y.

    Both arguments must be tensors of identical size; the column index
    (second coordinate) of each False entry is what gets compared.
    """
    assert x_correct.size() == y_correct.size(), 'x and y are not the same size'

    def _error_columns(correct):
        # Column coordinate of every entry that equals False
        return correct.eq(False).nonzero(as_tuple=True)[1]

    shared_errors = np.intersect1d(_error_columns(x_correct), _error_columns(y_correct))
    return len(shared_errors)

def pairwise_corrcoef(x_logits, y_logits):
    """Correlation between two models' flattened softmax probabilities.

    Softmax is applied along dim 1 (across each row), both results are
    flattened into 1-D vectors, and the off-diagonal entry of
    ``np.corrcoef`` for the two vectors is returned.
    Note: Logits are originally of shape torch.Size([50000, 1000])
    """
    probs_x, probs_y = (
        torch.nn.functional.softmax(scores, dim=1).flatten().numpy()
        for scores in (x_logits, y_logits)
    )
    corr_matrix = np.corrcoef(probs_x, probs_y)
    return corr_matrix[0][1]

def get_targets(eval_setting, targets):
    """Look up the targets stored for ``eval_setting``.

    Fails an assertion when ``targets`` has no entry for that setting.
    """
    is_known = eval_setting in targets
    assert is_known, 'eval setting not in targets dict'
    setting_targets = targets[eval_setting]
    return setting_targets

def partition(data, train_frac=0.8, seed=42, dim=None):
    """Deterministically shuffles and partitions a tensor into train and validation parts.

    The previous implementation returned ``Subset.dataset`` for both halves,
    which is the full, unsplit input, and drew its shuffle from the global
    RNG, so no reproducible split ever took place.

    Args:
        data: tensor to split.
        train_frac: fraction of the samples placed in the first (train) part.
        seed: seed of the private generator used for the shuffle. The
            permutation depends only on ``seed`` and the number of samples,
            so tensors with the same sample count are split at the same
            sample positions.
        dim: axis that indexes the samples. When None, the first axis whose
            size is not 1 is used, so both an (N, C) logits tensor and the
            (1, N) mask returned by ``find_correct`` are split per sample.

    Returns:
        Tuple ``(train_data, val_data)`` of tensors selected along ``dim``.
    """
    if dim is None:
        # Skip leading singleton axes; fall back to axis 0 if every axis has size 1
        dim = next((axis for axis, size in enumerate(data.shape) if size != 1), 0)

    num_samples = data.size(dim)
    train_size = int(train_frac * num_samples)

    # A private, freshly seeded generator keeps the split independent of the global RNG state
    generator = torch.Generator().manual_seed(seed)
    shuffled = torch.randperm(num_samples, generator=generator).to(data.device)

    train_data = data.index_select(dim, shuffled[:train_size])
    val_data = data.index_select(dim, shuffled[train_size:])
    return train_data, val_data
    


#### Pairwise Error Overlap

In [None]:
def create_pairwise_error_df(eval_setting):
    """Build a styled model-by-model table of shared misclassification counts.

    Reads the notebook-level ``logits`` and ``targets`` dictionaries. For
    every pair of models available under ``eval_setting`` the entry is the
    value of ``num_pairwise_errors`` on the train parts returned by
    ``partition`` for the two models' correctness masks.

    Args:
        eval_setting: key into ``logits`` / ``targets``.

    Returns:
        pandas Styler wrapping the pairwise table, captioned with ``eval_setting``.
    """
    eval_targets = get_targets(eval_setting, targets)

    # Correctness mask (train part) per model, computed once per model
    # instead of once per (x, y) pair.
    train_correct = {}
    for model, model_logits in logits[eval_setting].items():
        train_correct[model], _ = partition(find_correct(model_logits, eval_targets))

    pairwise_errors = defaultdict(dict)
    for x_model, x_correct_train in tqdm(train_correct.items()):
        for y_model, y_correct_train in train_correct.items():
            # utilize symmetric property of pairwise matrix to reduce computation
            if x_model != y_model and y_model in pairwise_errors and x_model in pairwise_errors[y_model]:
                pairwise_errors[x_model][y_model] = pairwise_errors[y_model][x_model]
            else:
                pairwise_errors[x_model][y_model] = num_pairwise_errors(x_correct_train, y_correct_train)

    df = pd.DataFrame(pairwise_errors)
    # Put the caption above the table in a larger font
    styles = [dict(selector='caption', props=[('caption-side', 'top'), ("font-size", "150%")])]
    df = df.style.set_table_styles(styles).set_caption(eval_setting)
    return df

# Build the shared-error table for each evaluation setting, then render both
df_val_pairwise_error, df_imagenetv2_pairwise_error = (
    create_pairwise_error_df(setting)
    for setting in ('val', 'imagenetv2-matched-frequency')
)

display(df_val_pairwise_error, df_imagenetv2_pairwise_error)

HBox(children=(FloatProgress(value=0.0, max=186.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=203.0), HTML(value='')))

#### Pairwise Correlation Between Concatenated Predicted Probability Vectors

In [None]:
def create_pairwise_corr_df(eval_setting):
    """Build a styled model-by-model table of prediction correlations.

    Reads the notebook-level ``logits`` and ``targets`` dictionaries. For
    every pair of models available under ``eval_setting`` the entry is the
    value of ``pairwise_corrcoef`` on the train parts returned by
    ``partition`` for the two models' logits.

    Args:
        eval_setting: key into ``logits`` / ``targets``.

    Returns:
        pandas Styler wrapping the pairwise table, captioned with ``eval_setting``.
    """
    # Only validates that eval_setting is known; the targets themselves are not used here
    get_targets(eval_setting, targets)

    pairwise_corr = defaultdict(dict)
    for x_model, x_logits in tqdm(logits[eval_setting].items()):
        x_train, _ = partition(x_logits)

        for y_model, y_logits in logits[eval_setting].items():
            # utilize symmetric property of pairwise matrix to reduce computation
            if x_model != y_model and y_model in pairwise_corr and x_model in pairwise_corr[y_model]:
                pairwise_corr[x_model][y_model] = pairwise_corr[y_model][x_model]
                continue

            # Partition y only when the value actually has to be computed
            y_train, _ = partition(y_logits)
            pairwise_corr[x_model][y_model] = pairwise_corrcoef(x_train, y_train)

    df = pd.DataFrame(pairwise_corr)
    # Put the caption above the table in a larger font
    styles = [dict(selector='caption', props=[('caption-side', 'top'), ("font-size", "150%")])]
    df = df.style.set_table_styles(styles).set_caption(eval_setting)
    return df

# Build the correlation table for each evaluation setting, then render both
df_val_pairwise_corr, df_imagenetv2_pairwise_corr = (
    create_pairwise_corr_df(setting)
    for setting in ('val', 'imagenetv2-matched-frequency')
)

display(df_val_pairwise_corr, df_imagenetv2_pairwise_corr)