In [None]:
# %load_ext autoreload
# %autoreload 2

import os
import sys
import random
sys.path.insert(0, "../")
sys.path.insert(0, "../../")

from autogluon.vision import ImagePredictor, ImageDataset
import numpy as np
import pandas as pd

import seaborn as sns
from sklearn.metrics import roc_auc_score
from cleanlab.count import get_confident_thresholds
from cleanlab.internal.label_quality_utils import get_normalized_entropy

# Lift pandas' display limits so result tables render in full:
# no cap on rows, on columns, or on the width of a cell's contents.
for display_option in ("display.max_rows", "display.max_columns", "display.max_colwidth"):
    pd.set_option(display_option, None)

# Seeds Python's built-in `random` module only; other RNGs are not seeded here.
random.seed(10)

## Read data

In [None]:
%%time

# Architecture name; it is embedded in the checkpoint paths built below.
model = "swin_base_patch4_window7_224" # uses Torch backend
# model = "resnet50_v1" # uses MXNET backend

# Root image folder for every benchmark dataset.
dataset_roots = {
    "cifar-10": "/datasets/uly/ood-data/cifar10_png/",
    "cifar-100": "/datasets/uly/ood-data/cifar100_png/",
    "mnist": "/datasets/uly/ood-data/mnist_png/",
    "fashion-mnist": "/datasets/uly/ood-data/fashion_mnist_png/",
}

# Registry keyed by dataset name. Each entry starts with just "data_path" and
# gains "train_data", "test_data" and "model" in the loop below.
data_model_dict = {name: {"data_path": root} for name, root in dataset_roots.items()}

for dataset, entry in data_model_dict.items():

    print("--------------------------")
    print(f"Getting data for {dataset}")

    data_path = entry["data_path"]

    # from_folders yields three values; the first and last are kept as the
    # train and test splits, the middle one is discarded.
    entry["train_data"], _, entry["test_data"] = ImageDataset.from_folders(root=data_path)

    # Location of the saved predictor for this (model, dataset) combination.
    entry["model"] = f"./models/{model}_{dataset}.ag"

## Evaluate models on test data as a sanity check

In [None]:
%%time

# One {"dataset", "top1"} record per dataset, in registry order.
accuracy_result_list = []

for dataset, data in data_model_dict.items():
    checkpoint_path, held_out_split = data["model"], data["test_data"]

    print("----------------------------------")
    print(f"Dataset: {dataset}")

    # Restore the saved predictor for this dataset.
    print("  Loading model...")
    predictor_loaded = ImagePredictor.load(checkpoint_path)

    # Score it on the dataset's own test split.
    print("  Evaluating model...")
    eval_ = predictor_loaded.evaluate(held_out_split)
    print(f"    Evaluation: {eval_}")

    accuracy_result_list.append({"dataset": dataset, "top1": eval_["top1"]})

## Evaluate OOD Scores on TEST data

## Save the pred_probs used for OOD scoring

In [None]:
%%time

# (in-distribution, out-of-distribution) dataset pairs to evaluate
in_out_pairs = [
    {"in": in_name, "out": out_name}
    for in_name, out_name in (
        ("cifar-10", "cifar-100"),
        ("cifar-100", "cifar-10"),
        ("mnist", "fashion-mnist"),
        ("fashion-mnist", "mnist"),
    )
]

for in_out_pair in in_out_pairs:

    in_dataset = in_out_pair["in"]
    out_dataset = in_out_pair["out"]
    in_entry = data_model_dict[in_dataset]

    # Checkpoint of the model trained on the in-distribution train split.
    in_model_path = in_entry["model"]

    # In-distribution TRAIN split and its class labels.
    in_train_dataset = in_entry["train_data"]
    in_train_dataset_class_labels = in_train_dataset.label.values

    # TEST splits used for evaluation: in-distribution (with class labels)
    # and out-of-distribution.
    in_test_dataset = in_entry["test_data"]
    in_test_dataset_class_labels = in_test_dataset.label.values
    out_test_dataset = data_model_dict[out_dataset]["test_data"]

    print("-----------------------------------------------------")
    print("(in-distribution, out-of-distribution) dataset pair: ", in_dataset, out_dataset)

    print("  Loading model...")
    in_predictor_loaded = ImagePredictor.load(in_model_path)

    # The same in-distribution model scores all three splits, in this order.
    print("  Generating predicted probabilities...")
    in_train_pred_probs, in_test_pred_probs, out_test_pred_probs = (
        in_predictor_loaded.predict_proba(data=split, as_pandas=False)
        for split in (in_train_dataset, in_test_dataset, out_test_dataset)
    )

    # Per-experiment output folder, created on first use.
    out_folder = f"./model_{model}_experiment_in_{in_dataset}_out_{out_dataset}/"
    os.makedirs(out_folder, exist_ok=True)

    # Persist predicted probabilities and labels so the scoring cell can run
    # without re-loading the model.
    arrays_to_save = {
        "in_train_pred_probs.npy": in_train_pred_probs,
        "in_test_pred_probs.npy": in_test_pred_probs,
        "out_test_pred_probs.npy": out_test_pred_probs,
        "in_train_dataset_class_labels.npy": in_train_dataset_class_labels,
        "in_test_dataset_class_labels.npy": in_test_dataset_class_labels,
    }
    for file_name, array in arrays_to_save.items():
        np.save(out_folder + file_name, array)

## Run OOD scoring on loaded pred_probs

In [None]:
def cross_entropy(p, q):
    """Return ``-sum(p * log(q)) / q.shape[0]``.

    The sum runs over every element, and the result is divided by the length
    of the first axis of ``q``.

    Elements where ``p == 0`` contribute exactly zero, following the usual
    ``0 * log(0) = 0`` convention. Without this, a single position where both
    ``p`` and ``q`` are zero turns the whole result into NaN.

    Args:
        p: Array-like of target probabilities.
        q: Array-like of predicted probabilities, broadcastable against ``p``.

    Returns:
        A scalar. It is ``inf`` when ``q`` is zero somewhere that ``p`` is
        positive.
    """
    p = np.asarray(p)
    q = np.asarray(q)
    # log(0) and 0 * -inf are expected here and handled by the mask below,
    # so silence the floating-point warnings they would otherwise raise.
    with np.errstate(divide="ignore", invalid="ignore"):
        terms = np.where(p == 0, 0.0, p * np.log(q))
    return -np.sum(terms) / q.shape[0]

In [None]:
%%time

# pairs of (in-distribution, out-of-distribution) datasets to evaluate
in_out_pairs = [
    {"in": "cifar-10", "out": "cifar-100"},
    {"in": "cifar-100", "out": "cifar-10"},
    {"in": "mnist", "out": "fashion-mnist"},
    {"in": "fashion-mnist", "out": "mnist"},
]

# NOTE(review): k_max is not read anywhere in this cell; kept in case another cell uses it.
k_max = 110

# One dict of AUROC values per (in, out) pair; turned into a DataFrame later.
results_list = []

for in_out_pair in in_out_pairs:

    in_dataset, out_dataset = in_out_pair["in"], in_out_pair["out"]

    print("-----------------------------------------------------")
    print("(in-distribution, out-of-distribution) dataset pair: ", in_dataset, out_dataset)

    # Folder the export cell wrote the arrays to
    out_folder = f"./model_{model}_experiment_in_{in_dataset}_out_{out_dataset}/"

    # Load files
    in_train_pred_probs = np.load(out_folder + "in_train_pred_probs.npy")
    in_test_pred_probs = np.load(out_folder + "in_test_pred_probs.npy")
    out_test_pred_probs = np.load(out_folder + "out_test_pred_probs.npy")

    in_train_dataset_class_labels = np.load(out_folder + "in_train_dataset_class_labels.npy")
    in_test_dataset_class_labels = np.load(out_folder + "in_test_dataset_class_labels.npy")

    # Combined evaluation set: in-distribution test rows first, then the
    # out-of-distribution test rows. This variable was previously never
    # defined, so the cell raised NameError on a fresh kernel.
    test_pred_probs = np.vstack([in_test_pred_probs, out_test_pred_probs])

    # Create OOD binary labels (1 = out-of-distribution), in the same row
    # order as test_pred_probs
    in_labels = np.zeros(shape=len(in_test_pred_probs))
    out_labels = np.ones(shape=len(out_test_pred_probs))
    ood_mask = np.hstack([in_labels, out_labels]).astype(int) # OOD binary indicator
    assert len(ood_mask) == len(test_pred_probs)

    #### Get scores

    # Per-class confident thresholds computed from the in-distribution TRAIN
    # split; used below for Adjusted MSP & Adjusted Entropy
    class_confident_thresholds = get_confident_thresholds(in_train_dataset_class_labels, in_train_pred_probs, multi_label=False)

    # Train Entropy
    # NOTE(review): not used by any score below; kept for inspection.
    in_train_entropy = get_normalized_entropy(in_train_pred_probs)

    #### Get scores for test dataset

    # 1 - Max Pred Probs
    test_one_minus_max_pred_prob = 1. - test_pred_probs.max(axis=1)

    # Entropy
    test_entropy = get_normalized_entropy(test_pred_probs)

    # Adjust pred-probs for Adjusted MSP and Entropy: subtract each class's
    # threshold, shift by the largest threshold, then renormalize each row
    # to sum to 1
    test_pred_probs_adj = test_pred_probs - class_confident_thresholds
    test_pred_probs_adj += class_confident_thresholds.max()
    test_pred_probs_adj /= test_pred_probs_adj.sum(axis=1)[:, None]

    # Adjusted MSP
    test_adj_msp = 1. - test_pred_probs_adj.max(axis=1)

    # Adjusted Entropy
    test_adj_entropy = get_normalized_entropy(test_pred_probs_adj)

    #### Evaluate scores (higher score should mean "more likely OOD")

    auroc_test_one_minus_max_pred_prob = roc_auc_score(ood_mask, test_one_minus_max_pred_prob)
    auroc_test_entropy = roc_auc_score(ood_mask, test_entropy)
    auroc_test_adj_one_minus_max_pred_prob = roc_auc_score(ood_mask, test_adj_msp)
    auroc_test_adj_entropy = roc_auc_score(ood_mask, test_adj_entropy)

    results = {
        "in_distribution": in_dataset,
        "out_of_distribution": out_dataset,

        "auroc_test_one_minus_max_pred_prob": auroc_test_one_minus_max_pred_prob,
        "auroc_test_entropy": auroc_test_entropy,

        # Key spelling ("mst") is kept as-is: later cells select columns by this name.
        "auroc_test_adj_mst": auroc_test_adj_one_minus_max_pred_prob,
        "auroc_test_adj_entropy": auroc_test_adj_entropy,
    }

    results_list.append(results)
    
    
    

## Put results into a DataFrame

In [None]:
# One row per (in-distribution, out-of-distribution) pair; columns are the keys of each result dict.
df_results = pd.DataFrame(data=results_list)

In [None]:
# Results-column name -> header shown in the exported table.
# Insertion order is the left-to-right column order of that table.
cols_rename_dict = dict(
    in_distribution='In Distribution',
    out_of_distribution='Out of Distribution',
    auroc_test_one_minus_max_pred_prob='MSP',
    auroc_test_entropy='Entropy',
    auroc_test_adj_mst='Adjusted MSP',
    auroc_test_adj_entropy='Adjusted Entropy',
)

# Columns to select from the results frame, in display order.
cols = list(cols_rename_dict)

In [None]:
# Preview of the table that gets exported to LaTeX: selected columns, readable headers.
df_results.loc[:, cols].rename(columns=cols_rename_dict)

In [None]:
# Export the renamed results table as LaTeX: no index column, floats to 4 decimal places.
latex_path = f"{model}_ood_auroc.tex"
with open(latex_path, "w") as latex_file:
    renamed_results = df_results[cols].rename(columns=cols_rename_dict)
    latex_file.write(renamed_results.to_latex(index=False, float_format="%.4f"))