# 🧐 Detectors' leaderboard

In [2]:
import os
import pandas as pd
import detectors
import warnings
import json
warnings.filterwarnings('ignore')

NOTE: Redirects are currently not supported in Windows or MacOs.


In [3]:
# Root directory of the stored benchmark results, taken from the detectors
# package configuration. The leaderboard cells below look for
# <RESULTS_FOLDER>/<pipeline name>/results.csv.
RESULTS_FOLDER = detectors.config.RESULTS_DIR
# Bare expression so the notebook displays the resolved path.
RESULTS_FOLDER

'/Users/dadalto/github/detectors/results/'

In [26]:
def create_leaderboard(
    results_folder: str,
    pipeline_name: str,
    datasets=("average",),
    exclude_seeds=(),
    ascending: bool = False,
    keep_seed: bool = True,
) -> str:
    """Build a markdown leaderboard of mean AUROC per (model, method) pair.

    Reads ``<results_folder>/<pipeline_name>/results.csv``, removes duplicated
    rows and rows containing any missing value, keeps the rows whose
    ``objective`` is ``"auroc"`` and whose ``objective_dataset`` is
    ``"average"``, extracts the ``"auroc"`` entry of every requested dataset
    column and averages the remaining rows of each (model, method) pair.

    Args:
        results_folder: Directory that contains one sub-folder per pipeline.
        pipeline_name: Sub-folder of ``results_folder`` holding ``results.csv``.
        datasets: Names of the CSV columns to report. Must include
            ``"average"``, which is the ranking column.
        exclude_seeds: Seed values whose rows are discarded before averaging.
        ascending: Sort direction applied to both ``model`` and ``average``.
            The default (``False``) lists the highest average first.
        keep_seed: When ``True`` (default) the output keeps the ``seed``
            column, which after aggregation holds the mean of the seeds.

    Returns:
        The aggregated table rendered by ``DataFrame.to_markdown(index=False)``.

    Raises:
        ValueError: If ``"average"`` is not part of ``datasets``.
    """
    datasets = list(datasets)
    if "average" not in datasets:
        raise ValueError('datasets must include "average": it is the ranking column')
    excluded = list(exclude_seeds)

    csv_path = os.path.join(results_folder, pipeline_name, "results.csv")
    runs = pd.read_csv(csv_path).drop_duplicates().dropna()
    if excluded:
        runs = runs[~runs["seed"].isin(excluded)]
    runs = runs.query('objective == "auroc" and objective_dataset == "average"')

    # Work on an explicit copy: assigning columns onto the filtered view is a
    # chained assignment that pandas warns about (and warnings are silenced
    # notebook-wide, so the problem would go unnoticed).
    scores = runs[["model", "method", "seed"] + datasets].copy()
    for name in datasets:
        # Each cell is a dict-like string written with single quotes; swapping
        # the quotes lets json parse it (presumably a Python dict repr - TODO
        # confirm against the code that writes results.csv).
        scores[name] = scores[name].apply(
            lambda cell: json.loads(cell.replace("'", '"'))["auroc"]
        )

    board = (
        scores.groupby(["model", "method"])
        .mean()
        .reset_index()
        .sort_values(["model", "average"], ascending=ascending)
    )
    if not keep_seed:
        board = board.drop(columns=["seed"])
    return board.to_markdown(index=False)

## CIFAR-10

In [27]:
# Name of the pipeline ranked in this cell; it is joined onto RESULTS_FOLDER
# to locate that pipeline's results.csv.
PIPELINE_NAME = "ood_validation_cifar10"
def create_leaderboard(
    results_folder: str,
    pipeline_name: str,
    datasets=("cifar100", "average"),
    exclude_seeds=(42,),
) -> str:
    """Build the CIFAR-10 markdown leaderboard of mean AUROC per (model, method).

    Reads ``<results_folder>/<pipeline_name>/results.csv``, removes duplicated
    rows and rows containing any missing value, discards the rows of the
    excluded seeds, keeps the rows whose ``objective`` is ``"auroc"`` and whose
    ``objective_dataset`` is ``"average"``, extracts the ``"auroc"`` entry of
    every dataset column and averages the rows of each (model, method) pair.

    NOTE(review): this cell redefines ``create_leaderboard`` and therefore
    replaces the definition made by the earlier cell; consider keeping a
    single parameterised function.

    Args:
        results_folder: Directory that contains one sub-folder per pipeline.
        pipeline_name: Sub-folder of ``results_folder`` holding ``results.csv``.
        datasets: Names of the CSV columns to report. Must include
            ``"average"``, which is the ranking column.
        exclude_seeds: Seed values whose rows are discarded before averaging.
            Defaults to ``(42,)``, i.e. every run except seed 42 is kept.

    Returns:
        The aggregated table, sorted by ``model`` and ``average`` in
        descending order, rendered by ``DataFrame.to_markdown(index=False)``.
        The ``seed`` column is kept and holds the mean of the aggregated seeds.

    Raises:
        ValueError: If ``"average"`` is not part of ``datasets``.
    """
    datasets = list(datasets)
    if "average" not in datasets:
        raise ValueError('datasets must include "average": it is the ranking column')
    excluded = list(exclude_seeds)

    csv_path = os.path.join(results_folder, pipeline_name, "results.csv")
    runs = pd.read_csv(csv_path).drop_duplicates().dropna()
    if excluded:
        # Exclude (not select) the listed seeds.
        runs = runs[~runs["seed"].isin(excluded)]
    runs = runs.query('objective == "auroc" and objective_dataset == "average"')

    # Work on an explicit copy: assigning columns onto the filtered view is a
    # chained assignment that pandas warns about (and warnings are silenced
    # notebook-wide, so the problem would go unnoticed).
    scores = runs[["model", "method", "seed"] + datasets].copy()
    for name in datasets:
        # Each cell is a dict-like string written with single quotes; swapping
        # the quotes lets json parse it (presumably a Python dict repr - TODO
        # confirm against the code that writes results.csv).
        scores[name] = scores[name].apply(
            lambda cell: json.loads(cell.replace("'", '"'))["auroc"]
        )

    board = (
        scores.groupby(["model", "method"])
        .mean()
        .reset_index()
        .sort_values(["model", "average"], ascending=False)
    )
    return board.to_markdown(index=False)
# Render the leaderboard of the selected pipeline and print the markdown text.
leaderboard = create_leaderboard(
    results_folder=RESULTS_FOLDER,
    pipeline_name=PIPELINE_NAME,
)
print(leaderboard)

| model                                 | method               |   seed |   cifar100 |   average |
|:--------------------------------------|:---------------------|-------:|-----------:|----------:|
| vit_base_patch16_224_in21k_ft_cifar10 | vim                  |      1 |   0.987082 |  0.995861 |
| vit_base_patch16_224_in21k_ft_cifar10 | knn_euclides         |      1 |   0.98799  |  0.995314 |
| vit_base_patch16_224_in21k_ft_cifar10 | mahalanobis          |      1 |   0.987686 |  0.992963 |
| vit_base_patch16_224_in21k_ft_cifar10 | react                |      1 |   0.985091 |  0.991297 |
| vit_base_patch16_224_in21k_ft_cifar10 | projection           |      1 |   0.984652 |  0.991269 |
| vit_base_patch16_224_in21k_ft_cifar10 | max_logits           |      1 |   0.983717 |  0.990563 |
| vit_base_patch16_224_in21k_ft_cifar10 | odin                 |      1 |   0.984154 |  0.990455 |
| vit_base_patch16_224_in21k_ft_cifar10 | doctor               |      1 |   0.983136 |  0.989888 |
| vit_base

## CIFAR-100

In [10]:
# Name of the pipeline ranked in this cell; it is joined onto RESULTS_FOLDER
# to locate that pipeline's results.csv.
PIPELINE_NAME = "ood_validation_cifar100"
def create_leaderboard(
    results_folder: str,
    pipeline_name: str,
    datasets=("svhn", "cifar10", "average"),
) -> str:
    """Build the CIFAR-100 markdown leaderboard of mean AUROC per (model, method).

    Reads ``<results_folder>/<pipeline_name>/results.csv``, removes duplicated
    rows and rows containing any missing value, keeps the rows whose
    ``objective`` is ``"auroc"`` and whose ``objective_dataset`` is
    ``"average"``, extracts the ``"auroc"`` entry of every dataset column and
    averages the rows of each (model, method) pair.

    NOTE(review): this cell redefines ``create_leaderboard`` and therefore
    replaces the definitions made by earlier cells; consider keeping a single
    parameterised function.

    Args:
        results_folder: Directory that contains one sub-folder per pipeline.
        pipeline_name: Sub-folder of ``results_folder`` holding ``results.csv``.
        datasets: Names of the CSV columns to report. Must include
            ``"average"``, which is the ranking column.

    Returns:
        The aggregated table, sorted by ``model`` and ``average`` in
        descending order, rendered by ``DataFrame.to_markdown(index=False)``.
        The ``seed`` column is kept and holds the mean of the aggregated seeds.

    Raises:
        ValueError: If ``"average"`` is not part of ``datasets``.
    """
    datasets = list(datasets)
    if "average" not in datasets:
        raise ValueError('datasets must include "average": it is the ranking column')

    csv_path = os.path.join(results_folder, pipeline_name, "results.csv")
    runs = pd.read_csv(csv_path).drop_duplicates().dropna()
    runs = runs.query('objective == "auroc" and objective_dataset == "average"')

    # Work on an explicit copy: assigning columns onto the filtered view is a
    # chained assignment that pandas warns about (and warnings are silenced
    # notebook-wide, so the problem would go unnoticed).
    scores = runs[["model", "method", "seed"] + datasets].copy()
    for name in datasets:
        # Each cell is a dict-like string written with single quotes; swapping
        # the quotes lets json parse it (presumably a Python dict repr - TODO
        # confirm against the code that writes results.csv).
        scores[name] = scores[name].apply(
            lambda cell: json.loads(cell.replace("'", '"'))["auroc"]
        )

    board = (
        scores.groupby(["model", "method"])
        .mean()
        .reset_index()
        .sort_values(["model", "average"], ascending=False)
    )
    return board.to_markdown(index=False)
# Render the leaderboard of the selected pipeline and print the markdown text.
leaderboard = create_leaderboard(
    results_folder=RESULTS_FOLDER,
    pipeline_name=PIPELINE_NAME,
)
print(leaderboard)

| model                                  | method               |   seed |     svhn |   cifar10 |   average |
|:---------------------------------------|:---------------------|-------:|---------:|----------:|----------:|
| vit_base_patch16_224_in21k_ft_cifar100 | vim                  |      1 | 0.963327 |  0.954967 |  0.967974 |
| vit_base_patch16_224_in21k_ft_cifar100 | mahalanobis          |      1 | 0.953243 |  0.963029 |  0.963148 |
| vit_base_patch16_224_in21k_ft_cifar100 | knn_euclides         |      1 | 0.95498  |  0.94789  |  0.961939 |
| vit_base_patch16_224_in21k_ft_cifar100 | maxcosine            |      1 | 0.945643 |  0.958058 |  0.957294 |
| vit_base_patch16_224_in21k_ft_cifar100 | projection           |      1 | 0.947708 |  0.96081  |  0.956611 |
| vit_base_patch16_224_in21k_ft_cifar100 | react                |      1 | 0.935127 |  0.957787 |  0.950207 |
| vit_base_patch16_224_in21k_ft_cifar100 | max_logits           |      1 | 0.931011 |  0.956229 |  0.948208 |
| vit_base

## ImageNet

In [5]:
# Name of the pipeline ranked in this cell; it is joined onto RESULTS_FOLDER
# to locate that pipeline's results.csv.
PIPELINE_NAME = "ood_validation_imagenet"
def create_leaderboard(
    results_folder: str,
    pipeline_name: str,
    datasets=("imagenet_o", "average"),
    ascending: bool = True,
) -> str:
    """Build the ImageNet markdown leaderboard of mean AUROC per (model, method).

    Reads ``<results_folder>/<pipeline_name>/results.csv``, removes duplicated
    rows and rows containing any missing value, keeps the rows whose
    ``objective`` is ``"auroc"`` and whose ``objective_dataset`` is
    ``"average"``, extracts the ``"auroc"`` entry of every dataset column and
    averages the rows of each (model, method) pair. The ``seed`` column is
    dropped from the final table.

    NOTE(review): this cell redefines ``create_leaderboard`` and therefore
    replaces the definitions made by earlier cells; consider keeping a single
    parameterised function.

    Args:
        results_folder: Directory that contains one sub-folder per pipeline.
        pipeline_name: Sub-folder of ``results_folder`` holding ``results.csv``.
        datasets: Names of the CSV columns to report. Must include
            ``"average"``, which is the ranking column.
        ascending: Sort direction applied to both ``model`` and ``average``.
            NOTE(review): the default keeps this cell's existing ascending
            order (lowest average first), whereas the CIFAR cells sort in
            descending order - confirm which one is intended.

    Returns:
        The aggregated table rendered by ``DataFrame.to_markdown(index=False)``.

    Raises:
        ValueError: If ``"average"`` is not part of ``datasets``.
    """
    datasets = list(datasets)
    if "average" not in datasets:
        raise ValueError('datasets must include "average": it is the ranking column')

    csv_path = os.path.join(results_folder, pipeline_name, "results.csv")
    runs = pd.read_csv(csv_path).drop_duplicates().dropna()
    runs = runs.query('objective == "auroc" and objective_dataset == "average"')

    # Work on an explicit copy: assigning columns onto the filtered view is a
    # chained assignment that pandas warns about (and warnings are silenced
    # notebook-wide, so the problem would go unnoticed).
    scores = runs[["model", "method", "seed"] + datasets].copy()
    for name in datasets:
        # Each cell is a dict-like string written with single quotes; swapping
        # the quotes lets json parse it (presumably a Python dict repr - TODO
        # confirm against the code that writes results.csv).
        scores[name] = scores[name].apply(
            lambda cell: json.loads(cell.replace("'", '"'))["auroc"]
        )

    board = (
        scores.groupby(["model", "method"])
        .mean()
        .reset_index()
        .sort_values(["model", "average"], ascending=ascending)
    )
    # The averaged seed carries no information, so it is not reported.
    board = board.drop(columns=["seed"])
    return board.to_markdown(index=False)
# Render the leaderboard of the selected pipeline and print the markdown text.
leaderboard = create_leaderboard(
    results_folder=RESULTS_FOLDER,
    pipeline_name=PIPELINE_NAME,
)
print(leaderboard)

| model                 | method               |   imagenet_o |   average |
|:----------------------|:---------------------|-------------:|----------:|
| deit_base_patch16_224 | gradnorm             |     0.259428 |  0.196753 |
| deit_base_patch16_224 | dice                 |     0.327737 |  0.268223 |
| deit_base_patch16_224 | doctor               |     0.5      |  0.5      |
| deit_base_patch16_224 | energy               |     0.695743 |  0.593337 |
| deit_base_patch16_224 | igeood_logits        |     0.578508 |  0.729425 |
| deit_base_patch16_224 | odin                 |     0.610155 |  0.810021 |
| deit_base_patch16_224 | max_logits           |     0.614988 |  0.812983 |
| deit_base_patch16_224 | react                |     0.724731 |  0.827364 |
| deit_base_patch16_224 | mahalanobis          |     0.763014 |  0.837237 |
| deit_base_patch16_224 | mcdropout            |     0.63648  |  0.841311 |
| deit_base_patch16_224 | msp                  |     0.63648  |  0.841311 |
| deit_base_