# 🧐 Detectors' leaderboard

In [1]:
import os
import pandas as pd
import detectors
import warnings
import json

# Suppress every warning raised from this point on (no category or module filter is given).
# NOTE(review): this also hides pandas warnings triggered by the cells below — consider narrowing it.
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Root results directory taken from the `detectors` package configuration; the cells
# below read `<RESULTS_FOLDER>/<pipeline name>/results.csv` from it.
RESULTS_FOLDER = detectors.config.RESULTS_DIR
# Echo the resolved path as the cell output.
RESULTS_FOLDER

'/Users/dadalto/github/detectors/results/'

In [40]:
# Display names for the detection-method identifiers found in the "method" column
# of the results files. Identifiers missing from this mapping are left unchanged
# by the `replace` calls that consume it.
bmethods = {
    "max_logits": "MaxLogits",
    "kl_matching": "KLMatching",
    "odin": "ODIN",
    "igeood_logits": "IGEOOD",
    "dice": "DICE",
    "gradnorm": "GradNorm",
    "vim": "ViM",
    "msp": "MSP",
    "energy": "Energy",
    "react": "ReAct",
    "gmm": "GMM",
    "mahalanobis": "Mahalanobis",
    "projection": "Proj. (Ours)",
    # Spelled consistently with the "react" entry above (was "ReaAct+Proj.").
    "react_projection": "ReAct+Proj.",
    "knn_euclides": "KNN",
    "relative_mahalanobis": "RMD",
    "maxcosine": "MaxCosine",
}
# Display names for the dataset identifiers used as column names in the results files.
# Every key appears exactly once: the previous literal repeated several keys, and a
# repeated key in a dict literal silently keeps only the last value. The values
# below are the ones that were effectively in use.
bdatasets = {
    # ImageNet out-of-distribution sets (MOS splits)
    "mos_inaturalist": "iNat",
    "mos_sun": "SUN",
    "mos_places365": "Places",
    # NOTE(review): "textures" was listed twice ("Textures", then "textures"); the
    # later, lower-case value was the effective one and is kept — confirm which is wanted.
    "textures": "textures",
    "average": "Average",
    # CIFAR out-of-distribution sets (shown under their raw identifiers)
    "cifar10": "cifar10",
    "cifar100": "cifar100",
    "svhn": "svhn",
    "isun": "isun",
    "lsun_c": "lsun_c",
    "lsun_r": "lsun_r",
    "tiny_imagenet_c": "tiny_imagenet_c",
    "tiny_imagenet_r": "tiny_imagenet_r",
    "places365": "places365",
    "english_chars": "english_chars",
    # ImageNet benchmark sets: imagenet_o, ninco, ssb_hard, ssb_easy and the "clean" variants
    "ninco": "NINCO",
    "openimage_o": "OpenImage-O",
    "imagenet_o": "ImageNet-O",
    "ssb_hard": "SSB-Hard",
    "textures_clean": "Textures (clean)",
    "ssb_easy": "SSB-Easy",
    "places_clean": "Places (clean)",
    "inaturalist_clean": "iNaturalist (clean)",
    "openimage_o_clean": "OpenImage-O (clean)",
    "species_clean": "Species (clean)",
}
# Display names for the backbone identifiers found in the "model" column.
bmodels = dict(
    tv_resnet50="ResNet50",
    vit_base_patch16_224="ViT-B-16",
    dino_vitb16_ft_in1k="DINO",
)
# Single lookup table combining the method, dataset and model display names.
# Merge order is methods, then datasets, then models, so on a key collision the
# later mapping wins.
pretty_names = dict(bmethods)
pretty_names.update(bdatasets)
pretty_names.update(bmodels)

In [4]:
def create_leaderboard(results_folder: str, pipeline_name: str):
    """Render the average-AUROC leaderboard of a pipeline as a markdown table.

    Reads ``<results_folder>/<pipeline_name>/results.csv``, removes duplicate rows
    and rows with missing values, keeps the runs whose objective is the AUROC on
    the "average" dataset, and averages the remaining numeric columns (the seed and
    the AUROC) for every (model, method) pair.

    Args:
        results_folder: Directory containing one sub-folder per pipeline.
        pipeline_name: Name of the pipeline sub-folder to summarise.

    Returns:
        The table produced by ``DataFrame.to_markdown(index=False)``, ordered by
        model and then by average AUROC, both descending.
    """
    csv_path = os.path.join(results_folder, pipeline_name, "results.csv")
    results = pd.read_csv(csv_path).drop_duplicates().dropna()
    results = results.query('objective == "auroc" and objective_dataset == "average"')

    score_columns = ["average"]
    # Each score cell holds the text of a Python dict; swap the quotes so it parses
    # as JSON and keep only its "auroc" entry.
    parsed_scores = {
        column: results[column].apply(lambda cell: json.loads(cell.replace("'", '"'))["auroc"])
        for column in score_columns
    }
    results = results.assign(**parsed_scores)[["model", "method", "seed", *score_columns]]

    leaderboard = (
        results.groupby(["model", "method"])
        .mean()
        .reset_index()
        .sort_values(["model", "average"], ascending=False)
    )
    return leaderboard.to_markdown(index=False)

## CIFAR-10

In [27]:
# Sub-folder of RESULTS_FOLDER whose results.csv is summarised in this cell.
PIPELINE_NAME = "ood_validation_cifar10"


def create_leaderboard(results_folder: str, pipeline_name: str):
    """Render the CIFAR-10 validation leaderboard as a markdown table.

    Reads ``<results_folder>/<pipeline_name>/results.csv``, removes duplicate rows
    and rows with missing values, discards the runs made with seed 42, keeps the
    runs whose objective is the AUROC on the "average" dataset, and averages the
    seed and the AUROC columns for every (model, method) pair.

    Args:
        results_folder: Directory containing one sub-folder per pipeline.
        pipeline_name: Name of the pipeline sub-folder to summarise.

    Returns:
        The table produced by ``DataFrame.to_markdown(index=False)``, ordered by
        model and then by average AUROC, both descending.
    """
    csv_path = os.path.join(results_folder, pipeline_name, "results.csv")
    results = pd.read_csv(csv_path).drop_duplicates().dropna()
    # Exclude seed 42 and keep only the runs tuned for AUROC on the average dataset.
    results = results.query('seed != 42 and objective == "auroc" and objective_dataset == "average"')

    score_columns = ["cifar100", "average"]
    # Each score cell holds the text of a Python dict; swap the quotes so it parses
    # as JSON and keep only its "auroc" entry.
    parsed_scores = {
        column: results[column].apply(lambda cell: json.loads(cell.replace("'", '"'))["auroc"])
        for column in score_columns
    }
    results = results.assign(**parsed_scores)[["model", "method", "seed", *score_columns]]

    leaderboard = (
        results.groupby(["model", "method"])
        .mean()
        .reset_index()
        .sort_values(["model", "average"], ascending=False)
    )
    return leaderboard.to_markdown(index=False)


# Build the leaderboard for the pipeline selected above and print the markdown text.
leaderboard = create_leaderboard(RESULTS_FOLDER, PIPELINE_NAME)
print(leaderboard)

| model                                 | method               |   seed |   cifar100 |   average |
|:--------------------------------------|:---------------------|-------:|-----------:|----------:|
| vit_base_patch16_224_in21k_ft_cifar10 | vim                  |      1 |   0.987082 |  0.995861 |
| vit_base_patch16_224_in21k_ft_cifar10 | knn_euclides         |      1 |   0.98799  |  0.995314 |
| vit_base_patch16_224_in21k_ft_cifar10 | mahalanobis          |      1 |   0.987686 |  0.992963 |
| vit_base_patch16_224_in21k_ft_cifar10 | react                |      1 |   0.985091 |  0.991297 |
| vit_base_patch16_224_in21k_ft_cifar10 | projection           |      1 |   0.984652 |  0.991269 |
| vit_base_patch16_224_in21k_ft_cifar10 | max_logits           |      1 |   0.983717 |  0.990563 |
| vit_base_patch16_224_in21k_ft_cifar10 | odin                 |      1 |   0.984154 |  0.990455 |
| vit_base_patch16_224_in21k_ft_cifar10 | doctor               |      1 |   0.983136 |  0.989888 |
| vit_base

## CIFAR-100

In [10]:
# Sub-folder of RESULTS_FOLDER whose results.csv is summarised in this cell.
PIPELINE_NAME = "ood_validation_cifar100"


def create_leaderboard(results_folder: str, pipeline_name: str):
    """Render the CIFAR-100 validation leaderboard as a markdown table.

    Reads ``<results_folder>/<pipeline_name>/results.csv``, removes duplicate rows
    and rows with missing values, keeps the runs whose objective is the AUROC on
    the "average" dataset, and averages the seed and the AUROC columns for every
    (model, method) pair.

    Args:
        results_folder: Directory containing one sub-folder per pipeline.
        pipeline_name: Name of the pipeline sub-folder to summarise.

    Returns:
        The table produced by ``DataFrame.to_markdown(index=False)``, ordered by
        model and then by average AUROC, both descending.
    """
    csv_path = os.path.join(results_folder, pipeline_name, "results.csv")
    results = pd.read_csv(csv_path).drop_duplicates().dropna()
    results = results.query('objective == "auroc" and objective_dataset == "average"')

    score_columns = ["svhn", "cifar10", "average"]
    # Each score cell holds the text of a Python dict; swap the quotes so it parses
    # as JSON and keep only its "auroc" entry.
    parsed_scores = {
        column: results[column].apply(lambda cell: json.loads(cell.replace("'", '"'))["auroc"])
        for column in score_columns
    }
    results = results.assign(**parsed_scores)[["model", "method", "seed", *score_columns]]

    leaderboard = (
        results.groupby(["model", "method"])
        .mean()
        .reset_index()
        .sort_values(["model", "average"], ascending=False)
    )
    return leaderboard.to_markdown(index=False)


# Build the leaderboard for the pipeline selected above and print the markdown text.
leaderboard = create_leaderboard(RESULTS_FOLDER, PIPELINE_NAME)
print(leaderboard)

| model                                  | method               |   seed |     svhn |   cifar10 |   average |
|:---------------------------------------|:---------------------|-------:|---------:|----------:|----------:|
| vit_base_patch16_224_in21k_ft_cifar100 | vim                  |      1 | 0.963327 |  0.954967 |  0.967974 |
| vit_base_patch16_224_in21k_ft_cifar100 | mahalanobis          |      1 | 0.953243 |  0.963029 |  0.963148 |
| vit_base_patch16_224_in21k_ft_cifar100 | knn_euclides         |      1 | 0.95498  |  0.94789  |  0.961939 |
| vit_base_patch16_224_in21k_ft_cifar100 | maxcosine            |      1 | 0.945643 |  0.958058 |  0.957294 |
| vit_base_patch16_224_in21k_ft_cifar100 | projection           |      1 | 0.947708 |  0.96081  |  0.956611 |
| vit_base_patch16_224_in21k_ft_cifar100 | react                |      1 | 0.935127 |  0.957787 |  0.950207 |
| vit_base_patch16_224_in21k_ft_cifar100 | max_logits           |      1 | 0.931011 |  0.956229 |  0.948208 |
| vit_base

## ImageNet

In [11]:
# Sub-folder of RESULTS_FOLDER whose results.csv is summarised in this cell.
PIPELINE_NAME = "ood_validation_imagenet"


def create_leaderboard(results_folder: str, pipeline_name: str):
    """Render the ImageNet validation leaderboard as a markdown table.

    Reads ``<results_folder>/<pipeline_name>/results.csv`` and prints its column
    index. It then removes duplicate rows and rows with missing values, keeps the
    runs whose objective is the AUROC on the "average" dataset, and averages the
    ImageNet-O and OpenImage-O AUROC for every (model, method) pair. A
    ``new_mean`` column holds the row-wise mean of those two averaged scores.

    Args:
        results_folder: Directory containing one sub-folder per pipeline.
        pipeline_name: Name of the pipeline sub-folder to summarise.

    Returns:
        The table produced by ``DataFrame.to_markdown(index=False)``, ordered by
        model and then method, both ascending, without the seed column.
    """
    raw = pd.read_csv(os.path.join(results_folder, pipeline_name, "results.csv"))
    print(raw.columns)
    results = raw.drop_duplicates().dropna()
    results = results.query('objective == "auroc" and objective_dataset == "average"')

    score_columns = ["imagenet_o", "openimage_o"]
    # Each score cell holds the text of a Python dict; swap the quotes so it parses
    # as JSON and keep only its "auroc" entry.
    parsed_scores = {
        column: results[column].apply(lambda cell: json.loads(cell.replace("'", '"'))["auroc"])
        for column in score_columns
    }
    results = results.assign(**parsed_scores)[["model", "method", "seed", *score_columns]]

    leaderboard = (
        results.groupby(["model", "method"])
        .mean()
        .reset_index()
        .sort_values(["model", "method"], ascending=True)
        .drop(columns=["seed"])
    )
    # Row-wise mean over the selected datasets only.
    leaderboard["new_mean"] = leaderboard[score_columns].mean(axis=1)
    return leaderboard.to_markdown(index=False)


# Build the leaderboard for the pipeline selected above and print the markdown text.
leaderboard = create_leaderboard(RESULTS_FOLDER, PIPELINE_NAME)
print(leaderboard)

Index(['model', 'method', 'best_kwargs', 'n_trials', 'seed', 'limit_fit',
       'limit_run', 'objective', 'objective_dataset', 'mos_inaturalist',
       'mos_sun', 'mos_places365', 'textures', 'imagenet_o', 'openimage_o',
       'imagenet_a', 'imagenet_r', 'uniform', 'gaussian', 'average'],
      dtype='object')
| model                                   | method               |   imagenet_o |   openimage_o |   new_mean |
|:----------------------------------------|:---------------------|-------------:|--------------:|-----------:|
| deit_base_patch16_224                   | dice                 |     0.345513 |      0.275685 |   0.310599 |
| deit_base_patch16_224                   | doctor               |     0.509893 |      0.51981  |   0.514852 |
| deit_base_patch16_224                   | energy               |     0.692966 |      0.768235 |   0.730601 |
| deit_base_patch16_224                   | gradnorm             |     0.259428 |      0.130447 |   0.194938 |
| deit_base_patch16

In [80]:
# Load the ImageNet benchmark results and build one row per (model, method) holding,
# for every selected dataset, the AUROC and the FPR at 95% TPR, plus their averages
# over the selected datasets. The resulting `df`, `key1` and `key2` are reused by
# the LaTeX cell below.
PIPELINE_NAME = "ood_benchmark_imagenet"
path = os.path.join(RESULTS_FOLDER, PIPELINE_NAME)
df = pd.read_csv(os.path.join(path, "results.csv"))
print(df.columns)
# Drop exact duplicate rows and rows with any missing value.
df = df.drop_duplicates().dropna()
datasets = [
    "imagenet_o",
    "ninco",
    "ssb_hard",
    "ssb_easy",
    "textures_clean",
    "places_clean",
    # "inaturalist_clean",
    # "openimage_o_clean",
    "species_clean",
    # "average",
]

key1 = "auroc"
key2 = "fpr_at_0.95_tpr"

# Every dataset cell holds the text of a Python dict of metrics. Parse each column
# once and expand it into "<dataset>_<metric>" columns, instead of writing the
# frame cell by cell while iterating over its rows.
metric_frames = [
    pd.DataFrame(
        [json.loads(cell.replace("'", '"')) for cell in df[dataset]],
        index=df.index,
    ).add_prefix(f"{dataset}_")
    for dataset in datasets
]
df = pd.concat([df[["model", "method"]], *metric_frames], axis=1)

# Identifier columns followed by the (AUROC, FPR) pair of every dataset.
keep_columns = ["model", "method"] + [f"{dataset}_{key}" for dataset in datasets for key in (key1, key2)]
print(keep_columns)
df = df[keep_columns].copy()
# Row-wise averages over the selected datasets.
df[f"average_{key1}"] = df[[f"{dataset}_{key1}" for dataset in datasets]].mean(1)
df[f"average_{key2}"] = df[[f"{dataset}_{key2}" for dataset in datasets]].mean(1)
# Average repeated rows of the same (model, method) pair.
df = df.groupby(["model", "method"]).mean().reset_index()
# Order by the average FPR at 95% TPR, highest (worst) first.
df = df.sort_values([f"average_{key2}"], ascending=False)
df

Index(['model', 'method', 'imagenet_o', 'ninco', 'ssb_hard', 'ssb_easy',
       'textures_clean', 'places_clean', 'inaturalist_clean',
       'openimage_o_clean', 'species_clean', 'average', 'method_kwargs'],
      dtype='object')
['model', 'method', 'imagenet_o_auroc', 'imagenet_o_fpr_at_0.95_tpr', 'ninco_auroc', 'ninco_fpr_at_0.95_tpr', 'ssb_hard_auroc', 'ssb_hard_fpr_at_0.95_tpr', 'ssb_easy_auroc', 'ssb_easy_fpr_at_0.95_tpr', 'textures_clean_auroc', 'textures_clean_fpr_at_0.95_tpr', 'places_clean_auroc', 'places_clean_fpr_at_0.95_tpr', 'species_clean_auroc', 'species_clean_fpr_at_0.95_tpr']


Unnamed: 0,model,method,imagenet_o_auroc,imagenet_o_fpr_at_0.95_tpr,ninco_auroc,ninco_fpr_at_0.95_tpr,ssb_hard_auroc,ssb_hard_fpr_at_0.95_tpr,ssb_easy_auroc,ssb_easy_fpr_at_0.95_tpr,textures_clean_auroc,textures_clean_fpr_at_0.95_tpr,places_clean_auroc,places_clean_fpr_at_0.95_tpr,species_clean_auroc,species_clean_fpr_at_0.95_tpr,average_auroc,average_fpr_at_0.95_tpr
0,dino_vitb16_ft_in1k,dice,0.531808,0.8945,0.58581,0.875638,0.666241,0.802885,0.574224,0.807947,0.596259,0.829861,0.396767,0.954248,0.577796,0.860465,0.561272,0.860792
5,dino_vitb16_ft_in1k,mahalanobis,0.775744,0.8705,0.82953,0.790405,0.740227,0.889423,0.900019,0.622517,0.89128,0.697917,0.886374,0.640523,0.858001,0.709302,0.840168,0.745798
1,dino_vitb16_ft_in1k,energy,0.592287,0.825,0.712369,0.746853,0.688197,0.769231,0.681883,0.675497,0.828892,0.53125,0.783475,0.673203,0.758858,0.680233,0.720852,0.700181
12,dino_vitb16_ft_in1k,react,0.719294,0.8185,0.805441,0.735454,0.709643,0.798077,0.836686,0.668874,0.887062,0.59375,0.897899,0.509804,0.829788,0.662791,0.812259,0.683893
3,dino_vitb16_ft_in1k,kl_matching,0.724374,0.8235,0.83132,0.698707,0.779728,0.802885,0.872866,0.662252,0.890416,0.53125,0.868231,0.594771,0.862598,0.598837,0.83279,0.673172
4,dino_vitb16_ft_in1k,knn_euclides,0.800088,0.8165,0.83675,0.762504,0.731455,0.875,0.912744,0.503311,0.93053,0.423611,0.883515,0.660131,0.856337,0.645349,0.850203,0.669487
7,dino_vitb16_ft_in1k,msp,0.643153,0.853,0.809327,0.693603,0.765814,0.778846,0.830337,0.629139,0.8738,0.513889,0.863355,0.542484,0.837287,0.645349,0.803296,0.665187
2,dino_vitb16_ft_in1k,igeood_logits,0.624328,0.815,0.771333,0.703641,0.733047,0.774038,0.764593,0.629139,0.870981,0.458333,0.842667,0.575163,0.816442,0.610465,0.77477,0.652254
6,dino_vitb16_ft_in1k,max_logits,0.611294,0.8245,0.773007,0.691392,0.741017,0.759615,0.769072,0.629139,0.861297,0.493056,0.835893,0.529412,0.814713,0.604651,0.772328,0.647395
8,dino_vitb16_ft_in1k,odin,0.606904,0.8255,0.770855,0.688329,0.740427,0.764423,0.763157,0.629139,0.859599,0.486111,0.831451,0.529412,0.814282,0.604651,0.769525,0.646795


In [94]:
# assuming that df is the dataframe you want to convert to LaTeX table
key = key2
df_pretty = df.replace({"method": pretty_names, "model": pretty_names})
# filter columns with fpr or model or method
df_pretty = df_pretty.filter(regex=f"{key2}|model|method")
# remove _fpr from the dataset names
df_pretty.columns = df_pretty.columns.str.replace(f"_{key1}", " (AUROC)")
df_pretty.columns = df_pretty.columns.str.replace(f"_{key2}", " (TNR)")
# replace names with latex names
df_pretty = df_pretty.rename(columns={"model": "Model", "method": "Method"})
df_pretty = df_pretty.rename(columns=bdatasets)
# replace min values with textbf command
min_values = df_pretty.min(0).values[2:]
max_values = df_pretty.max(0).values[2:]
# for all values in the table, if value matches the min value, replace it with textbf command
df_pretty[[c for c in df_pretty.columns if "TNR" in c]] = df_pretty[[c for c in df_pretty.columns if "TNR" in c]].applymap(lambda x: f"\\textbf{{{100-100*x:.1f}}}" if x in min_values else x)
df_pretty[[c for c in df_pretty.columns if "AUROC" in c]] = df_pretty[[c for c in df_pretty.columns if "AUROC" in c]].applymap(lambda x: f"\\textbf{{{100*x:.1f}}}" if x in max_values else x)
# replace the float values to 2 decimal points {:.2f} string representation
df_pretty[[c for c in df_pretty.columns if "TNR" in c]] = df_pretty[[c for c in df_pretty.columns if "TNR" in c]].applymap(lambda x: f"{100-100*x:.1f}" if isinstance(x, float) else x)
df_pretty[[c for c in df_pretty.columns if "AUROC" in c]] = df_pretty[[c for c in df_pretty.columns if "AUROC" in c]].applymap(lambda x: f"{100*x:.1f}" if isinstance(x, float) else x)
# replace DINO wirth ""
df_pretty = df_pretty.replace("DINO", "")

latex_table = df_pretty.to_latex(index=False)

print(latex_table)
# Model & Method & ImageNet-O & NINCO & SSB & SSB & Textures & Places & iNaturalist & OpenImage-O & Species & Average \\
#       &       &           &       & Hard & Easy & (clean) & (clean) & (clean) & (clean) & (clean) &        \\

\begin{tabular}{llllllllll}
\toprule
Model & Method & imagenet_o (TNR) & ninco (TNR) & ssb_hard (TNR) & ssb_easy (TNR) & textures_clean (TNR) & places_clean (TNR) & species_clean (TNR) & average (TNR) \\
\midrule
 & DICE & 10.5 & 12.4 & 19.7 & 19.2 & 17.0 & 4.6 & 14.0 & 13.9 \\
 & Mahalanobis & 12.9 & 21.0 & 11.1 & 37.7 & 30.2 & 35.9 & 29.1 & 25.4 \\
 & Energy & 17.5 & 25.3 & 23.1 & 32.5 & 46.9 & 32.7 & 32.0 & 30.0 \\
 & ReAct & 18.2 & 26.5 & 20.2 & 33.1 & 40.6 & 49.0 & 33.7 & 31.6 \\
 & KLMatching & 17.7 & 30.1 & 19.7 & 33.8 & 46.9 & 40.5 & 40.1 & 32.7 \\
 & KNN & 18.3 & 23.7 & 12.5 & 49.7 & 57.6 & 34.0 & 35.5 & 33.1 \\
 & MSP & 14.7 & 30.6 & 22.1 & 37.1 & 48.6 & 45.8 & 35.5 & 33.5 \\
 & IGEOOD & 18.5 & 29.6 & 22.6 & 37.1 & 54.2 & 42.5 & 39.0 & 34.8 \\
 & MaxLogits & 17.5 & 30.9 & 24.0 & 37.1 & 50.7 & 47.1 & 39.5 & 35.3 \\
 & ODIN & 17.5 & 31.2 & 23.6 & 37.1 & 51.4 & 47.1 & 39.5 & 35.3 \\
 & projection2 & 18.5 & 31.3 & 19.2 & 51.0 & 56.2 & 46.4 & 41.9 & 37.8 \\
 & projection3 & 18.5 &

In [60]:
# Show the column-wise minimum of every df_pretty column as an array.
# NOTE(review): the recorded output holds raw floats and comes from an earlier execution
# (In [60]) than the formatting cell above (In [94]) — it may not match the current df_pretty.
df_pretty.min(0).values

array(['DINO', 'DICE', 0.752, 0.6267437904048996, 0.7451923076923077,
       0.46357615894039733, 0.3020833333333333, 0.4444444444444444,
       0.27676240208877284, 0.4157608695652174, 0.4186046511627907,
       0.5233984854216706], dtype=object)

In [7]:
# Sub-folder of RESULTS_FOLDER whose results.csv is summarised in this cell.
PIPELINE_NAME = "ood_benchmark_imagenet_textures_clean"


def create_leaderboard(results_folder: str, pipeline_name: str):
    """Render the Textures (clean) AUROC of each (model, method) pair as markdown.

    Reads ``<results_folder>/<pipeline_name>/results.csv``, prints its column index,
    removes duplicate rows and rows with missing values, keeps only the methods and
    models listed below, averages the AUROC over repeated rows of the same
    (model, method) pair and reports it as a percentage with one decimal. Method and
    model identifiers are replaced by their display names from ``bmethods`` and
    ``bmodels``.

    Args:
        results_folder: Directory containing one sub-folder per pipeline.
        pipeline_name: Name of the pipeline sub-folder to summarise.

    Returns:
        The table produced by ``DataFrame.to_markdown(index=False)``, one row per
        (model, method) pair ordered by model and then method.
    """
    path = os.path.join(results_folder, pipeline_name)
    df = pd.read_csv(os.path.join(path, "results.csv"))
    print(df.columns)
    df = df.drop_duplicates()
    # drop nan
    df = df.dropna()
    methods = [
        "msp",
        "odin",
        "energy",
        "max_logits",
        "kl_matching",
        "igeood_logits",
        "mahalanobis",
        "gradnorm",
        "dice",
        "vim",
        "react",
        "knn_euclides",
        "projection",
        "react_projection",
    ]
    models = ["tv_resnet50", "vit_base_patch16_224"]
    # Keep the selected methods and models. The copy makes the frame independent of
    # the query result, so the metric columns added below are not written to a slice.
    df = df.query("method in @methods and model in @models").copy()
    datasets = ["textures_clean"]
    key = "auroc"

    # Every dataset cell holds the text of a Python dict of metrics; parse the column
    # once and keep the selected metric as "<dataset>_<key>".
    for dataset in datasets:
        parsed = [json.loads(cell.replace("'", '"')) for cell in df[dataset]]
        df[f"{dataset}_{key}"] = pd.DataFrame(parsed, index=df.index)[key]

    keep_columns = ["model", "method"] + [f"{dataset}_{key}" for dataset in datasets]
    df = df[keep_columns]
    df = df.groupby(["model", "method"]).mean().reset_index().sort_values(["model", "method"], ascending=True)

    # Report the scores as percentages with one decimal, under the dataset display name.
    for dataset in datasets:
        df[f"{bdatasets[dataset]}"] = df[f"{dataset}_{key}"].apply(lambda x: f"{100*x:.1f}")
    df = df.drop([f"{dataset}_{key}" for dataset in datasets], axis=1)

    # rename methods and models to their display names
    df["method"] = df["method"].replace(bmethods)
    df["model"] = df["model"].replace(bmodels)
    return df.to_markdown(index=False)


# Build the leaderboard for the pipeline selected above and print the markdown text.
leaderboard = create_leaderboard(RESULTS_FOLDER, PIPELINE_NAME)
print(leaderboard)

Index(['model', 'method', 'textures_clean', 'average', 'method_kwargs'], dtype='object')
| model    | method           |   Textures (clean) |
|:---------|:-----------------|-------------------:|
| ResNet50 | DICE             |               91.8 |
| ResNet50 | Energy           |               88.3 |
| ResNet50 | GradNorm         |               92.1 |
| ResNet50 | IGEOOD           |               87.5 |
| ResNet50 | KLMatching       |               85.2 |
| ResNet50 | KNN              |               97.7 |
| ResNet50 | MaxLogits        |               88.1 |
| ResNet50 | MSP              |               82.8 |
| ResNet50 | ODIN             |               88.1 |
| ResNet50 | Proj.            |               95.9 |
| ResNet50 | ReAct            |               91.5 |
| ResNet50 | react_projection |               96.1 |
| ResNet50 | ViM              |               97.5 |
| ViT-B-16 | DICE             |               35.6 |
| ViT-B-16 | Energy           |               85.4 |
| ViT-B-16

In [24]:
# Sub-folder of RESULTS_FOLDER whose results.csv is summarised in this cell.
PIPELINE_NAME = "ood_benchmark_imagenet_textures_clean"


def create_leaderboard(results_folder: str, pipeline_name: str):
    """Render the Textures (clean) AUROC as a method-by-model markdown table.

    Reads ``<results_folder>/<pipeline_name>/results.csv``, prints its column index,
    removes duplicate rows and rows with missing values, keeps only the methods and
    models listed below, averages the AUROC over repeated rows of the same
    (model, method) pair and reports it as a percentage with one decimal.

    The output has one row per method, in the order of the ``methods`` list, and one
    score column per model. A method is left out of the table when it has no score
    for at least one of the models.

    Args:
        results_folder: Directory containing one sub-folder per pipeline.
        pipeline_name: Name of the pipeline sub-folder to summarise.

    Returns:
        The table produced by ``DataFrame.to_markdown(index=False)`` with the
        columns "Method", "ResNet-50" and "ViT-B/16".
    """
    path = os.path.join(results_folder, pipeline_name)
    df = pd.read_csv(os.path.join(path, "results.csv"))
    print(df.columns)
    df = df.drop_duplicates()
    # drop nan
    df = df.dropna()
    methods = [
        "msp",
        "odin",
        "energy",
        "max_logits",
        "kl_matching",
        "igeood_logits",
        "mahalanobis",
        "gradnorm",
        "dice",
        "vim",
        "react",
        "knn_euclides",
        "projection",
        "react_projection",
    ]
    # The order must match the score columns of the output table.
    models = ["tv_resnet50", "vit_base_patch16_224"]
    # Keep the selected methods and models. The copy makes the frame independent of
    # the query result, so the metric columns added below are not written to a slice.
    df = df.query("method in @methods and model in @models").copy()
    datasets = ["textures_clean"]
    key = "auroc"

    # Every dataset cell holds the text of a Python dict of metrics; parse the column
    # once and keep the selected metric as "<dataset>_<key>".
    for dataset in datasets:
        parsed = [json.loads(cell.replace("'", '"')) for cell in df[dataset]]
        df[f"{dataset}_{key}"] = pd.DataFrame(parsed, index=df.index)[key]

    keep_columns = ["model", "method"] + [f"{dataset}_{key}" for dataset in datasets]
    df = df[keep_columns]
    df = df.groupby(["model", "method"]).mean().reset_index()

    # Report the scores as percentages with one decimal, under the dataset display name.
    for dataset in datasets:
        df[f"{bdatasets[dataset]}"] = df[f"{dataset}_{key}"].apply(lambda x: f"{100*x:.1f}")
    df = df.drop([f"{dataset}_{key}" for dataset in datasets], axis=1)

    # The wide table shows a single score per model: use the last dataset explicitly
    # rather than whatever value a loop variable happens to hold.
    score_column = bdatasets[datasets[-1]]
    scores = dict(zip(zip(df["model"], df["method"]), df[score_column]))

    rows = []
    for method in methods:
        cells = [scores.get((model, method)) for model in models]
        if any(cell is None for cell in cells):
            # No score for at least one model: skip the method.
            continue
        rows.append([bmethods[method], *cells])
    new_df = pd.DataFrame(rows, columns=["Method", "ResNet-50", "ViT-B/16"])
    return new_df.to_markdown(index=False)


# Build the leaderboard for the pipeline selected above and print the markdown text.
leaderboard = create_leaderboard(RESULTS_FOLDER, PIPELINE_NAME)
print(leaderboard)
# ours 96.74

Index(['model', 'method', 'textures_clean', 'average', 'method_kwargs'], dtype='object')
| Method       |   ResNet-50 |   ViT-B/16 |
|:-------------|------------:|-----------:|
| MSP          |        82.8 |       86.7 |
| ODIN         |        88.1 |       86.2 |
| Energy       |        88.3 |       85.4 |
| MaxLogits    |        88.1 |       86.6 |
| KLMatching   |        85.2 |       89.5 |
| IGEOOD       |        87.5 |       87.7 |
| Mahalanobis  |        88.7 |       90.3 |
| GradNorm     |        92.1 |       47.7 |
| DICE         |        91.8 |       35.6 |
| ViM          |        97.5 |       90.6 |
| ReAct        |        91.5 |       89.9 |
| KNN          |        97.7 |       93.5 |
| Proj. (Ours) |        95.9 |       94.2 |
