# import and setup

In [7]:
import sys
sys.path.insert(0, 'yolov10FX') # if you're working with yolov8 models, please change the path to 'yolov8FX'
import torch
from torch.utils.data import Dataset, DataLoader
from ultralytics import YOLOv10
import sklearn
from tqdm import tqdm
import os
import json
import numpy as np
from scipy.special import softmax
import pandas as pd
from collections import defaultdict
from sklearn import metrics
import gc
import ijson
from utils.monitor_construction import features_clustering_by_k_start, monitor_construction_from_features
from utils.evaluation import get_distance_dataset
from pathlib import Path

# utility functions

In [2]:
def compute_fpr(conf, label):
    """Return the false-positive rate at the first ROC point with TPR >= 0.95.

    Entries of ``label`` different from -1 form the positive class and
    entries equal to -1 the negative class. ``conf`` holds the per-sample
    scores handed to ``metrics.roc_curve``.
    """
    is_positive = (label != -1).astype(int)
    fpr_values, tpr_values, _thresholds = metrics.roc_curve(is_positive, conf)
    # argmax over a boolean array yields the index of its first True entry.
    first_reaching_target = np.argmax(tpr_values >= 0.95)
    return fpr_values[first_reaching_target]
    
def load_json_data(file_path):
    """Open ``file_path`` for reading and return its parsed JSON content."""
    with open(file_path, 'r') as json_file:
        parsed = json.load(json_file)
    return parsed
    
def filter_and_save_json(json_path, threshold=0.25):
    """Rewrite ``json_path`` in place, keeping entries with ``score >= threshold``.

    The file is read in full, filtered on each entry's ``'score'`` key and
    then overwritten with the surviving entries.
    """
    kept_entries = []
    for entry in load_json_data(json_path):
        if entry['score'] >= threshold:
            kept_entries.append(entry)

    with open(json_path, 'w') as out_file:
        json.dump(kept_entries, out_file)

def json2npy(model_type, id, dataset_name, threshold=0.25):
    """Load (or build and cache) the logits/labels arrays of one dataset.

    Streams ``feats/{model_type}_{id}/{dataset_name}/predictions.json`` and
    collects, for every detection with ``score >= threshold``, its ``logits``
    and ``category_id``. The arrays are cached next to the JSON file as
    ``logits_{threshold}.npy`` and ``labels_{threshold}.npy`` and reused on
    later calls.

    Args:
        model_type: first half of the model folder name (e.g. ``"v10s"``).
        id: second half of the model folder name (e.g. ``"voc"``). The name
            shadows the builtin but is kept for keyword-argument callers.
        dataset_name: sub-folder of the model's ``feats`` directory.
        threshold: minimum detection score; also part of the cache file name.

    Returns:
        tuple: ``(logits, labels)`` as ``float32`` and ``int32`` NumPy arrays.
    """
    base_path = os.path.join("feats", f"{model_type}_{id}", dataset_name)
    logits_file = os.path.join(base_path, f"logits_{threshold}.npy")
    labels_file = os.path.join(base_path, f"labels_{threshold}.npy")

    # Cache hit: both arrays were already materialised for this threshold.
    if os.path.isfile(logits_file) and os.path.isfile(labels_file):
        return np.load(logits_file), np.load(labels_file)

    json_file = os.path.join(base_path, "predictions.json")

    logits, labels = [], []
    with open(json_file, 'r') as f:
        # ijson yields one top-level array element at a time, so the whole
        # file never has to be held in memory.
        for obj in ijson.items(f, 'item'):
            # Apply the threshold that names the cache files; previously it
            # was only used in the file name and never filtered anything.
            # float() because ijson may yield non-float numerics — TODO confirm.
            if float(obj["score"]) < threshold:
                continue
            logits.append(obj["logits"])
            labels.append(obj["category_id"])

    logits = np.array(logits, dtype=np.float32)
    labels = np.array(labels, dtype=np.int32)

    np.save(logits_file, logits)
    np.save(labels_file, labels)
    return logits, labels

# dataset and inference

In [3]:
import shutil
def inference_dataset(dataset_name, model_name, conf=0.25):
    """Run validation with a trained model and keep the confident detections.

    Loads ``models/{model_name}/best.pt``, runs ``model.val`` on the dataset
    with ``save_json=True`` so that ``feats/{model_name}/{dataset_name}/
    predictions.json`` is produced, then rewrites that file keeping only
    entries whose score is at least ``conf``.

    Args:
        dataset_name: name of the evaluated dataset. ``"{id}-train"`` selects
            the train split; everything else uses the val split. Names that
            start with the model's dataset id reuse ``datasets/{id}``.
        model_name: ``"{model_type}_{id}"``, e.g. ``"v10s_voc"``.
        conf: confidence threshold passed to ``model.val`` and used again
            when filtering the saved JSON.

    Returns:
        list: the filtered prediction entries, or ``[]`` when no
        predictions file was produced.
    """
    model = YOLOv10(f'models/{model_name}/best.pt')
    _, dataset_id = model_name.split('_')
    split = "train" if dataset_name == f"{dataset_id}-train" else "val"
    if dataset_name.startswith(dataset_id):
        data_path = f'datasets/{dataset_id}/dataset.yaml'
    else:
        data_path = f'datasets/{dataset_name}/dataset.yaml'

    output_root = f'feats/{model_name}'
    output_folder = os.path.join(output_root, dataset_name)
    # Start from a clean folder; presumably this keeps the run from being
    # written to a suffixed sibling directory — TODO confirm.
    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)
        print(f"Removed existing folder: {output_folder}")

    model.val(
        data=str(data_path),
        verbose=True,
        device="cuda",
        split=split,
        save_json=True,
        project=output_root,
        name=dataset_name,
        conf=conf,
        batch=2
    )
    # Drop the model reference before collecting so its memory can be released.
    del model
    gc.collect()
    torch.cuda.empty_cache()

    json_file = f"{output_folder}/predictions.json"
    # Check for the file *before* filtering: the filter opens the file and
    # would raise before the "not found" message could ever be printed.
    if not os.path.exists(json_file):
        print("JSON file not found!")
        return []

    filter_and_save_json(json_file, threshold=conf)
    entries = load_json_data(json_file)
    print(f"Number of entries in JSON file: {len(entries)}")
    return entries

In [4]:
# Export filtered predictions of the VOC model (models/v10s_voc) for the
# VOC train/val splits and the OOD-open dataset.
# NOTE(review): `id` shadows the Python builtin at notebook scope.
id = "voc"
for dataset_name in [f"{id}-train", f"{id}-val", "OOD-open"]:
    inference_dataset(dataset_name, f"v10s_{id}")

Ultralytics YOLOv8.1.34 🚀 Python-3.9.19 torch-2.0.1+cu117 CUDA:0 (NVIDIA RTX A4000 Laptop GPU, 8192MiB)
YOLOv10s summary (fused): 293 layers, 8050440 parameters, 0 gradients, 24.5 GFLOPs


[34m[1mval: [0mScanning /home/hugo/datasets/voc/labels/train.cache... 16551 images, 1729 backgrounds, 0 corrupt: 100%|██████████| 16551/16551 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 8276/8276 [04:04<00:00, 33.90it/s]


                   all      16551      39612      0.929      0.896      0.956      0.813
               bicycle      16551       1208      0.947      0.894      0.954      0.818
                  bird      16551       1820      0.946      0.917      0.974      0.817
                  boat      16551       1397        0.9      0.797        0.9      0.698
                bottle      16551       2116      0.889      0.814      0.914      0.696
                   bus      16551        909      0.935      0.924      0.974      0.881
                   car      16551       4008      0.908      0.871      0.945      0.775
                   cat      16551       1616      0.965      0.966       0.99      0.928
                 chair      16551       4338      0.876      0.823      0.906       0.74
                   cow      16551       1058      0.924      0.915      0.968      0.814
                   dog      16551       2079      0.963      0.963      0.989      0.903
                 hors

[34m[1mval: [0mScanning /home/hugo/datasets/voc/labels/val.cache... 4952 images, 487 backgrounds, 0 corrupt: 100%|██████████| 4952/4952 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2476/2476 [01:06<00:00, 37.00it/s]


                   all       4952      12648      0.846      0.773       0.85      0.644
               bicycle       4952        389      0.869      0.799      0.869      0.674
                  bird       4952        576      0.891      0.753      0.839      0.585
                  boat       4952        393      0.774      0.613      0.709      0.449
                bottle       4952        657      0.836      0.645      0.761      0.507
                   bus       4952        254       0.83      0.783       0.87      0.743
                   car       4952       1541      0.879      0.814      0.897      0.699
                   cat       4952        370      0.879      0.865      0.931      0.781
                 chair       4952       1374      0.735      0.596      0.677      0.462
                   cow       4952        329      0.855      0.824      0.896      0.657
                   dog       4952        530      0.881      0.811        0.9      0.734
                 hors

[34m[1mval: [0mScanning /home/hugo/datasets/OOD-open/labels/val.cache... 0 images, 1852 backgrounds, 0 corrupt: 100%|██████████| 1852/1852 [00:00<?, ?it/s]




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 926/926 [01:05<00:00, 14.21it/s]

                   all       1852          0          0          0          0          0





Speed: 0.4ms preprocess, 7.6ms inference, 0.0ms loss, 0.6ms postprocess per image
Saving feats/v10s_voc/OOD-open/predictions.json...
Results saved to [1mfeats/v10s_voc/OOD-open[0m
Number of entries in JSON file: 1861


In [6]:
# Export filtered predictions of the BDD model (models/v10s_bdd) for the
# BDD train/val splits and both OOD datasets (ID-bdd-OOD-coco, OOD-open).
# NOTE(review): `id` shadows the Python builtin at notebook scope.
id = "bdd"
for dataset_name in [f"{id}-train", f"{id}-val", f"ID-{id}-OOD-coco", "OOD-open"]:
    inference_dataset(dataset_name, f"v10s_{id}")

Ultralytics YOLOv8.1.34 🚀 Python-3.9.19 torch-2.0.1+cu117 CUDA:0 (NVIDIA RTX A4000 Laptop GPU, 8192MiB)
YOLOv10s summary (fused): 293 layers, 8042700 parameters, 0 gradients, 24.5 GFLOPs


[34m[1mval: [0mScanning /home/hugo/datasets/bdd/labels/val.cache... 10000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 10000/10000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 5000/5000 [02:40<00:00, 31.21it/s]


                   all      10000     185945      0.732      0.507      0.567      0.335
            pedestrian      10000      13425      0.748      0.569      0.664      0.347
                 rider      10000        658      0.643      0.447       0.49      0.259
                   car      10000     102837      0.827       0.74      0.829      0.541
                 truck      10000       4243      0.701      0.599      0.664       0.49
                   bus      10000       1660      0.701      0.584      0.653      0.509
                 train      10000         15          1          0     0.0202     0.0177
            motorcycle      10000        460      0.665      0.427      0.485      0.265
               bicycle      10000       1039       0.61      0.472      0.513      0.265
         traffic light      10000      26884      0.707       0.58      0.646      0.261
          traffic sign      10000      34724      0.717      0.654      0.708      0.394
Speed: 0.4ms preproce

[34m[1mval: [0mScanning /home/hugo/datasets/ID-bdd-OOD-coco/labels/val.cache... 0 images, 1880 backgrounds, 0 corrupt: 100%|██████████| 1880/1880 [00:00<?, ?it/s]




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 940/940 [00:22<00:00, 42.51it/s]

                   all       1880          0          0          0          0          0





Speed: 0.4ms preprocess, 8.3ms inference, 0.0ms loss, 0.7ms postprocess per image
Saving /home/hugo/yolov10FX/feats/v10s_bdd/ID-bdd-OOD-coco/predictions.json...
Results saved to [1m/home/hugo/yolov10FX/feats/v10s_bdd/ID-bdd-OOD-coco[0m
Number of entries in JSON file: 1359
Ultralytics YOLOv8.1.34 🚀 Python-3.9.19 torch-2.0.1+cu117 CUDA:0 (NVIDIA RTX A4000 Laptop GPU, 8192MiB)
YOLOv10s summary (fused): 293 layers, 8042700 parameters, 0 gradients, 24.5 GFLOPs


[34m[1mval: [0mScanning /home/hugo/datasets/OOD-open/labels/val.cache... 0 images, 1852 backgrounds, 0 corrupt: 100%|██████████| 1852/1852 [00:00<?, ?it/s]




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 926/926 [01:02<00:00, 14.76it/s]

                   all       1852          0          0          0          0          0





Speed: 0.4ms preprocess, 7.7ms inference, 0.0ms loss, 0.6ms postprocess per image
Saving /home/hugo/yolov10FX/feats/v10s_bdd/OOD-open/predictions.json...
Results saved to [1m/home/hugo/yolov10FX/feats/v10s_bdd/OOD-open[0m
Number of entries in JSON file: 930


# postprocessing methods

In [8]:
def msp_postprocess(logits):
    """Maximum softmax probability of each row (softmax and max over dim 1)."""
    probabilities = torch.softmax(logits, dim=1)
    top_probability, _ = probabilities.max(dim=1)
    return top_probability

def ebo_postprocess(logits, temperature=1):
    """Energy-style score: ``temperature * logsumexp(logits / temperature)`` over dim 1."""
    scaled_logits = logits / temperature
    log_partition = torch.logsumexp(scaled_logits, dim=1)
    return temperature * log_partition

def maxlogits_postprocess(logits):
    """Largest logit of each row (max over dim 1)."""
    return logits.max(dim=1).values

def mahalanobis_compute_mean(logits):
    """Estimate per-class logit means and a shared precision matrix.

    Every row of ``logits`` is assigned to its argmax column. For each class
    the mean of its rows is computed and subtracted from them; the centred
    rows of all classes are then pooled to fit one covariance estimate,
    whose precision (inverse covariance) is returned.

    Args:
        logits (torch.Tensor): 2-D tensor, one row per sample and one column
            per class.

    Returns:
        tuple: ``(class_mean, precision)`` where ``class_mean`` is a list
        with one mean vector per class and ``precision`` is a float tensor.

    Raises:
        ValueError: if ``logits`` contains no samples.
    """
    # Validate first: indexing logits[0] on an empty tensor would raise an
    # IndexError long before the "no samples" check could ever be reached.
    if logits.numel() == 0:
        raise ValueError("No samples available for any class to compute covariance.")

    num_classes = logits.size(1)
    # Vectorised argmax replaces a per-row Python loop.
    all_labels = logits.argmax(dim=1)
    class_mean = []
    centered_data = []
    for c in range(num_classes):
        class_samples = logits[all_labels.eq(c)]
        if class_samples.size(0) > 0:
            mean = class_samples.mean(0)
            centered = class_samples - mean
        else:
            # NOTE(review): a class without any sample gets an all-zero mean,
            # which later scoring treats like a real class centre — confirm
            # this is intended.
            mean = torch.zeros(logits.size(1))
            centered = torch.empty((0, logits.size(1)))
        class_mean.append(mean)
        centered_data.append(centered)

    # Import the submodule explicitly: a bare `import sklearn` does not
    # guarantee that `sklearn.covariance` is loaded.
    from sklearn.covariance import EmpiricalCovariance

    covariance_estimator = EmpiricalCovariance(assume_centered=False)
    covariance_estimator.fit(torch.cat(centered_data).numpy().astype(np.float32))
    precision = torch.from_numpy(covariance_estimator.precision_).float()
    return class_mean, precision

def mahalanobis_postprocess(logits, class_mean, precision):
    """Score each sample by its negated Mahalanobis distance to the closest class.

    For every class ``c`` with a non-empty mean the score is
    ``-(x - mean_c)^T P (x - mean_c)``, computed in double precision, and
    the maximum over classes is returned per sample.

    Args:
        logits (torch.Tensor): 2-D tensor, one row per sample.
        class_mean: sequence of per-class mean vectors; a class whose mean
            has no elements is skipped.
        precision (torch.Tensor): square precision matrix.

    Returns:
        torch.Tensor: 1-D tensor with one score per row of ``logits``.
    """
    # Read both sizes from the shape so an empty batch does not fail on logits[0].
    num_samples, num_classes = logits.shape[0], logits.shape[1]
    precision = precision.double()
    # Start at -inf, not 0: real scores are never positive, so a skipped
    # class left at 0 would always win the max below.
    class_scores = torch.full((num_samples, num_classes), float('-inf'))
    for c in range(num_classes):
        if class_mean[c].numel() > 0:
            centered = logits - class_mean[c].double()
            class_scores[:, c] = -torch.sum(
                centered * torch.matmul(precision, centered.t()).t(), dim=1
            )
    return torch.max(class_scores, dim=1)[0]

# instance matching and reorganization

In [9]:
def reorganize_predictions(file_path, threshold=0.25):
    """Group the detections of a predictions JSON file by ``image_id``.

    Only entries with ``score >= threshold`` are kept. Each kept detection
    is reduced to its ``category_id``, ``bbox``, ``score`` and ``logits``.
    Returns a plain dict mapping ``image_id`` to its list of detections.
    """
    kept_fields = ('category_id', 'bbox', 'score', 'logits')
    per_image = {}
    for detection in load_json_data(file_path):
        if not detection['score'] >= threshold:
            continue
        image_detections = per_image.setdefault(detection['image_id'], [])
        image_detections.append({field: detection[field] for field in kept_fields})
    return per_image

# dataset processing

In [10]:
def process_dataset(model_name, dataset_name, postprocess):
    """Score every kept detection of a dataset with one post-processing method.

    Reads ``feats/{model_name}/{dataset_name}/predictions.json`` (running
    inference first when the file is missing), collects the logits of all
    detections and applies ``postprocess`` to them. For
    ``mahalanobis_postprocess`` the class means and precision matrix are
    estimated from the model's ``{id}-train`` predictions.

    Args:
        model_name: ``"{model_type}_{id}"``, e.g. ``"v10s_voc"``.
        dataset_name: dataset folder under ``feats/{model_name}``.
        postprocess: one of the ``*_postprocess`` scoring functions.

    Returns:
        list: one score per detection.
    """
    _, dataset_id = model_name.split('_')
    uses_train_statistics = postprocess is mahalanobis_postprocess

    original_file = f'feats/{model_name}/{dataset_name}/predictions.json'
    # inference_dataset writes (and filters) predictions.json itself. Its
    # return value must not be dumped over that file: doing so replaced the
    # predictions with `null` whenever the function returned None.
    if uses_train_statistics:
        original_train_file = f"feats/{model_name}/{dataset_id}-train/predictions.json"
        if not os.path.exists(original_train_file):
            print(f"Running inference for original {dataset_id}-train dataset...")
            inference_dataset(f'{dataset_id}-train', model_name)
    if not os.path.exists(original_file):
        print(f"Running inference for original {dataset_name} dataset...")
        inference_dataset(dataset_name, model_name)

    instance_predictions_original = reorganize_predictions(original_file)
    original_logits_list = [
        det['logits']
        for image_detections in instance_predictions_original.values()
        for det in image_detections
    ]

    if uses_train_statistics:
        instance_predictions_original_train = reorganize_predictions(original_train_file)
        original_train_logits_list = [
            det['logits']
            for image_detections in instance_predictions_original_train.values()
            for det in image_detections
        ]
        class_mean, precision = mahalanobis_compute_mean(torch.tensor(original_train_logits_list))
        return postprocess(torch.tensor(original_logits_list), class_mean, precision).tolist()

    return postprocess(torch.tensor(original_logits_list)).tolist()

# ood metrics computation

In [11]:
def compute_combined_ood_metrics(model_name, id_dataset_name, ood_dataset_name):
    """Compute FPR at 95% TPR for the four baseline scoring methods.

    For each of ``msp``, ``ebo``, ``mls`` and ``mds`` the in-distribution and
    OOD datasets are scored with ``process_dataset`` and the pooled scores
    are passed to ``compute_fpr``. Returns a dict mapping the method name to
    its FPR rounded to four decimals.
    """
    scoring_methods = (
        ('msp', msp_postprocess),
        ('ebo', ebo_postprocess),
        ('mls', maxlogits_postprocess),
        ('mds', mahalanobis_postprocess),
    )

    def fpr95_for(scorer):
        id_scores = process_dataset(model_name, id_dataset_name, scorer)
        ood_scores = process_dataset(model_name, ood_dataset_name, scorer)
        pooled_scores = np.array(id_scores + ood_scores)
        # 0 marks in-distribution samples, -1 marks OOD samples.
        pooled_labels = np.array([0] * len(id_scores) + [-1] * len(ood_scores))
        return round(compute_fpr(pooled_scores, pooled_labels), 4)

    return {method_name: fpr95_for(scorer) for method_name, scorer in scoring_methods}

In [12]:
# Baseline FPR@95 (msp / ebo / mls / mds) for the VOC model: voc-val as
# in-distribution data against OOD-open.
# NOTE(review): a later cell runs this same configuration again and adds BAM.
model_name = "v10s_voc"
id_dataset_name = "voc-val"
ood_dataset_name = "OOD-open"
fpr95_results = compute_combined_ood_metrics(model_name, id_dataset_name, ood_dataset_name)
print(fpr95_results)

{'msp': 0.6835, 'ebo': 0.9355, 'mls': 0.9264, 'mds': 0.6668}


In [14]:
# Baseline FPR@95 (msp / ebo / mls / mds) for the BDD model: bdd-val as
# in-distribution data against OOD-open.
# NOTE(review): a later cell runs this same configuration again and adds BAM.
model_name = "v10s_bdd"
id_dataset_name = "bdd-val"
ood_dataset_name = "OOD-open"
fpr95_results = compute_combined_ood_metrics(model_name, id_dataset_name, ood_dataset_name)
print(fpr95_results)

{'msp': 0.7645, 'ebo': 0.9097, 'mls': 0.8946, 'mds': 0.6054}


# bam evaluation

In [13]:
def evaluate_bam(model_name, density, ood_dataset_name):
    """Compute FPR at 95% TPR for the BAM monitor of ``model_name``.

    A per-class monitor is loaded from ``monitors/{model_name}_{density}.pkl``
    or, if that file is missing, built from the ``{id}-train`` logits and
    saved there. Classes with fewer than ``density`` samples get no monitor.
    The negated monitor distances of the ``{id}-val`` and ``ood_dataset_name``
    detections are then used as scores for ``compute_fpr``.

    Returns the FPR rounded to four decimals.
    """
    import pickle

    def npy2feats_dict(model_type, dataset_id, dataset_name):
        # Group the logits rows by their predicted label.
        logits, labels = json2npy(model_type, dataset_id, dataset_name)
        rows_by_label = {}
        for logit, label in zip(logits, labels):
            rows_by_label.setdefault(label, []).append(logit)
        return {label: np.array(rows) for label, rows in rows_by_label.items()}

    def negated_distances(feats):
        # Flatten the per-class distances and flip their sign.
        per_class = get_distance_dataset(monitor_dict, feats)
        return np.array([
            -distance
            for class_distances in per_class.values()
            for distance in class_distances
        ])

    model_type, dataset_id = model_name.split('_')
    monitor_path = f"monitors/{model_name}_{density}.pkl"

    if not os.path.exists(monitor_path):
        monitor_dict = {}
        train_feats = npy2feats_dict(model_type, dataset_id, f"{dataset_id}-train")

        for class_label, class_feats in tqdm(train_feats.items(), desc="Building monitor"):
            if len(class_feats) >= density:
                k_start = round(len(class_feats) / density)
                clustering = features_clustering_by_k_start(class_feats, k_start)
                monitor_dict[class_label] = monitor_construction_from_features(class_feats, clustering)
        # Persist the freshly built monitor so later calls can reuse it.
        os.makedirs(os.path.dirname(monitor_path), exist_ok=True)
        with open(monitor_path, "wb") as monitor_file:
            pickle.dump(monitor_dict, monitor_file)
    else:
        with open(monitor_path, "rb") as monitor_file:
            monitor_dict = pickle.load(monitor_file)

    distances_id = negated_distances(npy2feats_dict(model_type, dataset_id, f"{dataset_id}-val"))
    distances_ood = negated_distances(npy2feats_dict(model_type, dataset_id, ood_dataset_name))

    conf = np.concatenate([distances_id, distances_ood])
    # +1 marks in-distribution samples, -1 marks OOD samples.
    label = np.concatenate([np.ones(len(distances_id)), -np.ones(len(distances_ood))])
    return round(compute_fpr(conf, label), 4)

In [14]:
# Evaluation of v10s_voc (no perturbation applied): baselines plus BAM,
# voc-val as in-distribution data against OOD-open.
# NOTE(review): evaluate_bam does not receive id_dataset_name; it always
# uses "{id}-val" derived from model_name.
ood_dataset_name = 'OOD-open'
id_dataset_name = 'voc-val'
model_name = "v10s_voc"
density = 5  # forwarded to evaluate_bam; also part of the monitor cache file name

fpr95_results= compute_combined_ood_metrics(model_name, id_dataset_name, ood_dataset_name)
fpr95_bam = evaluate_bam(model_name, density, ood_dataset_name)
fpr95_results['bam'] = fpr95_bam
print(fpr95_results)

Building monitor: 100%|██████████| 14/14 [00:01<00:00,  9.62it/s]


{'msp': 0.6835, 'ebo': 0.9355, 'mls': 0.9264, 'mds': 0.6668, 'bam': 0.4836}


In [17]:
# Evaluation of v10s_voc (no perturbation applied): baselines plus BAM,
# voc-val as in-distribution data against ID-voc-OOD-coco.
# NOTE(review): the VOC export cell does not list ID-voc-OOD-coco, so its
# predictions must already exist under feats/v10s_voc/ or are generated on
# demand by process_dataset.
ood_dataset_name = 'ID-voc-OOD-coco'
id_dataset_name = 'voc-val'
model_name = "v10s_voc"
density = 5  # forwarded to evaluate_bam; also part of the monitor cache file name

fpr95_results= compute_combined_ood_metrics(model_name, id_dataset_name, ood_dataset_name)
fpr95_bam = evaluate_bam(model_name, density, ood_dataset_name)
fpr95_results['bam'] = fpr95_bam
print(fpr95_results)

{'msp': 0.5241, 'ebo': 0.9394, 'mls': 0.9184, 'mds': 0.5501, 'bam': 0.4104}


In [17]:
# Evaluation of v10s_bdd (no perturbation applied): baselines plus BAM,
# bdd-val as in-distribution data against OOD-open.
ood_dataset_name = 'OOD-open'
id_dataset_name = 'bdd-val'
model_name = "v10s_bdd"
density = 50  # forwarded to evaluate_bam; the VOC cells use 5

fpr95_results= compute_combined_ood_metrics(model_name, id_dataset_name, ood_dataset_name)
fpr95_bam = evaluate_bam(model_name, density, ood_dataset_name)
fpr95_results['bam'] = fpr95_bam
print(fpr95_results)

Building monitor: 100%|██████████| 10/10 [05:37<00:00, 33.70s/it]


{'msp': 0.7645, 'ebo': 0.9097, 'mls': 0.8946, 'mds': 0.6054, 'bam': 0.4753}


In [18]:
# Evaluation of v10s_bdd: baselines plus BAM, bdd-val as in-distribution
# data against ID-bdd-OOD-coco.
ood_dataset_name = 'ID-bdd-OOD-coco'
id_dataset_name = 'bdd-val'
model_name = "v10s_bdd"
density = 50  # forwarded to evaluate_bam; the VOC cells use 5

fpr95_results= compute_combined_ood_metrics(model_name, id_dataset_name, ood_dataset_name)
fpr95_bam = evaluate_bam(model_name, density, ood_dataset_name)
fpr95_results['bam'] = fpr95_bam
print(fpr95_results)

{'msp': 0.7837, 'ebo': 0.9073, 'mls': 0.8904, 'mds': 0.6313, 'bam': 0.5129}
