# extract validation feats

In [63]:
# %%
import torch
from detectron2.utils.logger import setup_logger
setup_logger()

from detectron2 import model_zoo
from detectron2.config import get_cfg
import detectron2.data.transforms as T
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import build_model
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data import DatasetMapper, build_detection_test_loader
from detectron2.data import get_detection_dataset_dicts

import numpy as np
import cv2
from PIL import Image
import os
import argparse
from pickle import load

import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F
# Reproducibility settings: pin the cuDNN flags first, then seed the global
# NumPy and PyTorch random number generators with the same fixed value (0).
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

np.random.seed(0)
torch.manual_seed(0)

from vos.detection.modeling.regnet import build_regnet_fpn_backbone
# # create a parser object
# parser = argparse.ArgumentParser(description='Description of your program')

# # add an optional "--model" argument 
# parser.add_argument('--backbone', type=str, help='Description of the backbone argument')
# parser.add_argument('--id', type=str, help='Description of the in-distribution dataset argument')
# parser.add_argument('--tau', nargs='+', type=float, help='Description of the tau argument')

# # %%
# args = parser.parse_args()

# Experiment selection: in-distribution dataset key, detector backbone and the
# list of tau values whose monitors are loaded further down.
id = "voc"
backbone = "resnet"
taus = [0.05]

# Class names per dataset. The position of a name in its list is the integer
# key it gets in `label_dict` below.
VOC_THING_CLASSES = [
    'person', 'bird', 'cat', 'cow', 'dog',
    'horse', 'sheep', 'airplane', 'bicycle', 'boat',
    'bus', 'car', 'motorcycle', 'train', 'bottle',
    'chair', 'dining table', 'potted plant', 'couch', 'tv',
]

BDD_THING_CLASSES = [
    'pedestrian', 'rider', 'car', 'truck', 'bus',
    'train', 'motorcycle', 'bicycle', 'traffic light', 'traffic sign',
]

# Anything other than 'voc' selects the BDD class list.
label_list = BDD_THING_CLASSES if id != 'voc' else VOC_THING_CLASSES
# Integer class index -> class name.
label_dict = dict(enumerate(label_list))

# Pick the device once and use it for both the model and the input tensors
# (the extraction loop moves images with `.to(device)`). Previously the model
# was pinned to 'cuda' while the inputs could fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

cfg = get_cfg()
# NOTE(review): the config file is always taken from the BDD-Detection folder,
# also when id == "voc" -- confirm this is intended.
cfg.merge_from_file(f"vos/detection/configs/BDD-Detection/faster-rcnn/{backbone}.yaml")
cfg.MODEL.WEIGHTS = f"model_final_vos_{backbone}_{id}.pth"
cfg.MODEL.DEVICE = device.type
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
# One output class per entry of the selected label list.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(label_list)

# Build the detector, switch it to eval mode and load the trained weights.
model = build_model(cfg)
model.eval()
checkpointer = DetectionCheckpointer(model)
checkpointer.load(cfg.MODEL.WEIGHTS)

# Test-time resize transform, parameterised by the test sizes from the config.
aug = T.ResizeShortestEdge(
    [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
)
# %%
def inference(inputs):
    """Run the global ``model`` step by step and also return box-head features.

    Args:
        inputs: Batch in the format accepted by ``model.preprocess_image``
            (the caller passes a one-element list of dicts with the keys
            "image", "height" and "width").

    Returns:
        A pair ``(pred_instances, feats)``: the output of
        ``model._postprocess`` and the rows of the box-head output selected
        by the indices that ``box_predictor.inference`` returned.
    """
    with torch.no_grad():
        batch = model.preprocess_image(inputs)
        backbone_feats = model.backbone(batch.tensor)
        proposals, _ = model.proposal_generator(batch, backbone_feats, None)  # RPN

        roi_heads = model.roi_heads
        pooled = roi_heads.box_pooler(
            [backbone_feats[name] for name in roi_heads.box_in_features],
            [p.proposal_boxes for p in proposals],
        )
        # Box-head output for every proposal, before any filtering.
        box_features = roi_heads.box_head(pooled)
        predictions = roi_heads.box_predictor(box_features)
        pred_instances, pred_inds = roi_heads.box_predictor.inference(predictions, proposals)
        pred_instances = roi_heads.forward_with_given_boxes(backbone_feats, pred_instances)

        # Rescale the outputs to the original image size.
        pred_instances = model._postprocess(pred_instances, inputs, batch.image_sizes)
        # Keep only the feature rows picked by `pred_inds`.
        # NOTE(review): presumably row-aligned with the returned instances for
        # a single-image batch -- confirm before batching several images.
        kept_feats = box_features[pred_inds]
    return pred_instances, kept_feats

# %%
# Feature extraction: for every tau, load the per-class monitors, run the
# detector over the evaluation dataset, attach the predictions (with monitor
# verdict and feature index) to each fiftyone sample and collect the
# per-detection feature vectors in `feats_list`.
feats_list = []
for tau in taus:
    # print(f"tau: {tau}")
    # Class name -> monitor, only for classes whose pickle file exists.
    monitors_dict = {}
    for class_name in label_list:
        monitor_path = f"monitors/{id}/{backbone}/{class_name}/monitor_for_clustering_parameter" + "_tau_" + str(tau) + ".pkl"
        # monitor_path = f"Monitors/{class_name}/monitor_for_clustering_parameter" + "_tau_" + str(tau) + ".pkl"
        if os.path.exists(monitor_path):
            with open(monitor_path, 'rb') as f:
                monitor = load(f)
            # Scale the `ivals` of every `good_ref` entry by [1 - delta, 1 + delta].
            # NOTE(review): `delta` is not assigned anywhere in the visible code;
            # it must already exist in the session or this raises NameError.
            for i in range(len(monitor.good_ref)):
                monitor.good_ref[i].ivals = monitor.good_ref[i].ivals*np.array([1-delta, 1+delta])
            monitors_dict[class_name] = monitor
        else:
            # Only reported here; a later prediction of this class raises
            # KeyError at the `monitors_dict[label]` lookup below.
            print(f"monitor for {class_name} not found")
    # NOTE(review): `eval_list` is currently unused -- the loop below is
    # hard-coded to "voc-val".
    eval_list = ["bdd-val", "ID-bdd-OOD-coco", "OOD-open"] if id == "bdd" else ["voc-val", "ID-voc-OOD-coco", "OOD-open"]
    # for dataset_name in eval_list:
    for dataset_name in ["voc-val"]:
        print(f"evaluating on {dataset_name}")
        dataset_val = fo.load_dataset(dataset_name)
        # `i` is the running position of the next feature in `feats_list`; both
        # are reset per dataset, so only the last dataset's features survive.
        i = 0
        feats_list = []
        with fo.ProgressBar() as pb:
            for sample in pb(dataset_val):
                image = cv2.imread(sample.filepath)
                # Original size, used below to make the boxes relative.
                height, width = image.shape[:2]
                image = aug.get_transform(image).apply_image(image)
                # Move the channel axis first and put the tensor on `device`.
                image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)).to(device)
                inputs = [{"image": image, "height": height, "width": width}]
                preds, feats = inference(inputs)
                feats = feats.cpu().detach().numpy()
                boxes = preds[0]["instances"].get("pred_boxes").tensor.cpu().detach().numpy()
                scores = preds[0]["instances"].get("scores").cpu().detach().numpy()
                classes = preds[0]["instances"].pred_classes.cpu().detach().numpy()
                
                detections = []
                oods = []             
                for label, score, box, feat in zip(classes, scores, boxes, feats):
                    # Integer class index -> class name.
                    label = label_dict[label]
                    x1, y1, x2, y2 = box
                    # [x, y, w, h], each divided by the original image size.
                    rel_box = [x1/width, y1/height, (x2 - x1) / width, (y2 - y1) / height]
                    # The monitor is called with a single-row batch; take its only verdict.
                    verdict = monitors_dict[label].make_verdicts(feat[np.newaxis, :])[0]
                    detections.append(
                        fo.Detection(
                            label=label,
                            bounding_box=rel_box,
                            confidence=score,
                            verdict=verdict,
                            feature_idx=i
                        ),
                    )
                    # `feature_idx` stored above is this feature's index in `feats_list`.
                    i += 1
                    feats_list.append(feat)
                #     if not verdict:
                #         oods.append(
                #             fo.Detection(
                #                 label="OOD",
                #                 bounding_box=rel_box
                #             ),
                #         )
                # sample["OOD"] = fo.Detections(detections=oods)
                # Store the detections on the sample and persist them.
                sample["prediction"] = fo.Detections(detections=detections)
                sample.save()
        



[05/17 08:52:50 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from model_final_vos_resnet_voc.pth ...


The checkpoint state_dict contains keys that are not used by the model:
  pixel_mean
  pixel_std
  roi_heads.logistic_regression.{bias, weight}
  roi_heads.noise.noise
  roi_heads.weight_energy.{bias, weight}


evaluating on voc-val
 100% |███████████████| 4952/4952 [6.9m elapsed, 0s remaining, 11.7 samples/s]      


In [64]:
# Evaluate the "prediction" field against the "detections" ground-truth field.
# The results are keyed by "eval"; the following cells filter the predictions
# on F("eval") == "tp" / "fp".
results = dataset_val.evaluate_detections(
    "prediction",
    gt_field="detections",
    eval_key="eval",
    compute_mAP=True,
)

Evaluating detections...
 100% |███████████████| 4952/4952 [34.1s elapsed, 0s remaining, 151.0 samples/s]      
Performing IoU sweep...
 100% |███████████████| 4952/4952 [27.0s elapsed, 0s remaining, 198.2 samples/s]      


In [78]:
# Turn the list of per-detection feature vectors into one array so it can be
# indexed with the `feature_idx` values stored on the detections.
feats_npy = np.array(feats_list)
print(f"Saved {len(feats_npy)} features")

Saved 17479 features


In [79]:
# Group the features of all true-positive predictions by predicted class.
tp_prediction_view = dataset_val.filter_labels("prediction", F("eval") == "tp")
class_names = list(label_dict.values())
# Class name -> list of row indices into `feats_npy`.
features_idx_dict = {cls: [] for cls in class_names}
with fo.ProgressBar() as pb:
    for sample in pb(tp_prediction_view):
        for detection in sample.prediction.detections:
            features_idx_dict[detection.label].append(detection.feature_idx)
# Class name -> array holding the selected feature rows.
feats_tp_dict = {cls: feats_npy[idxs] for cls, idxs in features_idx_dict.items()}

# Per-class counts and grand total. The accumulator is deliberately not called
# `sum`: that would rebind the builtin for every later cell of the session.
total = 0
for cls, idxs in features_idx_dict.items():
    print(f"{cls}: {len(idxs)}")
    total += len(idxs)
print(f"total: {total}")

 100% |███████████████| 4811/4811 [9.7s elapsed, 0s remaining, 561.9 samples/s]       


In [83]:
# Group the features of all false-positive predictions by predicted class.
fp_prediction_view = dataset_val.filter_labels("prediction", F("eval") == "fp")
class_names = list(label_dict.values())
# Class name -> list of row indices into `feats_npy`.
features_idx_dict = {cls: [] for cls in class_names}
with fo.ProgressBar() as pb:
    for sample in pb(fp_prediction_view):
        for detection in sample.prediction.detections:
            features_idx_dict[detection.label].append(detection.feature_idx)
# Class name -> array holding the selected feature rows.
feats_fp_dict = {cls: feats_npy[idxs] for cls, idxs in features_idx_dict.items()}

# Per-class counts and grand total. The accumulator is deliberately not called
# `sum`: that would rebind the builtin for every later cell of the session.
total = 0
for cls, idxs in features_idx_dict.items():
    print(f"{cls}: {len(idxs)}")
    total += len(idxs)
print(f"total: {total}")

 100% |███████████████| 2640/2640 [5.4s elapsed, 0s remaining, 506.6 samples/s]      
person: 1869
bird: 103
cat: 89
cow: 119
dog: 177
horse: 114
sheep: 85
airplane: 67
bicycle: 115
boat: 189
bus: 69
car: 502
motorcycle: 102
train: 121
bottle: 275
chair: 705
dining table: 221
potted plant: 314
couch: 238
tv: 138
total: 5612


In [12]:
import pickle

# Persist the per-class true-positive features. The dict built by the TP cell
# is named `feats_tp_dict`; `features_tp_dict` is never defined.
# NOTE(review): this writes "<id>_feats_tp_dict.pickle" into the working
# directory, while tune_parameter reads
# "eval_feats/<id>/<backbone>/<dataset>_feats_tp_dict.pickle" -- confirm how
# the file gets there.
with open(f'{id}_feats_tp_dict.pickle', 'wb') as f:
    pickle.dump(feats_tp_dict, f)


In [13]:
# Persist the per-class false-positive features. The dict built by the FP cell
# is named `feats_fp_dict`; `features_fp_dict` is never defined.
# NOTE(review): this writes "<id>_feats_fp_dict.pickle" into the working
# directory, while tune_parameter reads
# "eval_feats/<id>/<backbone>/<dataset>_feats_fp_dict.pickle" -- confirm how
# the file gets there.
with open(f'{id}_feats_fp_dict.pickle', 'wb') as f:
    pickle.dump(feats_fp_dict, f)

# monitor evaluation

In [1]:
from pickle import load
import os
def load_monitors(id, backbone, label_list, tau_list):
    """Unpickle the per-class monitors that exist on disk.

    Args:
        id: Dataset key used as the first path component below ``monitors/``.
        backbone: Backbone name used as the second path component.
        label_list: Class names; each one is a sub-directory name.
        tau_list: One tau per class, paired with ``label_list`` by position;
            its string form is part of the file name.

    Returns:
        Dict mapping class name to the unpickled monitor. Classes whose file
        is missing are reported on stdout and left out of the dict.
    """
    monitors_dict = {}
    for class_name, tau in zip(label_list, tau_list):
        monitor_path = f"monitors/{id}/{backbone}/{class_name}/monitor_for_clustering_parameter_tau_{tau!s}.pkl"
        if not os.path.exists(monitor_path):
            print(f"monitor for {monitor_path} not found")
            continue
        # Unpickling runs code from the file -- only use trusted monitor files.
        with open(monitor_path, 'rb') as fh:
            monitors_dict[class_name] = load(fh)
    return monitors_dict

In [7]:
import pandas as pd
import numpy as np
import pickle
import gradio as gr
# Class names evaluated per in-distribution dataset.
VOC_THING_CLASSES = [
    'person', 'bird', 'cat', 'cow', 'dog',
    'horse', 'sheep', 'airplane', 'bicycle', 'boat',
    'bus', 'car', 'motorcycle', 'train', 'bottle',
    'chair', 'dining table', 'potted plant', 'couch', 'tv',
]

# 'train' is left out of this list on purpose (it was commented out).
BDD_THING_CLASSES = [
    'pedestrian', 'rider', 'car', 'truck', 'bus',
    'motorcycle', 'bicycle', 'traffic light', 'traffic sign',
]

# Reference numbers per dataset and backbone. tune_parameter shows entry 0 in
# the COCO row and entry 1 in the Open Images row of its "FPR(benchmark)" column.
benchmark_vos = {
    "voc": {"resnet": [47.53, 51.33], "regnet": [47.77, 48.33]},
    "bdd": {"resnet": [44.27, 35.54], "regnet": [36.61, 27.24]},
}

# def tune_parameter(id, backbone, tau, delta, num_limit, progress=gr.Progress()):
def tune_parameter(id, backbone, tau, delta, num_limit):
    """Evaluate the per-class monitors for one parameter setting.

    Args:
        id: In-distribution dataset key; "voc" selects the VOC classes and
            files, any other value the BDD ones.
        backbone: Backbone name; selects the benchmark entry and the folder
            the feature pickles are read from.
        tau: Tau used to pick the monitor files. For "voc" the classes
            "person" and "car" always use 0.05 instead.
        delta: Widening factor; the ``ivals`` of every ``good_ref`` entry of
            a monitor are multiplied by ``[1 - delta, 1 + delta]``.
        num_limit: Only monitors of classes with fewer than this many
            true-positive feature rows are widened.

    Returns:
        Five DataFrames: the in-distribution summary (TPR / FPR), the FPR on
        the two out-of-distribution sets next to the benchmark values, the
        per-class TP acceptance, the per-class FP rejection rate and the
        number of clusters per class.

    Raises:
        KeyError: If a class has no monitor (``load_monitors`` skips classes
            whose file is missing) or no entry in a feature dict.
    """
    is_voc = id == "voc"
    benchmark = benchmark_vos[id][backbone]
    label_list = VOC_THING_CLASSES if id == 'voc' else BDD_THING_CLASSES

    # Load the in-distribution TP / FP features.
    feats_dir = f'eval_feats/{id}/{backbone}'
    val_name = "voc-val" if is_voc else "bdd-val"
    with open(f'{feats_dir}/{val_name}_feats_tp_dict.pickle', 'rb') as f:
        feats_tp_dict = pickle.load(f)
    with open(f'{feats_dir}/{val_name}_feats_fp_dict.pickle', 'rb') as f:
        feats_fp_dict = pickle.load(f)

    # Load the monitors; two VOC classes are pinned to tau = 0.05.
    tau_list = [tau] * len(label_list)
    if is_voc:
        for pinned in ("person", "car"):
            tau_list[label_list.index(pinned)] = 0.05
    monitors_dict = load_monitors(id, backbone, label_list, tau_list)

    # Widen the intervals of the classes with few true positives.
    widen = np.array([1 - delta, 1 + delta])
    for label in label_list:
        if len(feats_tp_dict[label]) < num_limit:
            for ref in monitors_dict[label].good_ref:
                ref.ivals = ref.ivals * widen
    data_num_clusters = [[label, len(monitors_dict[label].good_ref)] for label in label_list]

    # Verdicts on the in-distribution data, per class and in total.
    data_tp, data_fp = [], []
    tp_accepted = tp_total = fp_accepted = fp_total = 0
    for label in label_list:
        monitor = monitors_dict[label]
        accepted, total = _count_accepted(monitor, feats_tp_dict[label])
        data_tp.append([label, total, _ratio(accepted, total)])
        tp_accepted += accepted
        tp_total += total
        accepted, total = _count_accepted(monitor, feats_fp_dict[label])
        data_fp.append([label, total, _ratio(total - accepted, total)])
        fp_accepted += accepted
        fp_total += total
    TPR = round(_ratio(tp_accepted, tp_total) * 100, 2)
    FPR = round(_ratio(fp_accepted, fp_total) * 100, 2)
    display_name = "PASCAL-VOC" if is_voc else "BDD100k"
    df_summary = pd.DataFrame([[display_name, f"{TPR}%", "95%", f"{FPR}%"]], columns=["Dataset", "TPR", "TPR(benchmark)", "FPR"])

    # Verdicts on the out-of-distribution sets: (file prefix, shown name),
    # paired by position with the two benchmark values.
    coco_name = "ID-voc-OOD-coco" if is_voc else "ID-bdd-OOD-coco"
    ood_sets = [(coco_name, "COCO"), ("OOD-open", "Open Images")]
    data_ood = []
    for (ood_name, shown_name), bench_fpr in zip(ood_sets, benchmark):
        with open(f'{feats_dir}/{ood_name}_feats_fp_dict.pickle', 'rb') as f:
            ood_fp_dict = pickle.load(f)
        ood_accepted = ood_total = 0
        for label in label_list:
            accepted, total = _count_accepted(monitors_dict[label], ood_fp_dict[label])
            ood_accepted += accepted
            ood_total += total
        ood_fpr = round(_ratio(ood_accepted, ood_total) * 100, 2)
        data_ood.append([shown_name, f"{ood_fpr}%", f"{bench_fpr}%"])

    # Build the result tables; rates are sorted numerically before being
    # turned into percent strings.
    df_ood = pd.DataFrame(data_ood, columns=["Dataset", "FPR", "FPR(benchmark)"])
    df_tp = pd.DataFrame(data_tp, columns=["class", "number", "TP acceptance"])
    df_tp.sort_values(by=["TP acceptance"], ascending=False, inplace=True)
    df_tp["TP acceptance"] = df_tp["TP acceptance"].apply(lambda x: f"{x*100:.2f}%")
    df_fp = pd.DataFrame(data_fp, columns=["class", "number", "FP rejection rate"])
    df_fp.sort_values(by=["FP rejection rate"], ascending=False, inplace=True)
    df_fp["FP rejection rate"] = df_fp["FP rejection rate"].apply(lambda x: f"{x*100:.2f}%")
    df_clusters = pd.DataFrame(data_num_clusters, columns=["class", "number of clusters"])
    return df_summary, df_ood, df_tp, df_fp, df_clusters


def _count_accepted(monitor, feats):
    """Return ``(accepted, total)`` for the verdicts *monitor* makes on *feats*."""
    verdict = monitor.make_verdicts(feats)
    return np.sum(verdict), len(verdict)


def _ratio(part, whole):
    """Return ``part / whole``, or NaN (without a warning) when *whole* is 0."""
    return part / whole if whole else float("nan")

In [8]:
# Example rows for the UI, in the order of the input components:
# dataset id, backbone, tau, delta, number limit.
examples = [
    ["voc", "resnet", 0.05, 0, 100000],
    ["bdd", "resnet", 0.05, 0, 100000],
]

In [9]:
# Gradio front end for tune_parameter: five input widgets on the left, the five
# result tables (grouped in tabs) on the right.
with gr.Blocks(theme="gradio/monochrome") as demo:
    gr.Markdown("# Monitor intervals Tuning App")
    gr.Markdown("This app is used to tune the monitor intervals to achieve the desired TPR of 95%")
    with gr.Row():
        with gr.Column():
            # Top-level components replace the deprecated `gr.inputs.*`
            # classes (`default=` became `value=`). The widgets get their own
            # names so the notebook globals `id` / `backbone` are not rebound.
            id_radio = gr.Radio(["voc", "bdd"], label="In-distribution dataset")
            backbone_radio = gr.Radio(["resnet", "regnet"], label="Backbone")
            tau_dropdown = gr.Dropdown(choices=[1.0, 0.1, 0.05, 0.01], label="Tau")
            delta_slider = gr.Slider(minimum=0, maximum=19, value=0.5, label="Delta")
            number_limit_number = gr.Number(value=100000, label="Number limit")
            button = gr.Button("Run")
        with gr.Column():
            with gr.Tab("Monitor Performance"):
                id_table_output = gr.Dataframe(label="In-distribution Monitor Performance")
                ood_table_output = gr.Dataframe(label="Out-of-distribution Monitor Performance")
            with gr.Tab("TP Acceptance"):
                TP_table_output = gr.Dataframe(label="TP Acceptance")
            with gr.Tab("FP Rejection"):
                FP_table_output = gr.Dataframe(label="FP Rejection")
            with gr.Tab("Number of clusters"):
                num_clusters_table_output = gr.Dataframe(label="Number of clusters")
    # The same wiring is used for the cached examples and the Run button;
    # the order matches tune_parameter's parameters and return values.
    ui_inputs = [id_radio, backbone_radio, tau_dropdown, delta_slider, number_limit_number]
    ui_outputs = [id_table_output, ood_table_output, TP_table_output, FP_table_output, num_clusters_table_output]
    examples_block = gr.Examples(inputs=ui_inputs, examples=examples, fn=tune_parameter, outputs=ui_outputs, cache_examples=True)
    button.click(fn=tune_parameter, inputs=ui_inputs, outputs=ui_outputs)
# NOTE(review): `concurrency_count` is a Gradio 3.x argument of `queue()` --
# revisit when upgrading Gradio.
demo.queue(concurrency_count=10).launch()

  super().__init__(
  super().__init__(
  super().__init__(
  super().__init__(value=default, label=label, optional=optional)


Caching examples at: '/home/hugo/bdd100k-monitoring/gradio_cached_examples/74'
Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


