In [26]:
import numpy as np
import torch
import cv2

from detectron2.config import LazyConfig
from detectron2.utils.logger import setup_logger
from detectron2.data import DatasetCatalog, MetadataCatalog, get_detection_dataset_dicts
import detectron2.data.transforms as T
from detectron2.structures import Instances, Boxes, pairwise_iou
from detectron2.data.detection_utils import annotations_to_instances

from data_utils import read_split_file, register_dataset
from detection_pipeline import ElevatorDetector, ElevatorDetectorLazyConf

# Initialise detectron2's logger with its default arguments; the call returns
# the logger object, which is why the cell echoes a Logger repr as its output.
setup_logger()

<Logger detectron2 (DEBUG)>

In [48]:
# Evaluation settings read by the cells below.
# "vit" selects the LazyConfig-based pipeline; any other value falls back to ElevatorDetector.
backbone = "vit"
# Whether the recovery stage is enabled (passed as weights path or flag, depending on backbone).
use_recovery = True
# Sub-directory of data/panels/ holding split.txt; also the prefix of the registered dataset names.
dataset_name = "mixed"
# A ground-truth box counts as detected when its best IoU is strictly greater than this value.
iou_thresh = 0.75

In [3]:
# Build the detection pipeline for the configured backbone.
if backbone != "vit":
    # Non-ViT path: the detector takes the recovery switch directly.
    pipeline = ElevatorDetector(use_recovery=use_recovery)
else:
    # ViT path: recovery is enabled by handing over a checkpoint path,
    # and disabled by passing None.
    recovery_weights = None
    if use_recovery:
        recovery_weights = "models/recovery_vit/model_best.pth"
    cfg = LazyConfig.load("configs/mask_rcnn_vit_base.py")
    pipeline = ElevatorDetectorLazyConf(cfg, recovery_weights=recovery_weights)

[32m[03/01 02:45:26 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from models/segmentation_vit/model_best.pth ...
[32m[03/01 02:45:28 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from models/recovery_vit/model_best.pth ...


In [4]:
# Register datasets
# The split file yields one collection of image paths per split; they are paired
# positionally with the names train / val / test.
datasets = read_split_file(f"data/panels/{dataset_name}/split.txt")
for spl, im_paths in zip(["train", "val", "test"], datasets):
    split_name = f"{dataset_name}_{spl}"
    # DatasetCatalog.register rejects a name that is already registered, which
    # makes this cell fail when it is re-run in the same kernel. Drop any stale
    # entry first so the cell is idempotent and always reflects the current split.
    if split_name in DatasetCatalog.list():
        DatasetCatalog.remove(split_name)
    DatasetCatalog.register(
        split_name,
        # Bind im_paths as a default argument so each lambda keeps its own
        # split instead of the loop variable's final value.
        lambda im_paths=im_paths: register_dataset(im_paths),
    )
    MetadataCatalog.get(split_name).set(
        thing_classes=["label", "button"], thing_colors=[(0, 255, 0), (0, 0, 255)]
    )
metadata = MetadataCatalog.get(f"{dataset_name}_train")

# filter_empty=False keeps images that have no annotations in the test set.
testset = get_detection_dataset_dicts(f"{dataset_name}_test", filter_empty=False)


registering mixed dataset: 100%|██████████| 22/22 [00:00<00:00, 27.38it/s]

[32m[03/01 02:45:30 d2.data.build]: [0mDistribution of instances among all 2 categories:
[36m|  category  | #instances   |  category  | #instances   |
|:----------:|:-------------|:----------:|:-------------|
|   label    | 423          |   button   | 423          |
|            |              |            |              |
|   total    | 846          |            |              |[0m





In [17]:
# Inspect a single record: the first entry of the test set.
d = testset[0]

# Ground-truth instances are built at the size stored in the record
# (height, width), i.e. without any resizing applied.
image_size = (d["height"], d["width"])
gt_instances: Instances = annotations_to_instances(d["annotations"], image_size)
print(gt_instances)

# Resize transform for the model input; it is only constructed here and
# applied to the image in a later cell.
resize_aug = T.ResizeShortestEdge(short_edge_length=1024, max_size=1024)


Instances(num_instances=24, image_height=1363, image_width=2047, fields=[gt_boxes: Boxes(tensor([[ 181.,   32.,  415.,  279.],
        [ 188.,  387.,  421.,  626.],
        [ 200.,  730.,  433.,  966.],
        [ 215., 1065.,  446., 1290.],
        [ 757., 1072.,  988., 1298.],
        [1305., 1074., 1538., 1297.],
        [ 749.,   32.,  992.,  274.],
        [ 749.,  385.,  990.,  628.],
        [ 752.,  737.,  989.,  973.],
        [1316.,  740., 1552.,  973.],
        [1323.,  391., 1561.,  635.],
        [1329.,   38., 1566.,  284.],
        [ 466.,   31.,  693.,  268.],
        [ 471.,  393.,  694.,  620.],
        [ 480.,  744.,  697.,  961.],
        [ 490., 1078.,  705., 1293.],
        [1041., 1084., 1258., 1295.],
        [1042.,  747., 1265.,  966.],
        [1046.,  396., 1271.,  623.],
        [1048.,   39., 1275.,  270.],
        [1619.,   57., 1843.,  284.],
        [1610.,  406., 1827.,  627.],
        [1600.,  749., 1813.,  962.],
        [1585., 1077., 1794., 1290.]]

In [None]:
# Load the image for the selected record and build the model input dict.
original_img = cv2.imread(d["file_name"])
# cv2.imread does not raise on a missing or unreadable file; it returns None,
# which would otherwise surface as an opaque AttributeError on `.shape` below.
if original_img is None:
    raise FileNotFoundError(f"cv2.imread could not read image: {d['file_name']}")
# Keep the original size so it can be passed along with the resized image.
height, width = original_img.shape[:2]
img = resize_aug.get_transform(original_img).apply_image(original_img)
# HWC -> CHW float32 tensor.
# NOTE(review): cv2 loads channels in BGR order — confirm the pipeline expects BGR.
img_tensor = torch.as_tensor(img.astype("float32").transpose(2, 0, 1))
# NOTE(review): `input` shadows the Python builtin; the name is kept because the
# inference cell reads it.
input = {
    "image": img_tensor,
    "height": height,
    "width": width,
    "img_path": d["file_name"],
}

In [23]:
# Run the pipeline on a one-element batch and move the predicted instances
# of that single image to the CPU for comparison with the ground truth.
outputs = pipeline([input])
predictions: Instances = outputs[0]["instances"].to("cpu")

In [24]:
# Split ground truth and predictions by class id.
# presumably id 0 is "label" and id 1 is "button", matching the order of
# thing_classes set at registration — verify against the annotations.
is_gt_label = gt_instances.gt_classes == 0
is_gt_btn = gt_instances.gt_classes == 1
gt_labels = gt_instances[is_gt_label].gt_boxes
gt_btns = gt_instances[is_gt_btn].gt_boxes

is_pred_label = predictions.pred_classes == 0
is_pred_btn = predictions.pred_classes == 1
pred_labels = predictions[is_pred_label].pred_boxes
pred_btns = predictions[is_pred_btn].pred_boxes

In [25]:
# Show ground-truth label boxes first, then the predicted ones, for a visual check.
for boxes in (gt_labels, pred_labels):
    print(boxes)

Boxes(tensor([[ 181.,   32.,  415.,  279.],
        [ 188.,  387.,  421.,  626.],
        [ 200.,  730.,  433.,  966.],
        [ 215., 1065.,  446., 1290.],
        [ 757., 1072.,  988., 1298.],
        [1305., 1074., 1538., 1297.],
        [ 749.,   32.,  992.,  274.],
        [ 749.,  385.,  990.,  628.],
        [ 752.,  737.,  989.,  973.],
        [1316.,  740., 1552.,  973.],
        [1323.,  391., 1561.,  635.],
        [1329.,   38., 1566.,  284.]]))
Boxes(tensor([[ 752.5822, 1076.4617,  995.5629, 1298.4468],
        [ 197.8863,  733.8450,  435.5687,  964.3733],
        [ 213.0734, 1064.6841,  450.7564, 1289.7684],
        [1320.4713,   36.7600, 1567.2487,  284.7813],
        [1306.7620, 1073.2610, 1539.5475, 1292.0822],
        [ 178.3348,   34.6388,  418.8491,  278.6847],
        [ 747.1640,   27.7416,  994.8909,  273.0611],
        [ 743.7171,  381.9838,  994.4982,  625.8149],
        [1314.0166,  739.8699, 1553.0757,  973.5745],
        [ 182.4760,  384.9702,  423.5211,  6

In [32]:
# IoU between every ground-truth label box (first argument) and every
# predicted label box (second argument).
labels_iou = pairwise_iou(boxes1=gt_labels, boxes2=pred_labels)
print(labels_iou)

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.9616, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.9450, 0.0000, 0.0000],
        [0.0000, 0.9580, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.9695, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000],
        [0.9310, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.9611, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.9603, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.9410, 0.0000,
         0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,

In [49]:
# Fraction of ground-truth label boxes whose best-overlapping prediction has an
# IoU strictly greater than iou_thresh.
total_labels = len(gt_labels)
if labels_iou.shape[1] == 0:
    # No predictions at all: a max over an empty dimension raises in torch,
    # so treat every ground-truth box as unmatched (best IoU of 0).
    scores_per_gt = labels_iou.new_zeros(total_labels)
else:
    # Best IoU achieved by any prediction, one value per ground-truth box.
    scores_per_gt = labels_iou.max(dim=1).values
# Summing the boolean mask counts the ground-truth boxes above the threshold.
num_correct = (scores_per_gt > iou_thresh).sum()
# With zero ground-truth boxes this is 0 / 0 and prints nan, as before.
print(num_correct / total_labels)

tensor(1.)
