# Notes

We provide this notebook for inference and visualizations. 

You can load images either from a dataloader (see Sec. 1) or from a local path (see Sec. 2).

You are welcome to join [IDEA](https://idea.edu.cn/en) ([中文网址](https://idea.edu.cn/))!

In [4]:
import os, sys
import torch, json
import numpy as np

from main import build_model_main
from util.slconfig import SLConfig
from datasets import build_dataset
from util.visualizer import COCOVisualizer
from util import box_ops

# 0. Initialize and Load Pre-trained Models

In [5]:
# Locations of the model definition and the trained weights. Both are relative
# paths, so they are resolved against the notebook's working directory.
model_config_path = "config/DINO/DINO_4scale_swin.py" # path of the model config file; change to match your setup
model_checkpoint_path = "checkpoint_best.pth" # path of the model checkpoint; change to match your setup
# See our Model Zoo section in README.md for more details about our pretrained models.

In [7]:
# Build the model from the config file and restore the trained weights.
args = SLConfig.fromfile(model_config_path) 
# Alternative device setting kept for reference: args.device = 'cuda:4'
# NOTE(review): the model is built with device 'cpu' here, but the inference
# cell further down calls model.cuda(), so a GPU is still needed at that point.
args.device = 'cpu' 
model, criterion, postprocessors = build_model_main(args)
# map_location='cpu' loads the checkpoint tensors onto the CPU.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
checkpoint = torch.load(model_checkpoint_path, map_location='cpu')
# The weights are stored under the 'model' key of the checkpoint.
model.load_state_dict(checkpoint['model'])
# Switch to eval mode; binding the result to `_` keeps the returned module
# from being echoed as the cell output.
_ = model.eval()

  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'MultiScaleDeformableAttention'

In [44]:
def display_box_strided_yolo_fusion(img, boxes, scores, labels, confidence_threshold = 0.3):
    """Draw the confident detections on a copy of ``img``, show it, and return it.

    NOTE(review): this function uses ``cv2`` and ``plt``, neither of which is
    imported in the notebook's import cell; they must already be available in
    the kernel.

    Args:
        img: Image array. It is converted with ``cv2.COLOR_BGR2RGB`` before
            being displayed, so BGR channel order is assumed.
        boxes: Iterable of boxes. The first four entries of each box are used
            as the two opposite corners ``(x1, y1, x2, y2)`` of the rectangle.
        scores: Iterable of confidences, aligned with ``boxes``.
        labels: Iterable of labels, aligned with ``boxes``. They take part in
            the lockstep iteration but are not drawn.
        confidence_threshold: Only detections whose score is strictly greater
            than this value are drawn.

    Returns:
        The annotated copy of ``img``; the input array is left untouched.
    """
    image = img.copy()
    color = (0, 255, 255)

    # zip() stops at the shortest input, so labels still bound the iteration
    # even though the label itself is not rendered.
    for box, score, _label in zip(boxes, scores, labels):
        # Draw only detections above the threshold; skip everything else.
        if score > confidence_threshold:
            # The box holds two corners, not a corner plus width/height.
            x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
            cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), color, thickness=4)
            # Write the rounded score slightly above the top-left corner.
            cv2.putText(image, str(round(score,2)), (int(x1), int(y1 - 5)), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

    plt.figure(figsize=(20,20))
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.show()

    return image

def get_image_id(img_name):
    """Turn a file name such as ``camera3_N_96.png`` into its integer image id.

    The name is expected to look like ``camera<C>_<S>_<F>`` with an optional
    ``.png`` or ``.txt`` extension, where ``<S>`` is one of ``M``, ``A``,
    ``E``, ``N``. The id is the decimal concatenation of the camera number,
    the position of the scene letter in that list, and the frame number.

    Args:
        img_name: File name (not a full path) of an image or label file.

    Returns:
        The integer image id built from the three name fields.
    """
    scene_codes = ['M', 'A', 'E', 'N']

    # Strip the extension: '.png' when present, otherwise try '.txt'.
    extension = '.png' if img_name.endswith('.png') else '.txt'
    stem = img_name.split(extension)[0]

    fields = stem.split('_')
    camera_number = int(fields[0].split('camera')[1])
    scene_number = scene_codes.index(fields[1])
    frame_number = int(fields[2])

    # Concatenate the decimal digits (no zero padding) and read them back as one int.
    return int(f"{camera_number}{scene_number}{frame_number}")

In [47]:
# Run the detector over every test image and collect COCO-style detections.
#
# NOTE(review): `T`, `Image` and `tqdm` are used below but are not imported in
# the notebook's import cell; they must already be available in the kernel.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of the test images.
folder_image = "/home/nghiemtd/ICML_2024/Track_4/datasets/fullFisheye/test/images"
# Optional cap on the number of images (e.g. 4 for a quick smoke test).
# None processes the whole folder, which the downstream evaluation needs.
max_images = None
# Detections with a score below this value are dropped. 0.0 keeps everything;
# confidence filtering is applied later via preds_to_coco_dt.
min_score = 0.0

transform = T.Compose([
    T.RandomResize([800], max_size=1333),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Move the model to the GPU once instead of on every iteration.
detector = model.cuda()
# A unit target size makes the post-processor return unscaled boxes; they are
# multiplied by the real image size below.
unit_size = torch.Tensor([[1.0, 1.0]]).cuda()

# Sort so the processing order (and the prefix kept by max_images) is deterministic.
image_names = sorted(os.listdir(folder_image))
if max_images is not None:
    image_names = image_names[:max_images]

dt_json = []
# Inference only: disable autograd so no computation graph is kept per image.
with torch.no_grad():
    for name in tqdm(image_names):
        file_image = os.path.join(folder_image, name)

        # convert() returns a new image, so the file handle can be closed right away.
        with Image.open(file_image) as raw_image:
            image = raw_image.convert("RGB")
        image_height, image_width, _ = np.asarray(image).shape

        image, _ = transform(image, None)
        output = detector(image[None].cuda())
        output = postprocessors['bbox'](output, unit_size)[0]

        boxes = output['boxes'].cpu().tolist()
        scores = output['scores'].cpu().tolist()
        labels = output['labels'].cpu().tolist()

        # Scale the corner coordinates (x1, y1, x2, y2) to pixels.
        new_boxes = [
            [out[0]*image_width, out[1]*image_height, out[2]*image_width, out[3]*image_height]
            for out in boxes
        ]

        # The id depends only on the file name, so compute it once per image.
        image_id = get_image_id(name)
        for box, score, label in zip(new_boxes, scores, labels):
            if score >= min_score:
                dt_json.append({
                    'image_id': image_id,
                    'category_id': int(label),
                    # COCO boxes are [x, y, width, height].
                    'bbox': [box[0], box[1], (box[2]-box[0]), (box[3]-box[1])],
                    'score': float(score)
                  })

100%|███████████████████████████████████████| 2712/2712 [16:12<00:00,  2.79it/s]


In [48]:
import json

# Persist the raw detections. The context manager closes the file, so the data
# is fully flushed to disk before a later cell reads it back.
with open('test_dt_pre.json', 'w') as dt_file:
    json.dump(dt_json, dt_file)

In [61]:
def preds_to_coco_dt(preds, conf=0.5):
    """Keep the predictions whose score reaches ``conf``, as fresh COCO records.

    Args:
        preds: Iterable of dicts with the keys ``'image_id'``,
            ``'category_id'``, ``'bbox'`` and ``'score'``.
        conf: Minimum score (inclusive) a prediction needs to be kept.

    Returns:
        A new list of dicts, in input order. ``image_id`` is cast to ``int``,
        ``bbox`` is a new list of the first four box entries, and
        ``category_id`` and ``score`` are passed through unchanged.
    """
    return [
        {
            'image_id': int(pred['image_id']),
            'category_id': pred['category_id'],
            # Index explicitly so a box with fewer than four entries still raises.
            'bbox': [pred['bbox'][i] for i in range(4)],
            'score': pred['score'],
        }
        for pred in preds
        if pred['score'] >= conf
    ]

In [73]:
# Read the saved detections back, closing the file as soon as it is parsed,
# then keep only the predictions scoring at least 0.4.
with open('test_dt_pre.json') as dt_file:
    test_preds = json.load(dt_file)
test_dt_json = preds_to_coco_dt(test_preds, conf=0.4)

In [72]:

from FishEye8K.evaluation_Linux.pycocotools.coco import COCO
from FishEye8K.evaluation_Linux.pycocotools.cocoeval_modified import COCOeval

# Score the filtered detections against the ground-truth annotations.
# NOTE(review): absolute, machine-specific path to the ground-truth file.
coco_gt = COCO('/home/nghiemtd/ICML_2024/Track_4/test_gt.json')
coco_dt = coco_gt.loadRes(test_dt_json)

coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
# Optional category restriction, kept for reference:
# coco_eval.params.catIds = [0, 1, 2, 4]

# The three evaluation stages, run in order.
for stage in (coco_eval.evaluate, coco_eval.accumulate, coco_eval.summarize):
    stage()

# (label, index into coco_eval.stats) for the headline numbers. Index 20 is
# read as the F1 score; presumably provided by the modified COCOeval - verify.
headline_stats = (
    ('AP_0.5-0.95', 0),
    ('AP_0.5', 1),
    ('AP_S', 3),
    ('AP_M', 4),
    ('AP_L', 5),
    ('f1_score: ', 20),
)

separator = '----------------------------------------'
print(separator)
for title, stat_index in headline_stats:
    print(title, coco_eval.stats[stat_index])
print(separator)

loading annotations into memory...
Done (t=0.10s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.04s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=11.63s).
Accumulating evaluation results...
DONE (t=0.74s).
 Average area under PR curve    (mAP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.311
 Average area under PR curve    (mAP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.520
 Average area under PR curve    (mAP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.324
 Average area under PR curve    (mAP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.187
 Average area under PR curve    (mAP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.511
 Average area under PR curve    (mAP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.796
 Average Recall                 (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.212
 Average Recall                 (AR) @[ IoU=0.50:0.95 