In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

from models import multiscale_model as multiscale
from models import model
from models import track_model
from src.utils import visualize_detection_video, visualize_tracking_video

In [2]:
# Checkpoint file handed to both DetectionModel constructors in the next cell.
model_path = "checkpoints/RGB/yolov8n2/best.pt"
# Clip that both detectors are run on via video_detect().
video_path = "data/test_data/det_track/RGB/videos/V_BIRD_050.mp4"

In [3]:
# Detector from models.model, loaded from the shared checkpoint.
yolo_model = model.DetectionModel(
    model_path,
    conf_threshold=0.3,
    iou_threshold=0.45,
)

# Detector from models.multiscale_model, loaded from the same checkpoint
# but configured with a lower IoU threshold.
multiscale_model = multiscale.DetectionModel(
    model_path,
    conf_threshold=0.3,
    iou_threshold=0.1,
)

# One tracker per detector; each receives its detector as the only argument.
# Both are referenced only by the commented-out tracking cells further down.
yolo_track_model = track_model.TrackingModel(yolo_model)
multiscale_track_model = track_model.TrackingModel(multiscale_model)

In [4]:
# Run the plain detector over the whole clip.
# NOTE(review): these thresholds (0.1 / 0.2) differ from the ones given to the
# constructor (0.3 / 0.45); presumably the per-call values take precedence —
# confirm against DetectionModel.video_detect.
yolo_frames = yolo_model.video_detect(
    video_path,
    conf_threshold=0.1,
    iou_threshold=0.2,
)

In [5]:
# Run the multiscale detector over the same clip, using the same thresholds
# it was constructed with. The call logs per-frame progress to stdout.
multiscale_frames = multiscale_model.video_detect(
    video_path,
    conf_threshold=0.3,
    iou_threshold=0.1,
)

Processing frame 1/303
Processing frame 2/303
Processing frame 3/303
Processing frame 4/303
Processing frame 5/303
Processing frame 6/303
Processing frame 7/303
  → Added missing object: label 0.0, score 0.274
Processing frame 8/303
Processing frame 9/303
Processing frame 10/303
Processing frame 11/303
Processing frame 12/303
Processing frame 13/303
  → Added missing object: label 0.0, score 0.249
Processing frame 14/303
Processing frame 15/303
  → Added missing object: label 0.0, score 0.296
Processing frame 16/303
Processing frame 17/303
Processing frame 18/303
Processing frame 19/303
Processing frame 20/303
Processing frame 21/303
Processing frame 22/303
Processing frame 23/303
Processing frame 24/303
Processing frame 25/303
  → Added missing object: label 0.0, score 0.289
Processing frame 26/303
Processing frame 27/303
Processing frame 28/303
Processing frame 29/303
Processing frame 30/303
Processing frame 31/303
Processing frame 32/303
  → Added missing object: label 0.0, score 0.

In [6]:
def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Each box is read as ``(x1, y1, x2, y2)`` through indices 0-3. Extents are
    computed as ``x2 - x1 + 1`` and ``y2 - y1 + 1``, so a box whose two
    corners coincide still counts as 1 x 1.

    Args:
        boxA: First box; any indexable holding four numbers.
        boxB: Second box, same layout as ``boxA``.

    Returns:
        Intersection area divided by union area. ``0.0`` is returned when the
        boxes do not overlap, and also when the union area is not positive
        (degenerate boxes), where a plain division would fail.
    """
    # Corners of the overlap rectangle.
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Clamp each side at zero so disjoint boxes produce an empty intersection.
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)

    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    unionArea = boxAArea + boxBArea - interArea
    # Degenerate input (e.g. x2 == x1 - 1 on both boxes) gives a zero union;
    # report "no overlap" instead of dividing by zero.
    if unionArea <= 0:
        return 0.0

    return interArea / float(unionArea)

In [7]:
# Frames in which a detector returned no boxes at all, tallied per detector.
miss_counter = {"yolo": 0, "multiscale": 0}
# Multiscale detections per class: label 0 is tallied as bird, any other
# label as drone.
class_count = {"bird": 0, "drone": 0}
# Histogram over multiscale frames: boxes in a frame -> number of such frames.
bboxes_count = {}

# zip() pairs the two result sequences frame by frame and stops at the
# shorter one.
for yolo_frame, multiscale_frame in zip(yolo_frames, multiscale_frames):
    n_boxes = len(multiscale_frame["boxes"])

    if len(yolo_frame["boxes"]) == 0:
        miss_counter["yolo"] += 1
    if n_boxes == 0:
        miss_counter["multiscale"] += 1

    for label in multiscale_frame["labels"]:
        class_name = "bird" if label == 0 else "drone"
        class_count[class_name] += 1

    bboxes_count.setdefault(n_boxes, 0)
    bboxes_count[n_boxes] += 1

# Keys are reported in first-seen order, not sorted.
for count, freq in bboxes_count.items():
    print(f"Number of frames with {count} boxes: {freq}")
print(f"Class counts - Birds: {class_count['bird']}, Drones: {class_count['drone']}")
print(f"Missed detections - YOLO: {miss_counter['yolo']}, Multiscale: {miss_counter['multiscale']}")

Number of frames with 1 boxes: 277
Number of frames with 0 boxes: 20
Number of frames with 2 boxes: 5
Number of frames with 3 boxes: 1
Class counts - Birds: 290, Drones: 0
Missed detections - YOLO: 152, Multiscale: 20


In [8]:
# Visualize the multiscale detections on the source clip; presumably the
# helper writes an annotated video to output_path — confirm in src.utils.
visualize_detection_video(
    video_path=video_path,
    detection_frames=multiscale_frames,
    output_path="outputs/multiscale_detection.mp4",
)

# Disabled: the same visualization for the plain YOLO detections.
# visualize_detection_video(
#     video_path=video_path,
#     detection_frames=yolo_frames,
#     output_path="outputs/yolo_detection.mp4",
# )


In [9]:
# yolo_track_outputs = yolo_track_model.video_track(video_path)
# multiscale_track_outputs = multiscale_track_model.video_track(video_path)

In [10]:
# for idx, frame in enumerate(multiscale_track_outputs):
#     print(f"Frame {idx + 1}:")
#     print(frame)

In [11]:
# visualize_tracking_video(
#     video_path=video_path,
#     tracking_results=multiscale_track_outputs,
#     output_path="outputs/multiscale_tracking.mp4"
# )