In [9]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

from models import multiscale_model as multiscale
from models import model
from src.utils import visualize_detection_video

In [10]:
# Checkpoint file handed to both DetectionModel constructors in this notebook.
model_path = "checkpoints/yolov8n2/best.pt"
# Video clip that both models are run on and that the visualizations are rendered from.
video_path = "test_data/det_track/RGB/videos/V_BIRD_043.mp4"

In [11]:
# Detector from `models.model`, built from the checkpoint at `model_path`.
yolo_model = model.DetectionModel(model_path, conf_threshold=0.3, iou_threshold=0.45)

# Detector from `models.multiscale_model`, built from the same checkpoint.
# Same confidence threshold as above, but a different IoU threshold (0.4 vs 0.45).
multiscale_model = multiscale.DetectionModel(model_path, conf_threshold=0.3, iou_threshold=0.4)

In [12]:
# Run the `models.model` detector over the clip. The result is consumed later as a
# sequence of per-frame records indexed by "boxes" (see the statistics cell).
yolo_frames = yolo_model.video_detect(video_path)

In [13]:
# Run the multiscale detector over the same clip. The result is consumed later as a
# sequence of per-frame records indexed by "boxes" and "labels".
# The recorded run of this cell printed per-box score/label lines and pairwise IoU values.
# NOTE(review): the printed IoUs (~0.96 for a small box nested inside a much larger one)
# look inflated; presumably the model computes IoU with a pixel-style "+ 1" offset on
# normalized coordinates -- confirm in models/multiscale_model.
multiscale_frames = multiscale_model.video_detect(video_path)

Box 0: [    0.49676     0.72323      0.5241     0.75911], Score: 0.5102986097335815, Label: 0.0
Box 1: [    0.49182     0.70981     0.53428     0.77266], Score: 0.07454720470640394, Label: 0.0
IoU between box 0 and box 1: 0.9605
Box 0: [    0.49644     0.71404     0.52788     0.75755], Score: 0.3411507209142049, Label: 0.0
Box 1: [    0.49578     0.71532     0.52829     0.75169], Score: 0.22376987669203016, Label: 1.0
IoU between box 0 and box 1: 0.9921
Box 0: [    0.49857     0.71703     0.52488     0.74948], Score: 0.5665166775385538, Label: 0.0
Box 1: [    0.49271      0.7029     0.53407     0.76316], Score: 0.06613085005018446, Label: 0.0
IoU between box 0 and box 1: 0.9597
Box 0: [    0.49966     0.71425     0.52659      0.7476], Score: 0.5961758295694987, Label: 0.0
Box 1: [    0.49567     0.70157     0.53516     0.75972], Score: 0.0707089172469245, Label: 0.0
IoU between box 0 and box 1: 0.9648
Box 0: [    0.50169     0.70835     0.52738     0.74261], Score: 0.5903107722600301, 

In [14]:
def bb_intersection_over_union(boxA, boxB, pixel_offset=0.0):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Each box is an indexable ``(x_min, y_min, x_max, y_max)``.

    Args:
        boxA: First box.
        boxB: Second box.
        pixel_offset: Value added to every width and height. Keep the default
            ``0.0`` for continuous coordinates such as boxes normalized to
            [0, 1]; pass ``1`` for inclusive integer pixel coordinates, where a
            box from column 0 to column 9 is 10 pixels wide.

    Returns:
        The IoU as a Python float in [0, 1]. Returns 0.0 when the union has no
        area (e.g. two degenerate boxes) instead of dividing by zero.
    """
    # Corners of the overlap rectangle.
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Clamp at zero so disjoint boxes contribute no intersection.
    inter_area = max(0.0, xB - xA + pixel_offset) * max(0.0, yB - yA + pixel_offset)

    # Clamp the individual extents too, so a malformed box (max < min) cannot
    # produce a negative or spuriously positive area.
    area_a = max(0.0, boxA[2] - boxA[0] + pixel_offset) * max(0.0, boxA[3] - boxA[1] + pixel_offset)
    area_b = max(0.0, boxB[2] - boxB[0] + pixel_offset) * max(0.0, boxB[3] - boxB[1] + pixel_offset)

    union_area = area_a + area_b - inter_area
    if union_area <= 0:
        return 0.0
    return float(inter_area / union_area)

In [15]:
# Per-clip summary comparing the two detectors frame by frame.

# Number of frames in which each model returned no boxes at all.
miss_counter = {
    "yolo": 0,
    "multiscale": 0,
}
# Tally of multiscale labels: label 0 is counted as "bird", anything else as "drone".
class_count = {
    "bird": 0,
    "drone": 0,
}

# Histogram: number of boxes in a multiscale frame -> number of such frames.
bboxes_count = {}

# zip() pairs frames positionally and stops at the shorter of the two sequences.
for yolo_frame, multiscale_frame in zip(yolo_frames, multiscale_frames):
    if len(yolo_frame["boxes"]) == 0:
        miss_counter["yolo"] += 1

    n_boxes = len(multiscale_frame["boxes"])
    if n_boxes == 0:
        miss_counter["multiscale"] += 1
    bboxes_count[n_boxes] = bboxes_count.get(n_boxes, 0) + 1

    for label in multiscale_frame["labels"]:
        if label == 0:
            class_count["bird"] += 1
        else:
            class_count["drone"] += 1

# Report the histogram in ascending box-count order so it reads as a distribution.
for count, freq in sorted(bboxes_count.items()):
    print(f"Number of frames with {count} boxes: {freq}")
print(f"Class counts - Birds: {class_count['bird']}, Drones: {class_count['drone']}")
print(f"Missed detections - YOLO: {miss_counter['yolo']}, Multiscale: {miss_counter['multiscale']}")

Number of frames with 1 boxes: 183
Number of frames with 2 boxes: 120
Number of frames with 0 boxes: 6
Number of frames with 3 boxes: 1
Class counts - Birds: 421, Drones: 5
Missed detections - YOLO: 166, Multiscale: 6


In [16]:
# Call the project helper once per model, pairing each detection sequence with its
# output file; presumably each call writes an annotated copy of the clip -- confirm
# in src.utils.
# NOTE(review): the last recorded run of this cell ended with
# "AttributeError: 'list' object has no attribute 'tolist'". The failing line is not
# visible here; presumably some per-frame field is a plain list where the helper
# expects an array/tensor -- check src.utils.visualize_detection_video.
for frames, destination in (
    (yolo_frames, "outputs/yolo_detection.mp4"),
    (multiscale_frames, "outputs/multiscale_detection.mp4"),
):
    visualize_detection_video(
        detection_frames=frames,
        video_path=video_path,
        output_path=destination,
    )


AttributeError: 'list' object has no attribute 'tolist'