In [3]:
import cv2
from ultralytics import YOLO
import json
import time

main_model = YOLO("yolov8n.pt")
sub_model = YOLO("yolov8n.pt")

def detect_objects(frame, model):
    results = model(frame)
    return results

def detect_subobjects(frame, main_bboxes):
    sub_detections = []
    for bbox in main_bboxes:
        x1, y1, x2, y2 = bbox
        cropped_frame = frame[y1:y2, x1:x2]
        results = detect_objects(cropped_frame, sub_model)
        sub_detections.append(results)
    return sub_detections


def format_to_json(main_detections, sub_detections):
    output = []
    for i, (main_obj, sub_objs) in enumerate(zip(main_detections[0].boxes, sub_detections)):
        main_json = {
            "object": main_detections[0].names[int(main_obj.cls[0])],
            "id": i,
            "bbox": main_obj.xyxy[0].tolist(),
            "subobject": []
        }

        for j, sub_obj_res in enumerate(sub_objs):
            if len(sub_obj_res) > 0 and len(sub_obj_res[0].boxes) > 0:
                for sub_obj in sub_obj_res[0].boxes:
                    sub_json = {
                        "object": sub_obj_res[0].names[int(sub_obj.cls[0])],
                        "id": j,
                        "bbox": sub_obj.xyxy[0].tolist()
                    }
                    main_json["subobject"].append(sub_json)
            else:
                sub_json = {
                    "object": "None",
                    "id": j,
                    "bbox": []
                }
                main_json["subobject"].append(sub_json)

        output.append(main_json)
    return json.dumps(output, indent=4)

def main(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    start_time = time.time()

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        main_results = detect_objects(frame, main_model)
        main_results = main_model(frame)

        main_bboxes = []
        for box in main_results[0].boxes:
          x1, y1, x2, y2 = map(int, box.xyxy[0])
          main_bboxes.append([x1, y1, x2, y2])

        sub_results = detect_subobjects(frame, main_bboxes)
        output_json = format_to_json(main_results, sub_results)
        print(output_json)

        frame_count += 1
    fps = frame_count / (time.time() - start_time)
    print(f"Inference Speed: {fps:.2f} FPS")

    cap.release()

if __name__ == "__main__":
    main("Chandu.mp4")


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 111MB/s]


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
        "object": "motorcycle",
        "id": 0,
        "bbox": [
            0.0,
            171.2018585205078,
            734.3135375976562,
            519.25927734375
        ],
        "subobject": [
            {
                "object": "motorcycle",
                "id": 0,
                "bbox": [
                    361.4631652832031,
                    20.364730834960938,
                    729.36572265625,
                    344.0994567871094
                ]
            }
        ]
    }
]

0: 384x640 1 motorcycle, 193.0ms
Speed: 3.6ms preprocess, 193.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 motorcycle, 193.0ms
Speed: 3.7ms preprocess, 193.0ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 640x640 (no detections), 357.5ms
Speed: 4.4ms preprocess, 357.5ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)
[
    {
        "ob

KeyboardInterrupt: 