In [1]:
import os
import orjson

from models import multiscale_model as multiscale
from models import model
from models import track_model
from models import simple_track
from src.utils import visualize_detection_video, visualize_tracking_video

In [2]:
videos_folder = "data/vipcup/RGB"
model_path = "checkpoints/RGB/yolov8n2/best.pt"
output_folder = "outputs/image_test/RGB"

# videos_folder = "data/Test_detection_tracking/RGB/videos"
# model_path = "checkpoints/RGB/yolov8n2/best.pt"
# output_folder = "outputs/track_test/RGB"

# videos_folder = "/home/cvpr2025/vipcup2025/data/Test_detection_tracking/IR/videos"
# model_path = "checkpoints/IR/yolov8n/best.pt"
# output_folder = "outputs/track_test_noCT/IR"

In [3]:
videos_path = [
    os.path.join(videos_folder, video)
    for video in os.listdir(videos_folder)
    if video.endswith(('.mp4', '.avi', '.mov'))
]

In [4]:
videos_path

['data/vipcup/RGB/V_BIRD_046105.mp4',
 'data/vipcup/RGB/V_BIRD_048107.mp4',
 'data/vipcup/RGB/V_BIRD_049108.mp4',
 'data/vipcup/RGB/V_DRONE_098208.mp4',
 'data/vipcup/RGB/V_DRONE_100210.mp4',
 'data/vipcup/RGB/V_DRONE_102212.mp4',
 'data/vipcup/RGB/V_DRONE_103213.mp4',
 'data/vipcup/RGB/V_DRONE_106216.mp4',
 'data/vipcup/RGB/V_DRONE_107217.mp4',
 'data/vipcup/RGB/V_DRONE_109219.mp4',
 'data/vipcup/RGB/V_DRONE_110220.mp4',
 'data/vipcup/RGB/V_DRONE_112222.mp4',
 'data/vipcup/RGB/V_DRONE_113223.mp4',
 'data/vipcup/RGB/V_BIRD_00564.mp4',
 'data/vipcup/RGB/V_BIRD_03796.mp4',
 'data/vipcup/RGB/V_DRONE_010120.mp4',
 'data/vipcup/RGB/V_DRONE_084194.mp4',
 'data/vipcup/RGB/V_DRONE_092202.mp4']

In [5]:
multiscale_model = multiscale.DetectionModel(
                model_path=model_path,
                device="cuda"
            )
multiscale_track_model = track_model.TrackingModel(
                multiscale_model,
                # use_compensation=False,
            )
# multiscale_track_model = simple_track.CustomTrackingModel(
#                 multiscale_model
#             )

In [6]:
def save_log(
            detections,
            video_name,
            conf_threshold,
            iou_threshold,
            output_path
        ):
    new_detections = detections.copy()
    for frame_id, frame in enumerate(new_detections):
        for idx, (bbox, score, label) in enumerate(zip(frame["boxes"], frame["scores"], frame["labels"])):
            new_detections[frame_id]["boxes"][idx] = [float(coord) for coord in bbox]
            new_detections[frame_id]["scores"][idx] = float(score)
            new_detections[frame_id]["labels"][idx] = int(label)
            
    output = {
        "video_name": video_name,
        "conf_threshold": conf_threshold,
        "iou_threshold": iou_threshold,
        "frames": detections,
    }
    with open(output_path, "wb") as f:
        f.write(orjson.dumps(output, option=orjson.OPT_INDENT_2))
    print(f"Saved log to {output_path}")
    return output_path

In [7]:
# for iou in [0.1]:
#     for conf in [0.1, 0.2, 0.3, 0.4]:
#         print(f"Running with iou: {iou}, conf: {conf}")
#         for path in videos_path:
#             video_name = os.path.splitext(os.path.basename(path))[0]
#             print(f"Processing video: {video_name}")
#             output_path = f"{output_folder}/iou_{iou}_conf_{conf}"
            
#             multiscale_det = multiscale_model.video_detect(
#                                                     path,
#                                                     conf_threshold=conf,
#                                                     iou_threshold=iou    
#                                                 )    
#             visualize_detection_video(
#                 video_path=path,
#                 detection_frames=multiscale_det,
#                 output_path=f"{output_path}/{video_name}.mp4"
#             )
#             save_log(
#                 detections=multiscale_det,
#                 video_name=video_name,
#                 conf_threshold=conf,
#                 iou_threshold=iou,
#                 output_path=f"{output_path}/{video_name}.json",
#             )

In [8]:
# yolo_model = model.DetectionModel(
#                             model_path,
#                             conf_threshold=0.3,
#                             iou_threshold=0.45
#                             )
# multiscale_model = multiscale.DetectionModel(
#                                     model_path,
#                                     conf_threshold=0.3,
#                                     iou_threshold=0.1
#                                     )
# yolo_track_model = track_model.TrackingModel(
#                             yolo_model,
#                             )
# multiscale_track_model = track_model.TrackingModel(
#                             multiscale_model,
#                             )

In [9]:
for path in videos_path:
    video_name = os.path.basename(path).removesuffix('.mp4').removesuffix('.avi').removesuffix('.mov')
    print(f"Processing video: {video_name}")
    
    # # Detection
    # multiscale_det = multiscale_model.video_detect(path)
    
    # visualize_detection_video(
    #     video_path=path,
    #     detection_frames=multiscale_det,
    #     output_path=f"{output_folder}/detection/{video_name}.mp4"
    # )
    
    # # Tracking
    multiscale_track = multiscale_track_model.video_track(
                                                        path,
                                                        conf_threshold=0.2,
                                                        iou_threshold=0.1,
                                                        )

    visualize_tracking_video(
        video_path=path,
        tracking_frames=multiscale_track,
        output_path=f"{output_folder}/{video_name}.mp4"
    )

Processing video: V_BIRD_046105
Frame 0: Detected 1 objects
Frame 1: Detected 1 objects
Frame 1: Active: 1, Lost: 0, Recovered: 0, Valid: 0
Frame 2: Detected 1 objects
Frame 2: Active: 1, Lost: 0, Recovered: 0, Valid: 0
Frame 3: Detected 1 objects
Frame 3: Active: 1, Lost: 0, Recovered: 0, Valid: 0
Frame 4: Detected 1 objects
Frame 4: Active: 1, Lost: 0, Recovered: 0, Valid: 0
Frame 5: Detected 1 objects
Frame 5: Active: 1, Lost: 0, Recovered: 0, Valid: 0
Frame 6: Detected 1 objects
Frame 6: Active: 1, Lost: 0, Recovered: 0, Valid: 0
Frame 7: Detected 1 objects
Frame 7: Active: 1, Lost: 0, Recovered: 0, Valid: 0
Frame 8: Detected 1 objects
Frame 8: Active: 1, Lost: 0, Recovered: 0, Valid: 0
Frame 9: Detected 1 objects
Frame 9: Active: 1, Lost: 0, Recovered: 0, Valid: 0
Frame 10: Detected 1 objects
Frame 10: Active: 1, Lost: 0, Recovered: 0, Valid: 0
Frame 11: Detected 1 objects
Frame 11: Active: 1, Lost: 0, Recovered: 0, Valid: 0
Frame 12: Detected 1 objects
Frame 12: Active: 1, Lost: 