In [1]:
#-- Install ultralytics for YOLO World --------------------------------------------------------------------------
# Shell escape: installs the `ultralytics` package. No version is pinned here.
!pip install ultralytics

# Clear the pip log from this cell's output once the install has finished.
from IPython import display
display.clear_output()

# Print the ultralytics environment summary (the text captured in this cell's output).
import ultralytics
ultralytics.checks()
#---------------------------------------------------------------------------------------------------------------

Ultralytics YOLOv8.2.51 🚀 Python-3.10.13 torch-2.1.2 CUDA:0 (Tesla T4, 15095MiB)
Setup complete ✅ (4 CPUs, 31.4 GB RAM, 5689.3/8062.4 GB disk)


In [2]:
#-- Install deep-sort for obj tracking --------------------------------------------------------------------------

# Shell escape: installs the `deep-sort-realtime` package. No version is pinned here.
!pip install deep-sort-realtime

# `display` is the IPython module imported in the ultralytics install cell, so that cell must run first.
display.clear_output()
#---------------------------------------------------------------------------------------------------------------

In [3]:
#-- Import -----------------------------------------------------------------------------------------------
from ultralytics import YOLO

from deep_sort_realtime.deepsort_tracker import DeepSort

import torch

import cv2
from PIL import Image

import matplotlib.pyplot as plt

import os
import shutil

import numpy as np
#---------------------------------------------------------------------------------------------------------------

In [4]:
#-- Initialize ---------------------------------------------------------------------------------------------------
#-- Root folders (both end with '/', so sub-paths are appended directly) --
# NOTE: the spelling of `intput_path` and `YOLO_Wordl_*` is kept as-is because these are notebook-level names.
out_path = '/kaggle/working/'
intput_path = '/kaggle/input/'

#-- Input files and folders --
lbls_file = f'{intput_path}all-labels/all_labels.txt'
drone_detector_weights_file = f'{intput_path}drone-detection-yolov8-best-weights/best.pt'
input_video_dir = f'{intput_path}sample-videos-detecting-and-matching-objs-1/'

#-- Output folder for result videos --
result_video_dir = f'{out_path}result_videos/'

#-- conf / iou values passed to predict() for each detector --
YOLO_Wordl_CONF_THRESHOLD, YOLO_Wordl_IOU_THRESHOLD = 0.25, 0.5
Drone_Detector_CONF_THRESHOLD, Drone_Detector_IOU_THRESHOLD = 0.25, 0.5

#-- Other thresholds --
MOTION_THRESHOLD = 10
IOU_THRESHOLD = 0.5

#-- Use the GPU when one is available, otherwise the CPU --
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print('device:', DEVICE)
#---------------------------------------------------------------------------------------------------------------

device: cuda


In [5]:
#-- Create Dir for saving Results ---------------------------------------------------------------------------------
# exist_ok=True makes this cell safe to re-run: an already existing folder is not an error.
os.makedirs(result_video_dir, exist_ok=True)
#-----------------------------------------------------------------------------------------------------------------

In [6]:
#-- Set labels for ZSOD Models ------------------------------------------------------------------------------------
# Text labels handed to the YOLO-World model via set_classes() in the model-creation cell.
all_labels = ['drone', 'UAV', 'Unmanned Aerial Vehicle', 'Quadcopter']
# Alternative label set (disabled):
# all_labels = ['person']

# NOTE: this is an alias of the same list object, not a copy; mutating one changes the other.
yolo_all_labels = all_labels
#-----------------------------------------------------------------------------------------------------------------

In [7]:
#-- Create and Initialize Models ----------------------------------------------------------------------------------
#-- YOLO World --
# Load the 'yolov8x-worldv2.pt' checkpoint and give it the label list defined in the labels cell.
model_yolo_world_zsod = YOLO('yolov8x-worldv2.pt')
model_yolo_world_zsod.set_classes(yolo_all_labels)


#-- Custom Model for Drone Detection --
# Loaded from the weights file path set in the Initialize cell.
model_drone_detector_yolov8 = YOLO(drone_detector_weights_file) 

#-- background subtractor --
# MOG2 background subtractor; in the cells visible here it is only referenced by commented-out code.
back_sub = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=100, detectShadows=False)

# Clear whatever the model loading printed, then confirm success.
display.clear_output()
print('All models loaded successfully :)')
#-----------------------------------------------------------------------------------------------------------------

All models loaded successfully :)


In [8]:
#-- calculate IoU for 2 Detected Objects -------------------------------------------------------------------------
def calculate_iou(box1, box2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Widths and heights are computed as ``max - min + 1``, i.e. the corner
    coordinates are treated as inclusive pixel indices.

    Args:
        box1: ``(x1, y1, x2, y2)`` corners of the first box, with
            ``x1 <= x2`` and ``y1 <= y2``.
        box2: Corners of the second box, same layout as ``box1``.

    Returns:
        float: ``intersection_area / union_area``. ``0.0`` is returned when
        the union area is not positive (degenerate boxes), instead of
        raising ``ZeroDivisionError``.
    """
    x1_1, y1_1, x2_1, y2_1 = box1
    x1_2, y1_2, x2_2, y2_2 = box2

    #-- Corners of the overlapping rectangle (may be "inverted" when the boxes are disjoint) --
    x1_intersection = max(x1_1, x1_2)
    y1_intersection = max(y1_1, y1_2)
    x2_intersection = min(x2_1, x2_2)
    y2_intersection = min(y2_1, y2_2)

    #-- Clamp each side at 0 so disjoint boxes give an empty intersection --
    intersection_width = max(0, x2_intersection - x1_intersection + 1)
    intersection_height = max(0, y2_intersection - y1_intersection + 1)
    intersection_area = intersection_width * intersection_height

    box1_area = (x2_1 - x1_1 + 1) * (y2_1 - y1_1 + 1)
    box2_area = (x2_2 - x1_2 + 1) * (y2_2 - y1_2 + 1)
    union_area = box1_area + box2_area - intersection_area

    #-- Degenerate input: nothing to divide by, report "no overlap" --
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area
#-----------------------------------------------------------------------------------------------------------------

In [9]:
#-- Run ------------------------------------------------------------------------------------------------------------------
# Detect drones frame by frame with the custom YOLOv8 model, track them with DeepSort and write an
# annotated video: blue boxes = raw detections (label 'I: <index in frame>'),
# green boxes = confirmed tracks (label 'ID: <track id>').

#-- Initialize DeepSort --
tracker = DeepSort(max_age=30)

#-- Load video file --
video_path = '/kaggle/input/sample-videos-detecting-and-matching-objs-1/sample_video_drone (3).mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    #-- Fail loudly instead of silently writing an empty output video --
    cap.release()
    raise IOError(f'Could not open video file: {video_path}')

#-- Get video properties --
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
#-- Keep fps as a float: truncating e.g. 29.97 to 29 changes the playback speed of the output --
fps = cap.get(cv2.CAP_PROP_FPS)
if fps <= 0:
    #-- The container did not report a frame rate; fall back to a common default --
    fps = 30.0
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

#-- Create video writer to save output video --
out = cv2.VideoWriter('output_video_sample_video_drone (3).mp4', fourcc, fps, (width, height))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        #-- Detect Drone objects --
        results = model_drone_detector_yolov8.predict(source=frame,
                                                      conf=Drone_Detector_CONF_THRESHOLD,
                                                      iou=Drone_Detector_IOU_THRESHOLD,
                                                      show=False,
                                                      save=False)

#         #-- Detect Person objects --
#         results = model_yolo_world_zsod.predict(source=frame,
#                                                 conf=YOLO_Wordl_CONF_THRESHOLD,
#                                                 iou=YOLO_Wordl_IOU_THRESHOLD,
#                                                 show=False,
#                                                 save=False)

        #-- Extract bounding boxes and confidences --
        detections = []        #-- tracker input: ([left, top, w, h], conf, class) --
        detection_boxes = []   #-- integer (x1, y1, x2, y2) corners, used only for drawing --
        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                conf = box.conf.tolist()[0]

                #-- DeepSort expects [left, top, width, height], not corner coordinates --
                detections.append(([x1, y1, x2 - x1, y2 - y1], conf, 'drone'))
#                 detections.append(([x1, y1, x2 - x1, y2 - y1], conf, 'person'))

                detection_boxes.append((int(x1), int(y1), int(x2), int(y2)))

        #-- Update tracker with detections --
        #-- Done BEFORE drawing, so the crops taken from `frame` for appearance features are clean --
        tracks = tracker.update_tracks(detections, frame=frame)

        #-- Draw raw detections with their index within this frame --
        for i, (x1, y1, x2, y2) in enumerate(detection_boxes):
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(frame, f'I: {i}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

        #-- Draw bounding boxes for tracked objects --
        for track in tracks:
            if not track.is_confirmed():
                continue
            track_id = track.track_id
            ltrb = track.to_ltrb()
            bbox = [int(coord) for coord in ltrb]

            #-- Draw bounding box --
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
            cv2.putText(frame, f'ID: {track_id}', (bbox[0], bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        #-- Write frame to output video --
        out.write(frame)
finally:
    #-- Release resources even if detection / tracking raised, so the output file is finalized --
    cap.release()
    out.release()


display.clear_output()
print(':)')

:)


In [10]:
# for video_file in os.listdir(input_video_dir):      
    
#     if 'human' not in video_file:
#         continue
        
#     if video_file != 'sample_video_human (2).mp4':
#         continue
    

#     #-- log --
#     print(f'Processing {video_file} ==========================================================')
    
#     tracker = DeepSort(max_age=30, n_init=3)
    
#     #-- Create Folder for saving results --
#     dot_index = video_file.rfind('.')   
#     video_result_dir_name = 'result_for_' + video_file[:dot_index]
#     video_result_dir_path = result_video_dir + video_result_dir_name + '/'
#     os.makedirs(video_result_dir_path, exist_ok=True)
        
#     #-- load video --
#     video_path = os.path.join(input_video_dir, video_file)    
#     video = cv2.VideoCapture(video_path)
    
#     #-- Get number of frames and fps -- 
#     number_of_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
#     fps = int(video.get(cv2.CAP_PROP_FPS))
#     print(f'number_of_frames: {number_of_frames}\nfps: {fps}')
    
    

#     #-- Get the width and height of the frames --
#     frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
#     frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    
#     # Define the time interval t in seconds
#     t = 2    
#     frame_interval = t * fps
    
#     unique_objects_id = set()    
#     unique_objects_tracks = dict()  
#     time_obj = dict()
#     track_obj = dict()
    
    
    
#     #-- Initialize VideoWriter to save the output video --
#     result_video = cv2.VideoWriter(video_result_dir_path + video_file[:dot_index] + '.avi',
#                                    cv2.VideoWriter_fourcc(*'XVID'),
#                                    fps,
#                                    (frame_width, frame_height))    
    
    
#     #-- Run Object Detection Models Frame by Frame --
#     frame_number = 0
#     while video.isOpened():
#         ret, frame = video.read()
#         if not ret:
#             break
        
#         main_frame = frame.copy()
#         frame_number += 1   
#         #-- log --
#         print(f'\tProcessing frame {frame_number} ------------------------------')       
        
        
#         #-- Apply background subtraction --
#         fg_mask = back_sub.apply(frame)        
        
#         #-- show some frames --
#         if frame_number % frame_interval == 0:
# #         if frame_number % (number_of_frames//5) == 0:
#             plt.figure(figsize=(5, 5))
#             plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
#             plt.axis('off')
#             plt.title(f'main frame - frame number={frame_number}')
#             file_name = f'main_frame_{frame_number}.png'
#             plt.savefig(video_result_dir_path + file_name)
#             plt.show()

#             plt.figure(figsize=(5, 5))
#             plt.imshow(cv2.cvtColor(fg_mask, cv2.COLOR_BGR2RGB))
#             plt.axis('off')
#             plt.title(f'fg_mask - frame number={frame_number}')
#             file_name = f'fg_mask_{frame_number}.png'
#             plt.savefig(video_result_dir_path + file_name)
#             plt.show()
        
#         #-- Detect objects by YOLO-World --
#         results_yolo_world_zsod = model_yolo_world_zsod.predict(source=frame,
#                                                                 conf=YOLO_Wordl_CONF_THRESHOLD,
#                                                                 iou=YOLO_Wordl_IOU_THRESHOLD,
#                                                                 show=False,
#                                                                 save=False)
               
        
#         #-- Get only moving objects from results_yolo_world_zsod --
#         n_all_objs_yolo_world_zsod = 0
#         moving_objects_yolo_world_zsod = []        
#         for result in results_yolo_world_zsod:
#             for box in result.boxes:  
#                 n_all_objs_yolo_world_zsod += 1
                
#                 class_id = int(box.cls)                 
#                 label = yolo_all_labels[class_id]    
#                 bbox = box.xyxy.tolist()[0]            
#                 x1, y1, x2, y2 = map(int, bbox)                

#                 #-- Check if the detected object has motion --
#                 if fg_mask[y1:y2, x1:x2].mean() > MOTION_THRESHOLD:  
#                     moving_objects_yolo_world_zsod.append((x1, y1, x2, y2, label))           
 
            
        
#         #-- log --
#         print(f' frame number: {frame_number} ------------------------------------------------------')
#         print(f'all objs:\tyolo_world:{n_all_objs_yolo_world_zsod}')
#         print(f'moving objs:\tyolo_world:{len(moving_objects_yolo_world_zsod)}')
#         print('--------------------------------------------------------------------------')
        
#         moving_objects = moving_objects_yolo_world_zsod
        
#         detections = []              
#         for (x1, y1, x2, y2, label) in moving_objects:
#             w = int(x2 - x1)
#             h = int(y2 - y1)
#             x_center = int(x1 + w / 2)
#             y_center = int(y1 + h / 2)
            
#             bbox = [x_center, y_center, w, h]          
            
#             score = 0.9
#             detections.append(((bbox),score,label))    
        
#         t_ids = set()
#         for obj in detections:
#             tracker.update_tracks([obj], frame = main_frame)     
            
#             for track in tracker.tracker.tracks:   
#                 if track.track_id not in t_ids:
#                     t_ids.add(track.track_id)
#                     track_obj[track.track_id] = obj
            
            
        
#         print(f' frame number: {frame_number} +++++++++++++++++++++++++++++++++++++++++++++')
#         for t_id , obj in   track_obj.items():
#             print(t_id, obj[2])
#         print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
        
        
#         tracks = tracker.update_tracks(detections, frame = main_frame)        

#         # Collect unique object IDs
#         n_tracks = 0
#         ids = set()
        
#         for track in tracker.tracker.tracks:    
#             n_tracks += 1
#             ids.add(track.track_id)
           
#             unique_objects_id.add(track.track_id)
#             if track.track_id not in unique_objects_tracks:
#                 unique_objects_tracks[track.track_id] = track
        
#         print(f' frame number: {frame_number} +++++++++++++++++++++++++++++++++++++++++++++')
#         print(f'detections: {len(detections)}')        
#         print(f'n_tracks: {n_tracks}')   
#         print(f'tracks:\n {len(tracks)}')
#         print(f'ids in this frame: {ids}')
#         print(f'ids in interval: {unique_objects_id}')        
#         print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
        
        
#         # If reached the frame interval, print unique objects and reset
#         if frame_number % frame_interval == 0:
#             print(f"Unique objects in the last {t} seconds:", len(unique_objects_id))
# #             time_obj[frame_number / frame_interval] = unique_objects.copy()
#             time_obj[frame_number / frame_interval] = unique_objects_tracks.copy()
            
#             unique_objects_id.clear()
#             unique_objects_tracks = {}
        
        
#         if frame_number % frame_interval == 0:
            
#             all_tracks = []        
#             for track in tracker.tracker.tracks:    
#                 if track.is_confirmed() and track.time_since_update == 0:
#                     all_tracks.append(track)
# #             for id,t in time_obj[frame_number / frame_interval].items():
# #                 all_tracks.append(t)
                
# #             img = draw_boxes(frame.copy(), tracker.tracker.tracks)
#             img = draw_boxes(frame.copy(), all_tracks)
#             plt.figure(figsize=(10, 10))
#             plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
#             plt.axis('off')
#             plt.title(f'tracking - frame number={frame_number}')
# #             file_name = f'main_frame_{frame_number}.png'
# #             plt.savefig(video_result_dir_path + file_name)
#             plt.show()
                
                
#         #-- plot bounding box for moving objects on the frame --
#         for i, (x1, y1, x2, y2, label) in enumerate(moving_objects):            
#             #-- crop detected object --
#             cropped_object = main_frame[y1:y2, x1:x2]
            
#             #-- save croped object --
#             if frame_number % (number_of_frames//5) == 0:
#                 file_name = f'frame_{frame_number}_{i}_{label}.png'            
#                 cv2.imwrite(video_result_dir_path + file_name, cropped_object)
            
#             #-- show cropped object --
#             if frame_number % (number_of_frames//5) == 0:
#                 plt.figure(figsize=(3, 3))
#                 plt.imshow(cropped_object)
#                 plt.axis('off')
#                 plt.title(label)
#                 plt.show
            
#             #-- plot bbox on the frame --
#             color = (255, 0, 0)
#             if 'yolov8' in label:
#                 color = (255, 0, 0)
#             elif 'yolo_world' in label:
#                 color = (0, 255, 0)
#             elif 'dino' in label:
#                 color = (0, 0, 255)
            
                
#             cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)  #-- Red box with thickness 2 --               
#             cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            
# #             print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
# #             print(x1,y1,x2,y2)
# #             print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
            
            
            
#         #-- Add frame to result video --
#         result_video.write(frame)

#         #-- show some frames --
#         if frame_number % frame_interval == 0:
# #         if frame_number % (number_of_frames//5) == 0:
#             plt.figure(figsize=(10, 10))
#             plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
#             plt.axis('off')
#             plt.title(f'moving objcs - frame number={frame_number}')
#             file_name = f'moving_objcs_{frame_number}.png'
#             plt.savefig(video_result_dir_path + file_name)
#             plt.show()   
    
        
        
#     #-- zip results --
#     shutil.make_archive(out_path+video_result_dir_name, 'zip', video_result_dir_path)    

#     #-- release videos --
#     video.release()
#     result_video.release()    
    
#     print(time_obj)
    
#     break
    
    


# #-- remove folders --
# shutil.rmtree(result_video_dir)
# # display.clear_output()   
# print(':)')
# #-----------------------------------------------------------------------------------------------------------------    