In [1]:
#-- Matching Same Object --
#-- Predicting Direction and Speed --
#-- Predicting Location of lost Objects --

In [2]:
from IPython import display

In [3]:
#-- Install ultralytics ------------------------------------------------------------------------------------------
!pip install filterpy
!pip install ultralytics

display.clear_output()

import ultralytics
ultralytics.checks()
#-----------------------------------------------------------------------------------------------------------------

Ultralytics 8.3.52 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
Setup complete ✅ (4 CPUs, 31.4 GB RAM, 6037.6/8062.4 GB disk)


In [4]:
#-- Imports ----------------------------------------------------------------------------------------------------
from ultralytics import YOLO
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from collections import defaultdict
import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import os
from datetime import datetime, timedelta
import shutil
from filterpy.kalman import KalmanFilter
#-----------------------------------------------------------------------------------------------------------------

In [5]:
#-- Initialize --------------------------------------------------------------------------------------------------
#-- Paths --
out_dir = '/kaggle/working/'
detection_weights_file = '/kaggle/input/yolo11-11frozen-13/model_11_frozen_epoch_60/train/weights/best.pt'

#-- Videos to process; uncomment an entry to include it --
drone_files = [
    '/kaggle/input/drone-dataset-p1/v_5.mp4',
    # '/kaggle/input/drone-dataset-p2/v_8.mp4',
    # '/kaggle/input/drone-detection-test-videos-1/drone_video (1).mp4',
    # '/kaggle/input/novin-data/Novin_Dataset/f2.part2.mp4',
    # '/kaggle/input/sample-videos-detecting-and-matching-objs-1/sample_video_drone (5).mp4',
    # '/kaggle/input/video-drone-bird-1/Untitled-13.mp4',
]

#-- Output directory (keeps its trailing slash: file names are appended to it directly) --
results_dir = out_dir + 'results/'
os.makedirs(results_dir, exist_ok=True)  #-- no error if it already exists

#-- Matching parameters --
AREA_THRESHOLD = 5          #-- max ratio (larger / smaller) between two box areas for a match
DISTANCE_THRESHOLD = 50     #-- max distance between box centres (box coordinate units) for a distance match
SIMILARITY_THRESHOLD = 0.6  #-- min cosine similarity between crop embeddings for a match
CROP_PADDING = 10           #-- margin added on every side of a box when cropping it
TIME_THRESHOLD = 60         #-- seconds; objects last seen longer ago are not matching candidates
NUM_TRACK_THRESHOLD = 30    #-- max number of points kept in each object's track history

#-----------------------------------------------------------------------------------------------------------------

In [6]:
#-- Set Detection Model ------------------------------------------------------------------------------------------
#-- Load the detector from the configured weights file; the run loop calls model.track() on it --
model = YOLO(detection_weights_file)  
#-----------------------------------------------------------------------------------------------------------------

In [7]:
#-- Set Similarity Measure Model ---------------------------------------------------------------------------------
#-- Full ResNet50 with ImageNet weights (downloaded on first use) --
similarity_base_model = ResNet50(weights='imagenet')

#-- Use the second-last layer for embeddings: same input, output taken one layer before the end --
#-- NOTE(review): presumably this is the pooled feature vector feeding the classifier --
#-- confirm with similarity_base_model.summary() --
similarity_model = Model(inputs=similarity_base_model.input,
                         outputs=similarity_base_model.layers[-2].output)  
#-----------------------------------------------------------------------------------------------------------------

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5
[1m102967424/102967424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [8]:
#-- Function to Preprocess Image for Similarity Measure ---------------------------------------------------------
def preprocess_image(image, target_size=(224, 224), is_bgr=True):
    """Prepare one image for the ResNet50 similarity model.

    Args:
        image: 3-channel image array (e.g. a crop of an OpenCV frame).
        target_size: size passed to cv2.resize as (width, height).
        is_bgr: True (default) when `image` uses OpenCV's BGR channel order.
            ResNet50's preprocess_input expects RGB input, so BGR images are
            converted first. Pass False for images that are already RGB.

    Returns:
        The resized, normalized image with a leading batch dimension of 1.
    """
    if is_bgr:
        #-- preprocess_input assumes RGB; frames read by OpenCV are BGR --
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    image = cv2.resize(image, target_size)
    image = np.expand_dims(image, axis=0)  #-- Add batch dimension
    image = preprocess_input(image)  #-- Normalize for ResNet
    return image
#-----------------------------------------------------------------------------------------------------------------

In [9]:
#-- Function to Calculate Similarity -----------------------------------------------------------------------------
def compare_similarity_images(image1, image2):
    """Return the cosine similarity between the embeddings of two images.

    Both images are preprocessed with preprocess_image() and embedded by
    similarity_model in a single batch.

    Args:
        image1, image2: image arrays (e.g. crops of video frames).

    Returns:
        Cosine similarity of the two embeddings, or 0.0 when either image is
        missing or empty (an empty crop cannot be resized or embedded).
    """
    #-- Guard against empty crops instead of crashing inside cv2.resize --
    for image in (image1, image2):
        if image is None or image.size == 0:
            return 0.0

    #-- Preprocess images and stack them into one batch of two --
    batch = np.concatenate([preprocess_image(image1), preprocess_image(image2)], axis=0)

    #-- Extract features in one call; verbose=0 suppresses the per-call progress bar --
    embeddings = similarity_model.predict(batch, verbose=0)

    #-- Compute cosine similarity --
    similarity_score = cosine_similarity(embeddings[0:1], embeddings[1:2])[0][0]

    return similarity_score
#-----------------------------------------------------------------------------------------------------------------

In [10]:
#-- Function to Match Detected Objects ---------------------------------------------------------------------------
def match_object(track_id, track_box, track_image, track_time, last_tracked_objects):
    """Find the known object that a newly appeared track most likely continues.

    A candidate from `last_tracked_objects` is rejected when
      * it was last seen more than TIME_THRESHOLD seconds away from `track_time`,
      * either box has no area, or the larger area exceeds the smaller one by
        more than a factor of AREA_THRESHOLD, or
      * the appearance similarity of the two crops is below SIMILARITY_THRESHOLD.

    Among the remaining candidates, those whose centre lies within
    DISTANCE_THRESHOLD of the new box are preferred (nearest wins); if there is
    none, the most similar remaining candidate wins.

    Args:
        track_id: id of the new track (not used for the decision).
        track_box: (center_x, center_y, width, height) of the new detection.
        track_image: cropped image of the new detection.
        track_time: datetime of the new detection.
        last_tracked_objects: dict mapping object id -> (box, image, time) of
            the last observation of each known object.

    Returns:
        The id of the matched object, or None when no candidate qualifies.
    """
    track_center_x, track_center_y, track_w, track_h = (float(v) for v in track_box)
    track_area = track_w * track_h

    distance_match = {}
    similarity_match = {}

    for obj_id, (obj_box, obj_img, obj_time) in last_tracked_objects.items():

        #-- Too long since this object was last seen --
        if abs(track_time - obj_time) > timedelta(seconds=TIME_THRESHOLD):
            continue

        #-- Cheap geometric filter first, so the embedding model only runs when needed --
        x_center, y_center, w, h = (float(v) for v in obj_box)
        obj_area = w * h
        smaller_area = min(track_area, obj_area)
        if smaller_area <= 0:
            #-- Degenerate box: the area ratio is undefined --
            continue
        if max(track_area, obj_area) / smaller_area > AREA_THRESHOLD:
            continue

        similarity_score = compare_similarity_images(track_image, obj_img)
        if similarity_score < SIMILARITY_THRESHOLD:
            continue

        distance = float(np.hypot(track_center_x - x_center, track_center_y - y_center))
        if distance <= DISTANCE_THRESHOLD:
            distance_match[obj_id] = distance
        else:
            similarity_match[obj_id] = similarity_score

    if distance_match:
        #-- Nearest candidate --
        return min(distance_match, key=distance_match.get)
    if similarity_match:
        #-- Most similar candidate (highest score, not lowest) --
        return max(similarity_match, key=similarity_match.get)
    return None

      
#-----------------------------------------------------------------------------------------------------------------

In [11]:
#-- Function To Crop Object Using Bounding Box ---------------------------------------------------------------
def crop_object(frame, box, padding=CROP_PADDING):
    """Cut a padded copy of a bounding box out of a frame.

    Args:
        frame: image array whose first two dimensions are (height, width).
        box: (center_x, center_y, width, height) of the region to crop.
        padding: margin added on every side of the box before clamping.

    Returns:
        A copy of the padded region, clamped to the frame borders.
    """
    img_h, img_w = frame.shape[:2]
    cx, cy, box_w, box_h = box

    #-- Half extents use floor division --
    half_w = box_w // 2
    half_h = box_h // 2

    #-- Clamp the padded corners to the frame --
    x0 = int(max(cx - half_w - padding, 0))
    y0 = int(max(cy - half_h - padding, 0))
    x1 = int(min(cx + half_w + padding, img_w))
    y1 = int(min(cy + half_h + padding, img_h))

    return frame[y0:y1, x0:x1].copy()
#-----------------------------------------------------------------------------------------------------------------

In [12]:
#-- Initialize Kalman Filter ----------------------------------------------------------------------------------- 
#-- One Kalman filter per tracked object, keyed by object id --
kalman_filters = {}

def initialize_kalman_filter():
    """Create a Kalman filter for the state (x, y, vx, vy) with (x, y) measurements.

    Returns:
        A KalmanFilter with transition matrix, measurement function, initial
        uncertainty and noise matrices set; the state itself is left for the
        caller to initialize.
    """
    state_dim, meas_dim = 4, 2
    kf = KalmanFilter(dim_x=state_dim, dim_z=meas_dim)

    #-- Transition matrix: identity, plus each position advancing by its velocity per step --
    transition = np.eye(state_dim, dtype=int)
    transition[0, 2] = 1
    transition[1, 3] = 1
    kf.F = transition

    #-- Measurement function: only the position components (x, y) are observed --
    kf.H = np.eye(meas_dim, state_dim, dtype=int)

    kf.P *= 10.0                      #-- Initial uncertainty
    kf.R = 5 * np.eye(meas_dim)       #-- Measurement noise
    kf.Q = 0.01 * np.eye(state_dim)   #-- Process noise
    return kf
#-----------------------------------------------------------------------------------------------------------------

In [None]:
#-- Run --------------------------------------------------------------------------------------------------------
def _draw_track(image, track, track_id, center, size, trail_color, box_color):
    """Draw a track's trail and, when `size` is not None, its box and ID label on `image`."""
    points = np.array(track, dtype=np.int32).reshape((-1, 1, 2))
    cv2.polylines(image, [points], isClosed=False, color=trail_color, thickness=4)
    if size is None:
        return
    cx, cy = center
    w, h = size
    left, top = int(cx - w / 2), int(cy - h / 2)
    right, bottom = int(cx + w / 2), int(cy + h / 2)
    cv2.rectangle(image, (left, top), (right, bottom), box_color, 2)
    cv2.putText(image, f'ID: {track_id}', (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, box_color, 2)


for video_file in drone_files:

    #-- get video name --
    video_name = os.path.basename(video_file)

    #-- set output file --
    out_video_name = 'out_' + video_name
    output_path = os.path.join(results_dir, out_video_name)

    print(f'=== Processing {video_name} ================================')

    #-- Filters are keyed by object id; drop the ones left over from a previous video --
    #-- NOTE(review): model.track(..., persist=True) keeps tracker state inside `model`,
    #-- so tracker ids may carry over between videos as well -- confirm and reset if needed --
    kalman_filters.clear()

    cap = cv2.VideoCapture(video_file)

    #-- get video properties --
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)  #-- kept as float so e.g. 29.97 is not truncated to 29

    #-- set video writer --
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

    track_history = defaultdict(list)  #-- object id -> recent centre points (for drawing the trail)
    last_tracked_objects = {}          #-- object id -> (box, crop, time) of its last detection (for matching)
    mapped_objects = {}                #-- tracker id -> object id it was matched to
    frame_number = 0
    max_frames = 2000                  #-- processing stops when this frame number is reached

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            frame_number += 1
            if frame_number >= max_frames:
                break

            #-- Draw on a copy so crops taken from `frame` never contain annotations --
            annotated = frame.copy()
            current_objects = set()

            results = model.track(frame, tracker='bytetrack.yaml', persist=True, show=False)
            detections = results[0].boxes

            if detections is not None and detections.xywh is not None:
                boxes = detections.xywh.cpu()
                track_ids = detections.id

                if track_ids is not None:
                    track_ids = track_ids.int().cpu().tolist()

                    #-- Object ids already claimed by a detection in this frame: a newly appeared
                    #-- track must not be merged into an object that is visible right now --
                    visible_ids = {mapped_objects.get(tid, tid) for tid in track_ids}

                    for box, track_id in zip(boxes, track_ids):

                        #-- Crop the object from the (un-annotated) frame --
                        cropped_object = crop_object(frame, box)
                        #-- Wall-clock time: TIME_THRESHOLD is therefore measured in processing time --
                        detection_time = datetime.now()

                        #-- An unknown tracker id may be a re-detection of a known object --
                        if track_id not in last_tracked_objects:
                            matched_id = mapped_objects.get(track_id)
                            if matched_id is None:
                                candidates = {obj_id: obj
                                              for obj_id, obj in last_tracked_objects.items()
                                              if obj_id not in visible_ids}
                                matched_id = match_object(track_id, box, cropped_object,
                                                          detection_time, candidates)
                            if matched_id is not None:
                                mapped_objects[track_id] = matched_id
                                track_id = matched_id
                                visible_ids.add(track_id)

                        last_tracked_objects[track_id] = (box, cropped_object, detection_time)
                        current_objects.add(track_id)

                        cx, cy, w, h = (float(v) for v in box)

                        if track_id not in kalman_filters:
                            #-- First observation: start at the detection with zero velocity --
                            kf = initialize_kalman_filter()
                            kf.x = np.array([cx, cy, 0.0, 0.0])
                            kalman_filters[track_id] = kf
                        else:
                            #-- Advance the filter to this frame, then correct it with the detection.
                            #-- Exactly one predict() per frame keeps the state aligned with the frame --
                            kf = kalman_filters[track_id]
                            kf.predict()
                            kf.update([cx, cy])

                        #-- Update track history --
                        track = track_history[track_id]
                        track.append((cx, cy))
                        if len(track) > NUM_TRACK_THRESHOLD:
                            track.pop(0)

                        #-- Draw tracking lines and bounding box --
                        _draw_track(annotated, track, track_id, (cx, cy), (w, h),
                                    trail_color=(0, 0, 255), box_color=(255, 0, 0))

                #-- Handle missing detections: use Kalman prediction --
                for track_id, kf in kalman_filters.items():

                    if track_id in current_objects:
                        continue

                    kf.predict()
                    predicted_x, predicted_y = (float(v) for v in kf.x[:2])

                    track = track_history[track_id]
                    track.append((predicted_x, predicted_y))
                    if len(track) > NUM_TRACK_THRESHOLD:
                        track.pop(0)

                    #-- The predicted box reuses the size of the object's last detected box --
                    size = None
                    if track_id in last_tracked_objects:
                        _, _, w, h = last_tracked_objects[track_id][0]
                        size = (float(w), float(h))

                    _draw_track(annotated, track, track_id, (predicted_x, predicted_y), size,
                                trail_color=(0, 255, 0), box_color=(0, 255, 0))

            out.write(annotated)
    finally:
        #-- Release the capture and finalize the output file even if processing fails --
        cap.release()
        out.release()
#-----------------------------------------------------------------------------------------------------------------

[31m[1mrequirements:[0m Ultralytics requirement ['lap>=0.5.12'] not found, attempting AutoUpdate...
Collecting lap>=0.5.12
  Downloading lap-0.5.12-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.2 kB)
Downloading lap-0.5.12-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hInstalling collected packages: lap
Successfully installed lap-0.5.12

[31m[1mrequirements:[0m AutoUpdate success ✅ 7.2s, installed 1 package: ['lap>=0.5.12']
[31m[1mrequirements:[0m ⚠️ [1mRestart runtime or rerun command for updates to take effect[0m


0: 384x640 1 drone, 89.9ms
Speed: 11.1ms preprocess, 89.9ms inference, 241.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 drone, 47.5ms
Speed: 2.9ms preprocess, 47.5ms inference, 1.2m

I0000 00:00:1734762171.226366     106 service.cc:145] XLA service 0x7beea4010800 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1734762171.226424     106 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1734762171.226430     106 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step

0: 384x640 2 drones, 28.8ms
Speed: 2.4ms preprocess, 28.8ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)



I0000 00:00:1734762174.156169     106 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


0: 384x640 1 drone, 29.9ms
Speed: 3.2ms preprocess, 29.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 drone, 29.2ms
Speed: 3.2ms preprocess, 29.2ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 drones, 29.9ms
Speed: 3.2ms preprocess, 29.9ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 drones, 30.6ms
Speed: 2.4ms preprocess, 30.6ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 drones, 30.4ms
Speed: 2.5ms preprocess, 30.4ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 drones, 30.1ms
Speed: 3.8ms preprocess, 30.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 drone, 30.5ms
Speed: 2.7ms preprocess, 30.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 drone, 30.4ms
Speed: 2.6ms preprocess, 30.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 64

In [None]:
# zip_results = "results"
# shutil.make_archive(zip_results, 'zip', results_dir)