In [1]:
#-- Install ultralytics for YOLO  --------------------------------------------------------------------------------
!pip install ultralytics

from IPython import display
display.clear_output()

import ultralytics
ultralytics.checks()
#---------------------------------------------------------------------------------------------------------------

Ultralytics 8.3.58 🚀 Python-3.10.13 torch-2.1.2 CUDA:0 (Tesla T4, 15095MiB)
Setup complete ✅ (4 CPUs, 31.4 GB RAM, 6037.6/8062.4 GB disk)


In [2]:
#-- Import libraries  ------------------------------------------------------------------------------------------
from ultralytics import YOLO
import torch
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import os
import shutil
import numpy as np
import pandas as pd
import csv
import random
#---------------------------------------------------------------------------------------------------------------

In [3]:
#-- Initialize ---------------------------------------------------------------------------------------------------
# Input folders holding the two groups of sample videos.
burglary_samples_dir = '/kaggle/input/novin-create-binary-burglary-ds/burglary_samples/'
not_burglary_samples_dir = '/kaggle/input/novin-create-binary-burglary-ds/not_burglary_samples/'

# Values forwarded to the detector's predict() call as `conf` and `iou`.
CONF_THRESHOLD, IOU_THRESHOLD = 0.2, 0.5

# Despite the name this is a fraction (0.1 == 10%): it is multiplied by a video's
# frame count to obtain the minimum detection count a prompt must reach.
BRUGLARY_THRESHOLD_PERCENT = 0.1

# Prefer the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print('device:', DEVICE)
#---------------------------------------------------------------------------------------------------------------

device: cuda


In [4]:
#-- Get path for all videos as a list ---------------------------------------------------------------------------
def _collect_file_paths(root_dir):
    """Return the sorted full paths of every file found under `root_dir`, recursively."""
    return sorted(
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root_dir)
        for name in names
    )


#-- Burglary samples --
burglary_videos = _collect_file_paths(burglary_samples_dir)

#-- Not Burglary Samples --
not_burglary_videos = _collect_file_paths(not_burglary_samples_dir)

# Every file under the folders is listed; no filtering by extension is applied.
print(f'Burglary Samples: {len(burglary_videos)}')
print(f'Not-Burglary Samples: {len(not_burglary_videos)}')
#---------------------------------------------------------------------------------------------------------------

Burglary Samples: 34
Not-Burglary Samples: 34


In [5]:
#-- Set label prompts for ZSOD Models ------------------------------------------------------------------------------------
# Text prompts describing burglary-related behaviour.
# The list is handed to the detector with set_classes(labels), and predicted class
# indices are mapped back to prompt text with labels[cls_index]; the order of the
# entries therefore has to stay in sync with the class list given to the model.
labels = [
    "Person climbing over a fence",
    "Person climbing a wall",
    "Person breaking a lock with tools",
    "Person trying to pick a lock",
    "Person forcing a door open with strength",
    "Person hiding behind an object",
    "Person running away from a building",
    "Person carrying tools like a crowbar",
    "Person breaking a window with an object",
    "Person tampering with a security camera",
    "Person cutting alarm wires",
    "Person jumping out of a window",
    "Person disabling an alarm system",
    "Person wearing a mask and avoiding detection"
]
#-----------------------------------------------------------------------------------------------------------------

In [6]:
#-- Create and Initialize Model ----------------------------------------------------------------------------------

#-- YOLO World (Zero-Shot Model) --
# Build the detector from the 'yolov8x-worldv2.pt' weights via the Ultralytics YOLO class.
model_burglary_detection = YOLO('yolov8x-worldv2.pt')
# Use the text prompts as the model's classes; the later cells translate a predicted
# class index i back to its prompt with labels[i].
model_burglary_detection.set_classes(labels)

# Clear any output produced by the calls above before printing the confirmation.
display.clear_output()
print('YOLO-world model was loaded successfully :)')
#-----------------------------------------------------------------------------------------------------------------

YOLO-world model was loaded successfully :)


In [7]:
#-- Create an empty DataFrame for saving results ------------------------------------------------------------
# Fields recorded for each processed video; rows are appended by later cells.
columns = [
    "video_file",
    "true_label",
    "predicted_label",
    "all_detected_prompts",
    "burglary_threshold",
]
df_result = pd.DataFrame(columns=columns)
# Sanity check: expected to show zero rows and one column per field.
print(df_result.shape)
#------------------------------------------------------------------------------------------------------------

(0, 5)


In [8]:
# Run the detector over every burglary sample and collect, per video file name:
#   video_labels_dict    -> [(prompt, count), ...] for prompts whose count reached the threshold
#   video_all_detections -> [(prompt, count), ...] for every prompt detected at least once
#   video_threshold      -> the detection-count threshold used for that video
video_labels_dict = {}
video_all_detections = {}
video_threshold = {}

for video_path in burglary_videos:

    # File name without its directory; used as the key of the result dictionaries.
    video_file = os.path.basename(video_path)

    #-- log --
    print(f'Processing {video_file} ==========================================================')

    #-- Calculate total frames and set Threshold --
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()

    # int() truncates, so a video reporting fewer than 10 frames gets a threshold of 0.
    # Only prompts detected at least once are stored below, so 0 then acts like 1.
    BRUGLARY_THRESHOLD = int(total_frames * BRUGLARY_THRESHOLD_PERCENT)
    video_threshold[video_file] = BRUGLARY_THRESHOLD
    print(f'BRUGLARY_THRESHOLD: {BRUGLARY_THRESHOLD}')

    #-- count number of detected prompts --
    # NOTE(review): every detected box is counted, so one frame can add several counts
    # for the same prompt while the threshold is derived from the number of frames.
    # Confirm this is the intended comparison.
    labels_count = {}

    # stream = True lets the results be consumed one at a time instead of all at once.
    for r in model_burglary_detection.predict(source = video_path,
                                           conf = CONF_THRESHOLD,
                                           iou = IOU_THRESHOLD,
                                           show = False,
                                           save= False,
                                           stream = True) :

        # r.boxes.cls holds the class index of each detected box; map it to its prompt.
        for cls_index in r.boxes.cls.int().tolist():
            prompt = labels[cls_index]
            labels_count[prompt] = labels_count.get(prompt, 0) + 1

    # Every detected prompt with its count, and the subset that reached the threshold.
    all_detection_list = list(labels_count.items())
    final_labels_list = [
        (cls_lbl, count)
        for cls_lbl, count in all_detection_list
        if count >= BRUGLARY_THRESHOLD
    ]

    video_labels_dict[video_file] = final_labels_list
    video_all_detections[video_file] = all_detection_list


# Replace the per-video log lines with a short completion marker.
display.clear_output()
print(':)')


:)


In [9]:
# Add one result row per processed burglary video to df_result.
true_label = "burglary"

# Rows are collected first and concatenated once; calling pd.concat inside the loop
# copies the whole frame again on every iteration.
new_rows = []
for video, all_detected_prompts in video_all_detections.items():
    # A video is predicted as burglary when at least one prompt reached its threshold.
    predicted_label = "burglary" if len(video_labels_dict[video]) > 0 else "not burglary"

    new_rows.append({
        "video_file": video,
        "true_label": true_label,
        "predicted_label": predicted_label,
        "all_detected_prompts": all_detected_prompts,
        "burglary_threshold": video_threshold[video]
    })

# Nothing to append when no video was processed.
if new_rows:
    df_result = pd.concat([df_result, pd.DataFrame(new_rows)], ignore_index=True)

print(df_result)
print(df_result.shape)

       video_file true_label predicted_label  \
0  burglary_1.mp4   burglary        burglary   

                                all_detected_prompts burglary_threshold  
0  [(Person tampering with a security camera, 17)...                110  
(1, 5)


In [10]:
# Run the detector over every not-burglary sample. The three dictionaries are rebound
# here, replacing the ones filled for the burglary samples. Per video file name:
#   video_labels_dict    -> [(prompt, count), ...] for prompts whose count reached the threshold
#   video_all_detections -> [(prompt, count), ...] for every prompt detected at least once
#   video_threshold      -> the detection-count threshold used for that video
video_labels_dict = {}
video_all_detections = {}
video_threshold = {}

for video_path in not_burglary_videos:

    # File name without its directory; used as the key of the result dictionaries.
    video_file = os.path.basename(video_path)

    #-- log --
    print(f'Processing {video_file} ==========================================================')

    #-- Calculate total frames and set Threshold --
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()

    # int() truncates, so a video reporting fewer than 10 frames gets a threshold of 0.
    # Only prompts detected at least once are stored below, so 0 then acts like 1.
    BRUGLARY_THRESHOLD = int(total_frames * BRUGLARY_THRESHOLD_PERCENT)
    video_threshold[video_file] = BRUGLARY_THRESHOLD
    print(f'BRUGLARY_THRESHOLD: {BRUGLARY_THRESHOLD}')

    #-- count number of detected prompts --
    # NOTE(review): every detected box is counted, so one frame can add several counts
    # for the same prompt while the threshold is derived from the number of frames.
    # Confirm this is the intended comparison.
    labels_count = {}

    # stream = True lets the results be consumed one at a time instead of all at once.
    for r in model_burglary_detection.predict(source = video_path,
                                           conf = CONF_THRESHOLD,
                                           iou = IOU_THRESHOLD,
                                           show = False,
                                           save= False,
                                           stream = True) :

        # r.boxes.cls holds the class index of each detected box; map it to its prompt.
        for cls_index in r.boxes.cls.int().tolist():
            prompt = labels[cls_index]
            labels_count[prompt] = labels_count.get(prompt, 0) + 1

    # Every detected prompt with its count, and the subset that reached the threshold.
    all_detection_list = list(labels_count.items())
    final_labels_list = [
        (cls_lbl, count)
        for cls_lbl, count in all_detection_list
        if count >= BRUGLARY_THRESHOLD
    ]

    video_labels_dict[video_file] = final_labels_list
    video_all_detections[video_file] = all_detection_list


# Replace the per-video log lines with a short completion marker.
display.clear_output()
print(':)')


:)


In [11]:
# Add one result row per processed not-burglary video to df_result.
true_label = "not burglary"

# Rows are collected first and concatenated once; calling pd.concat inside the loop
# copies the whole frame again on every iteration.
new_rows = []
for video, all_detected_prompts in video_all_detections.items():
    # A video is predicted as burglary when at least one prompt reached its threshold.
    predicted_label = "burglary" if len(video_labels_dict[video]) > 0 else "not burglary"

    new_rows.append({
        "video_file": video,
        "true_label": true_label,
        "predicted_label": predicted_label,
        "all_detected_prompts": all_detected_prompts,
        "burglary_threshold": video_threshold[video]
    })

# Nothing to append when no video was processed.
if new_rows:
    df_result = pd.concat([df_result, pd.DataFrame(new_rows)], ignore_index=True)

print(df_result)
print(df_result.shape)

           video_file    true_label predicted_label  \
0      burglary_1.mp4      burglary        burglary   
1  not_burglary_1.mp4  not burglary    not burglary   

                                all_detected_prompts burglary_threshold  
0  [(Person tampering with a security camera, 17)...                110  
1                                                 []                 46  
(2, 5)


In [12]:
# Write the collected per-video results to 'results.csv'; index = False leaves the row index out of the file.
df_result.to_csv('results.csv', index = False)