In [1]:
import numpy as np
import pandas as pd
import os
import torch
from PIL import Image
import json
from multiprocessing import Pool, cpu_count
from moviepy.editor import VideoFileClip
import time

# Load YOLOv5 model
yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

def extract_key_frames(video_path, num_key_frames=25):
    print(f"Extracting key frames from {video_path}")
    clip = VideoFileClip(video_path)
    duration = clip.duration
    frames = [clip.get_frame(t) for t in np.linspace(0, duration, num_key_frames)]
    return frames

def detect_objects_in_frames(frames):
    detected_objects = []
    for frame in frames:
        print("Detecting objects in a frame...")
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        results = yolo_model(img)
        detected_objects.extend(results.pandas().xyxy[0]['name'].tolist())
    return list(set(detected_objects))

def process_video(video_info):
    video_id, video_path = video_info
    if os.path.exists(video_path):
        key_frames = extract_key_frames(video_path)
        detected_objects = detect_objects_in_frames(key_frames)
        return video_id, detected_objects
    else:
        return video_id, []


df = pd.read_excel("FINAL_IMSyPP_EN_644_PREPROCESSED_no_restricted_videos_features_2ktoken_w_prompt_u_desc_audio.xlsx")

# For testing, limit the number of videos processed
df = df[:2]

df['detected_objects'] = ""

video_directory = "NEW_Hatebase_dataset_downloaded_videos"
video_info_list = [(row['video_id'], os.path.join(video_directory, f"{row['video_id']}.mp4")) for _, row in df.iterrows()]

print("Starting video processing...")
start_time = time.time()

with Pool(cpu_count()) as pool:
    results = pool.map(process_video, video_info_list)

end_time = time.time()
print(f"Video processing completed in {end_time - start_time:.2f} seconds")

detected_objects = {video_id: objects for video_id, objects in results}

for index, row in df.iterrows():
    video_id = row['video_id']
    df.at[index, 'detected_objects'] = json.dumps(detected_objects[video_id])

# Uncomment the line below to save the results to an Excel file
# df.to_excel("FINAL_IMSyPP_EN_644_PREPROCESSED_no_restricted_videos_features_2ktoken_w_prompt_u_desc_audio_object_detection.xlsx", index=False)

total_time = end_time - start_time
print(f"Total processing time: {total_time:.2f} seconds")



Using cache found in C:\Users\aboeck/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-7-9 Python-3.10.14 torch-2.3.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


Starting video processing...
