In [None]:
!git clone https://github.com/ultralytics/yolov5
!cd yolov5
!pip install -r /content/yolov5/requirements.txt

fatal: destination path 'yolov5' already exists and is not an empty directory.


In [None]:
import torch
import cv2
import os
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Pretrained YOLOv5-small detector, fetched through PyTorch Hub
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Source directory holding the raw videos, and destination for the results
input_folder = '/content/sample_data/bruh'
output_folder = '/content/sample_data/bruhh'

# Create the destination directory when it is missing
os.makedirs(output_folder, exist_ok=True)

# Geometry of every written frame (example values, adjust as needed)
fixed_width = 480
fixed_height = 800
# Margin added on each side of the crop window around the detected person
padding = 50
# How many frames the moving-average smoothing spans
smoothing_window = 5

# Sliding-window mean used to smooth the crop-box coordinates
def moving_average(data, window_size):
    """Return the mean of every full window of ``window_size`` consecutive values.

    Args:
        data: Sequence of numbers to smooth.
        window_size: Number of consecutive samples averaged per output value.

    Returns:
        A float NumPy array with ``len(data) - window_size + 1`` entries, or an
        empty array when ``data`` holds fewer than ``window_size`` samples.
    """
    # Prefixing the running total with 0 lets every window sum be written as
    # a plain difference of two running totals.
    running_total = np.concatenate(([0.0], np.cumsum(data, dtype=float)))
    window_sums = running_total[window_size:] - running_total[:-window_size]
    return window_sums / window_size

# ---------------------------------------------------------------------------
# Per-video processing: detect the main person once per frame, then write a
# fixed-size, smoothed crop that follows that person.
# ---------------------------------------------------------------------------

def _detect_person_boxes(frame):
    """Return the person boxes found in one OpenCV (BGR) frame.

    Returns:
        NumPy array of shape (N, 4) with rows ``x_min, y_min, x_max, y_max``;
        N is 0 when no person is detected.
    """
    # OpenCV decodes frames as BGR, while the YOLOv5 AutoShape wrapper expects
    # RGB for NumPy inputs, so convert before running inference.
    results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    # .cpu() keeps this working when the model lives on a GPU.
    detections = results.xyxy[0].cpu().numpy()
    # class_id 0 represents 'person' in the COCO label set used by yolov5s.
    return detections[detections[:, 5] == 0][:, :4]


def _scan_video(input_path):
    """First pass: run detection once per frame and collect crop statistics.

    Returns:
        (boxes, max_width, max_height) where ``boxes`` has one entry per decoded
        frame (the largest-area person box as a tuple, or None when no person
        was found) and ``max_width`` / ``max_height`` are the largest person box
        dimensions seen anywhere in the video.
    """
    boxes = []
    max_width, max_height = 0.0, 0.0
    cap = cv2.VideoCapture(input_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            person_boxes = _detect_person_boxes(frame)
            if len(person_boxes) == 0:
                boxes.append(None)
                continue

            widths = person_boxes[:, 2] - person_boxes[:, 0]
            heights = person_boxes[:, 3] - person_boxes[:, 1]
            max_width = max(max_width, float(widths.max()))
            max_height = max(max_height, float(heights.max()))

            # Track the person with the largest bounding box in this frame.
            areas = widths * heights
            best = int(np.argmax(areas))
            if areas[best] > 0:
                boxes.append(tuple(float(v) for v in person_boxes[best]))
            else:
                boxes.append(None)
    finally:
        cap.release()  # Always close the video file after the first pass
    return boxes, max_width, max_height


def _smoothed_latest(history, window):
    """Return the moving average of the last ``window`` values, or the latest
    value while fewer than ``window`` samples are available."""
    if len(history) >= window:
        # Only the trailing window matters, so avoid re-averaging the whole
        # history on every frame.
        return moving_average(history[-window:], window)[-1]
    return history[-1]


# Iterate through each file in the input folder (sorted for a stable order)
for filename in sorted(os.listdir(input_folder)):
    if not filename.lower().endswith(('.avi', '.mp4', '.mov')):  # Add other formats if needed
        continue

    input_path = os.path.join(input_folder, filename)
    output_filename = f'Output_{filename}'
    output_path = os.path.join(output_folder, output_filename)

    print(f"Processing file: {filename}")

    # First pass: detect once per frame and determine the largest box size.
    # The per-frame boxes are cached so the second pass needs no inference.
    boxes, max_width, max_height = _scan_video(input_path)

    frame_count = 0
    if all(box is None for box in boxes):
        print(f"No frames with a detected person in {filename}; no output written.")
        print(f"Total frames processed for {filename}: {frame_count}")
        continue

    bboxes_x_min = []
    bboxes_y_min = []
    bboxes_x_max = []
    bboxes_y_max = []

    # Second pass: apply a consistent crop size with dynamic tracking
    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        # Keep the source frame rate; fall back to 30 when it is unavailable.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps and fps > 0):
            fps = 30.0

        frame_index = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Failed to retrieve frame or end of video.")
                break

            # Look up the box found for this frame during the first pass.
            box = boxes[frame_index] if frame_index < len(boxes) else None
            frame_index += 1
            if box is None:
                continue  # Frames without a detected person are not written

            x_min, y_min, x_max, y_max = box

            # Calculate the center of the bounding box
            center_x = (x_min + x_max) / 2
            center_y = (y_min + y_max) / 2

            # Fixed-size window (largest box + padding) centered on the person
            bboxes_x_min.append(center_x - max_width / 2 - padding)
            bboxes_y_min.append(center_y - max_height / 2 - padding)
            bboxes_x_max.append(center_x + max_width / 2 + padding)
            bboxes_y_max.append(center_y + max_height / 2 + padding)

            # Apply moving average smoothing to reduce jitter
            smoothed_x_min = _smoothed_latest(bboxes_x_min, smoothing_window)
            smoothed_y_min = _smoothed_latest(bboxes_y_min, smoothing_window)
            smoothed_x_max = _smoothed_latest(bboxes_x_max, smoothing_window)
            smoothed_y_max = _smoothed_latest(bboxes_y_max, smoothing_window)

            # Ensure bounding box coordinates are within frame bounds
            smoothed_x_min = int(max(0, smoothed_x_min))
            smoothed_y_min = int(max(0, smoothed_y_min))
            smoothed_x_max = int(min(frame.shape[1], smoothed_x_max))
            smoothed_y_max = int(min(frame.shape[0], smoothed_y_max))

            # cv2.resize raises on an empty image, so skip degenerate crops.
            if smoothed_x_max <= smoothed_x_min or smoothed_y_max <= smoothed_y_min:
                continue

            # Crop the frame based on the smoothed bounding box
            cropped_frame = frame[smoothed_y_min:smoothed_y_max, smoothed_x_min:smoothed_x_max]

            # Resize cropped frame to fixed size
            cropped_frame_resized = cv2.resize(cropped_frame, (fixed_width, fixed_height))

            # Initialize VideoWriter lazily, on the first frame actually written
            if out is None:
                # XVID is meant for AVI containers; MP4/MOV outputs use mp4v.
                extension = os.path.splitext(output_path)[1].lower()
                codec = 'XVID' if extension == '.avi' else 'mp4v'
                out = cv2.VideoWriter(
                    output_path,
                    cv2.VideoWriter_fourcc(*codec),
                    fps,  # frame rate of the source video
                    (fixed_width, fixed_height)  # fixed frame size
                )
                if not out.isOpened():
                    print(f"Could not open output file for writing: {output_path}")
                    break
            out.write(cropped_frame_resized)

            frame_count += 1
    finally:
        # Release handles even if processing fails part-way through.
        cap.release()
        if out is not None:
            out.release()

    print(f"Total frames processed for {filename}: {frame_count}")

print("Processing complete. All videos have been processed.")

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip
YOLOv5 🚀 2024-9-15 Python-3.10.12 torch-2.4.0+cu121 CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|██████████| 14.1M/14.1M [00:00<00:00, 160MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


Processing file: S7_walking_toRight_sideView_HD.mp4
Failed to retrieve frame or end of video.
Total frames processed for S7_walking_toRight_sideView_HD.mp4: 401
Processing file: S1_walking_toLeft_sideView_HD.mp4
Failed to retrieve frame or end of video.
Total frames processed for S1_walking_toLeft_sideView_HD.mp4: 304
Processing file: S6_walking_toRight_sideView_HD.mp4
Failed to retrieve frame or end of video.
Total frames processed for S6_walking_toRight_sideView_HD.mp4: 446
Processing file: S4_walking_toLeft_sideView_HD.mp4
Failed to retrieve frame or end of video.
Total frames processed for S4_walking_toLeft_sideView_HD.mp4: 224
Processing file: S10_walking_toRight_sideView_HD.mp4
Failed to retrieve frame or end of video.
Total frames processed for S10_walking_toRight_sideView_HD.mp4: 363
Processing file: S8_walking_toRight_sideView_HD.mp4
Failed to retrieve frame or end of video.
Total frames processed for S8_walking_toRight_sideView_HD.mp4: 348
Processing file: S10_walking_toLeft_