In [10]:
import cv2
from ultralytics import YOLO
import os
import subprocess

In [11]:
# Load the YOLOv8 model from the "yolov8n.pt" weights once at module level;
# detect_person_in_frame() reads this global every time it is called.
model = YOLO("yolov8n.pt")


# Function to detect people in each frame
def detect_person_in_frame(frame):
    """Return True if the model reports at least one class-0 box in ``frame``.

    The frame is passed to the module-level ``model``; every returned result is
    inspected and the search stops at the first box whose class id is 0 (the
    id this file treats as 'person'). Returns False when no such box exists.
    """
    person_class_id = 0  # Class 0 typically represents 'person'
    for detection in model(frame):
        candidates = detection.boxes
        if candidates is None:
            # Nothing was detected in this result; move on to the next one.
            continue
        if any(int(candidate.cls) == person_class_id for candidate in candidates):
            return True
    return False


# Function to process the video and trim based on person detection
def trim_video_based_on_person(input_video_path, output_dir, min_duration=5):
    """Cut a video into clips covering the stretches where a person is detected.

    Every frame is passed to ``detect_person_in_frame``. Consecutive frames
    with a detection form one segment; each segment lasting at least
    ``min_duration`` seconds is re-encoded with FFmpeg (libx264) into
    ``<output_dir>/<input base name>_trimmed_<segment index>.mp4``. Shorter
    segments are reported and skipped.

    Args:
        input_video_path: Path of the video to analyse.
        output_dir: Directory that receives the clips; created if missing.
        min_duration: Minimum segment length in seconds for a clip to be
            written. Defaults to 5.

    Returns:
        None. Progress, skipped segments and failures are reported via print.
    """
    # Extract the base name of the input file for naming the output clips.
    base_name = os.path.splitext(os.path.basename(input_video_path))[0]

    cap = cv2.VideoCapture(input_video_path)
    try:
        if not cap.isOpened():
            print(f"Error opening video: {input_video_path}")
            return

        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes
        # every computed timestamp drift further from the real position.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            print(f"Error reading frame rate of video: {input_video_path}")
            return

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        trimming_ranges = _find_person_ranges(cap, fps, frame_count)
    finally:
        # Release the capture even if detection raises part-way through.
        cap.release()

    # FFmpeg does not create missing directories for its output file.
    os.makedirs(output_dir, exist_ok=True)

    # The index counts every detected segment (including skipped ones), so a
    # clip's number always identifies the same segment of the source video.
    for i, (start, end) in enumerate(trimming_ranges):
        duration = end - start
        if duration < min_duration:
            print(
                f"Skipping segment from {start} to {end} (duration: {duration:.2f} seconds)"
            )
            continue

        trimmed_output_path = os.path.join(
            output_dir, f"{base_name}_trimmed_{i}.mp4"
        )
        ffmpeg_command = [
            "ffmpeg",
            "-y",  # overwrite clips from an earlier run instead of prompting
            "-loglevel",
            "debug",
            "-i",
            input_video_path,
            "-ss",
            str(start),
            "-to",
            str(end),
            "-c:v",
            "libx264",
            trimmed_output_path,
        ]
        try:
            subprocess.run(ffmpeg_command, check=True)
        except subprocess.CalledProcessError as e:
            print(f"Error trimming video: {e}")
        else:
            print(f"Trimmed video saved to: {trimmed_output_path}")


def _find_person_ranges(cap, fps, frame_count):
    """Scan an opened capture and return (start, end) ranges, in seconds, with a person.

    ``end`` is exclusive: it is the timestamp of the first frame after the
    run, so the range covers the last frame in which a person was detected.
    """
    ranges = []
    run_start = None  # index of the first frame of the open run, if any
    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Display progress of frame processing
        print(f"Processing frame {frame_idx + 1}/{frame_count}")

        if detect_person_in_frame(frame):
            if run_start is None:
                run_start = frame_idx
        elif run_start is not None:
            # The run ended on the previous frame; this frame marks its end.
            ranges.append((run_start / fps, frame_idx / fps))
            run_start = None

        frame_idx += 1

    # Close a run that was still open when the video ended.
    if run_start is not None:
        ranges.append((run_start / fps, frame_idx / fps))
    return ranges


# Example usage: scan one recording and write the clips that contain a person
# into a "trim" folder. The absolute paths below are environment-specific;
# adjust them before running.
input_video = "D:/Kuliah/D4 - PENS/Proyek Akhir/Project/Footage/07-01-2024/20240107_004145_tp00008.mp4"
output_dir = "D:/Kuliah/D4 - PENS/Proyek Akhir/Project/Footage/07-01-2024/trim"
# Runs detection on every frame (progress is printed per frame), then calls FFmpeg.
trim_video_based_on_person(input_video, output_dir)

Processing frame 1/26600

0: 384x640 2 persons, 24.7ms
Speed: 3.8ms preprocess, 24.7ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
Processing frame 2/26600

0: 384x640 2 persons, 23.6ms
Speed: 2.0ms preprocess, 23.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Processing frame 3/26600

0: 384x640 2 persons, 23.5ms
Speed: 1.8ms preprocess, 23.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Processing frame 4/26600

0: 384x640 2 persons, 18.6ms
Speed: 3.0ms preprocess, 18.6ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)
Processing frame 5/26600

0: 384x640 2 persons, 18.8ms
Speed: 3.0ms preprocess, 18.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Processing frame 6/26600

0: 384x640 2 persons, 18.2ms
Speed: 2.5ms preprocess, 18.2ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)
Processing frame 7/26600

0: 384x640 2 persons, 18.6ms
Speed: 3.0ms preprocess, 18.6ms i