In [None]:
import shutil
import os
import cv2
import math
import numpy as np
from ultralytics import YOLO

from ultralytics.utils.checks import check_imshow
from ultralytics.utils.plotting import Annotator, colors

In [None]:
def crop_video(input_video_path, output_video_path, crop_coords):
    """Write a spatially cropped copy of a video.

    Every frame read from ``input_video_path`` is cropped to the rectangle
    described by ``crop_coords`` and written to ``output_video_path`` using
    the ``MJPG`` FourCC and the frame rate reported by the source.

    Args:
        input_video_path: Path of the video to read.
        output_video_path: Path of the video to create.
        crop_coords: ``(x1, y1, x2, y2)`` integer pixel coordinates of the
            crop rectangle. ``x2`` and ``y2`` are exclusive (they are used as
            slice end points). The rectangle is clamped to the frame bounds.

    Raises:
        ValueError: If the rectangle is empty after clamping to the frame.
        RuntimeError: If the output video writer cannot be opened.

    Note:
        If the input cannot be opened or yields no frames, nothing is written
        and no error is raised.
    """
    x1, y1, x2, y2 = crop_coords

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    cap = cv2.VideoCapture(input_video_path)
    out = None

    try:
        fps = cap.get(cv2.CAP_PROP_FPS)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            if out is None:
                # The frame size is only known reliably once a frame has been
                # decoded, so the crop rectangle is validated here. Clamping
                # prevents negative coordinates from wrapping around through
                # Python's negative-index slicing.
                frame_h, frame_w = frame.shape[:2]
                x1, x2 = max(0, x1), min(frame_w, x2)
                y1, y2 = max(0, y1), min(frame_h, y2)
                if x2 <= x1 or y2 <= y1:
                    raise ValueError(
                        f"Crop region {tuple(crop_coords)} is empty for a "
                        f"{frame_w}x{frame_h} frame"
                    )

                # VideoWriter expects the frame size as (width, height).
                out = cv2.VideoWriter(output_video_path, fourcc, fps, (x2 - x1, y2 - y1))
                if not out.isOpened():
                    raise RuntimeError(
                        f"Could not open video writer for {output_video_path!r}"
                    )

            out.write(frame[y1:y2, x1:x2])
    finally:
        # Always free the capture/writer handles, even if a frame fails.
        # No GUI windows are created here, so cv2.destroyAllWindows() is not
        # called (it raises on headless OpenCV builds).
        cap.release()
        if out is not None:
            out.release()


In [None]:
def extrac_mmbox(inp_video_dir, opt_video_dir, model):
    """Crop each video to the union of all detected boxes, or copy it as is.

    Walks ``inp_video_dir`` recursively. For every file, ``model.predict`` is
    run in streaming mode (``conf=0.3``, ``classes=0``) and the smallest
    rectangle enclosing every detected box across all frames is accumulated.
    If at least one frame contains more than one detection, the video is
    cropped to that rectangle with ``crop_video``; otherwise the file is
    copied unchanged. Outputs are written to ``opt_video_dir`` under the
    original file name.

    Args:
        inp_video_dir: Directory tree containing the input videos.
        opt_video_dir: Directory that receives the outputs (created if
            missing).
        model: Object exposing an Ultralytics-style ``predict`` method whose
            results provide ``boxes.xyxy`` rows of ``(x1, y1, x2, y2)``.
            NOTE(review): class id 0 is presumably "person" for the loaded
            weights -- confirm when using a custom model.

    Note:
        Outputs are flattened into ``opt_video_dir``; files with the same
        name in different sub-directories overwrite each other.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(opt_video_dir, exist_ok=True)

    for root, _dirs, filenames in os.walk(inp_video_dir):
        for filename in filenames:
            video_file_path = os.path.join(root, filename)
            output_video_path = os.path.join(opt_video_dir, filename)

            results = model.predict(video_file_path, stream=True, conf=0.3, classes=0)

            # Running union of every box seen in this video.
            min_x1 = min_y1 = float('inf')
            max_x2 = max_y2 = 0
            # Largest number of detections observed in a single frame.
            max_people = 0

            for r in results:
                if r.boxes is None:
                    continue

                # One (x1, y1, x2, y2) row per detection in this frame.
                frame_boxes = r.boxes.xyxy.tolist()
                for bx1, by1, bx2, by2 in frame_boxes:
                    min_x1 = min(min_x1, bx1)
                    min_y1 = min(min_y1, by1)
                    max_x2 = max(max_x2, bx2)
                    max_y2 = max(max_y2, by2)

                max_people = max(max_people, len(frame_boxes))

            if max_people > 1:
                # Round outwards (floor the top-left, ceil the bottom-right)
                # so the integer crop never cuts into the detected region.
                crop_coords = (
                    math.floor(min_x1),
                    math.floor(min_y1),
                    math.ceil(max_x2),
                    math.ceil(max_y2),
                )
                crop_video(video_file_path, output_video_path, crop_coords)
            else:
                shutil.copy(video_file_path, output_video_path)
                
                

In [None]:
# Instantiate the Ultralytics YOLO detector from the 'yolov8x.pt' checkpoint.
# The resulting object is handed to extrac_mmbox, which calls model.predict().
model = YOLO('yolov8x.pt')

In [None]:
# Placeholder locations -- replace both with real directories before running.
# inp_video_dir is walked recursively for input files; every output is written
# directly into opt_video_dir, which is created when it does not exist.
inp_video_dir = 'path to the input videos'
opt_video_dir = 'path to the output videos'

In [None]:
# Process every file under inp_video_dir: videos with a frame containing more
# than one detection are cropped, all others are copied unchanged.
extrac_mmbox(inp_video_dir, opt_video_dir, model)