In [1]:
from ultralytics import YOLO
import cv2
import math
from tqdm import tqdm
import torch
import os
import moviepy.editor as mpe

In [2]:
# def retrieve_audio(video):
#     video_clip = mpe.VideoFileClip(video)

#     # Extract the audio from the video clip
#     audio_clip = video_clip.audio
#     video_clip.close()

#     return audio_clip
# def combine_audio(video, audio, outname, fps):
#     final_clip = video.set_audio(audio)
#     # os.remove(outname)
#     print(type(final_clip))
#     final_clip.write_videofile(f"f{outname}", fps=fps)

In [3]:
def get_models(model_dir="models"):
    model_list = []
    for model_name in os.listdir(model_dir):
        model_list.append(YOLO(os.path.join(model_dir, model_name)))
    return model_list

In [4]:
def blur_box(frame, box, min_conf=0.2):
    if math.ceil((box.conf[0]*100))/100 > min_conf:
        x1, y1, x2, y2 = box.xyxy[0]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        h, w = y2-y1, x2-x1

        ROI = frame[y1:y1+h, x1:x1+w]
        blur = cv2.GaussianBlur(ROI, (51,51), 0) 
        frame[y1:y1+h, x1:x1+w] = blur
    return frame

def read_video_file(video_path):
    vidcap = cv2.VideoCapture(video_path)
    return vidcap, \
            int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)), \
            vidcap.get(cv2.CAP_PROP_FPS), \
            (int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

def create_video_file(output_path, fps, frame_size):
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(filename=output_path, 
                        fourcc=fourcc, 
                        fps=fps, 
                        frameSize=frame_size)
    return video

In [5]:
def transform1(model_list, video_path, output_path):
    vidcap, frame_number, fps, frame_size = read_video_file(video_path)
    video = create_video_file(output_path, fps, frame_size)
    success, img = vidcap.read()
    if success:
        for i in tqdm(range(frame_number)):
            for model in model_list:
                results = model(img, stream=True, verbose=False)
                for r in results:
                    boxes = r.boxes
                    for box in boxes:
                        img = blur_box(img, box)

                video.write(img)
                success, img = vidcap.read()

        vidcap.release()
        video.release()
        return True
    else: return False

In [10]:
def transform2(model_list, video_path, output_path):
    vidcap, frame_number, fps, frame_size = read_video_file(video_path)
    video = create_video_file(output_path, fps, frame_size)

    results = [model(video_path, verbose=True) for model in model_list]
    mean_conf = torch.tensor([[i.conf for x in r for i in x.boxes] for r in results]).mean(1).item()
    conf_tol = mean_conf * 0.75
    interesting_frame = torch.tensor([[(x.boxes.conf > conf_tol).any().item() for x in r] for r in results]).any(0).tolist()
    
    success, img = vidcap.read()
    if success:
        for i in tqdm(range(frame_number)):
            if interesting_frame[i]:
                for result_id in range(len(model_list)):
                    frame_result = results[result_id][i]
                    for r in frame_result:
                        boxes = r.boxes
                        for box in boxes:
                            img = blur_box(img, box, conf_tol)

            video.write(img)
            success, img = vidcap.read()
    vidcap.release()
    video.release()

In [11]:
model_list = get_models()

In [12]:
transform1(model_list, "action_bronson.mp4", "video1.mp4")

100%|██████████| 20819/20819 [17:31<00:00, 19.80it/s]


True

In [13]:
transform2(model_list, "action_bronson.mp4", "video2.mp4")


errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs



KeyboardInterrupt: 