In [None]:
# Install libraries
!pip install ultralytics
!pip install ffmpeg-python

In [None]:
# Download model and video.
# gdown fetches the Google Drive file with this ID into the working directory.
# NOTE(review): presumably this is the "yolov8n-face.pt" weights file that is
# loaded later in the notebook — confirm the saved filename matches.
!gdown 1qcr9DbgsX3ryrz2uU8w4Xm3cOrRywXqb
# Sample clip; later cells read it from the working directory as "kunkun.mp4".
!wget https://github.com/iioSnail/pytorch_deep_learning_examples/raw/refs/heads/main/asserts/mp4/kunkun.mp4

In [None]:
import os
from base64 import b64encode

import cv2
import ffmpeg
from IPython.display import HTML
from numpy import ndarray
from tqdm import tqdm
from ultralytics import YOLO

In [4]:
# Apply mosaic to an image
def mosaic_image(model, image:ndarray, mosaic_scale = 10) -> ndarray:
    """Pixelate every region that ``model`` detects in ``image``.

    Each detected box is shrunk by ``mosaic_scale`` along both axes and then
    enlarged back to its original size with nearest-neighbour interpolation,
    which yields the blocky mosaic look.

    Args:
        model: Detector invoked as ``model(image, verbose=False)``. Its first
            result must expose ``boxes.xyxy``, an iterable of
            ``(x1, y1, x2, y2)`` rows supporting ``.int()``.
        image: Frame to process, indexed as ``image[y, x]``. It is modified
            in place.
        mosaic_scale: Downscale factor applied per axis; must be at least 1.

    Returns:
        The same ``image`` object, with the detected regions pixelated.

    Raises:
        ValueError: If ``mosaic_scale`` is smaller than 1.
    """
    if mosaic_scale < 1:
        raise ValueError(f"mosaic_scale must be >= 1, got {mosaic_scale}")

    frame_h, frame_w = image.shape[:2]
    boxes = model(image, verbose=False)[0].boxes.xyxy

    for box in boxes:
        # Plain Python ints keep the slicing below independent of the tensor type.
        x1, y1, x2, y2 = box.int().tolist()

        # Clamp to the frame so a negative coordinate cannot wrap around when slicing.
        x1, x2 = max(0, x1), min(frame_w, x2)
        y1, y2 = max(0, y1), min(frame_h, y2)
        if x2 <= x1 or y2 <= y1:
            # Empty region: nothing to blur, and cv2.resize rejects empty input.
            continue

        roi = image[y1:y2, x1:x2]
        h, w = roi.shape[:2]

        # Keep at least one pixel per axis: regions smaller than mosaic_scale
        # would otherwise request a zero-sized target, which cv2.resize rejects.
        small_size = (max(1, int(w // mosaic_scale)), max(1, int(h // mosaic_scale)))
        small_roi = cv2.resize(roi, small_size, interpolation=cv2.INTER_LINEAR)
        mosaic_roi = cv2.resize(small_roi, (w, h), interpolation=cv2.INTER_NEAREST)
        image[y1:y2, x1:x2] = mosaic_roi

    return image

In [5]:
# Define filepaths.
input_video = "kunkun.mp4"  # source clip to process
tmp_audio = "tmp.wav"  # audio track extracted from the source clip
tmp_video = "tmp_kunkun.mp4"  # mosaic-processed frames, written without audio
output_video = "mosaic_kunkun.mp4"  # final result: processed frames merged with the audio

# Load the detector from local weights.
# NOTE(review): judging by the filename this is a YOLOv8 face-detection model — confirm.
model = YOLO("yolov8n-face.pt")

In [6]:
# Pull the audio track out of the source clip and store it as a WAV file,
# replacing any file left over from a previous run.
source_clip = ffmpeg.input(input_video)
wav_job = source_clip.output(tmp_audio, format='wav')
wav_job.run(overwrite_output=True)

(None, None)

In [None]:
# Apply the mosaic frame by frame and write the processed frames to `tmp_video`.
cap = cv2.VideoCapture(input_video)
out = None
try:
    if not cap.isOpened():
        # Raise instead of calling exit(): inside a notebook exit() asks the
        # kernel to shut down rather than just stopping this cell.
        raise RuntimeError(f"Could not open video file: {input_video}")

    # Reuse the source geometry and frame rate so the output lines up with the
    # separately extracted audio track.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(tmp_video, fourcc, fps, (width, height))
    if not out.isOpened():
        # An unopened writer would otherwise drop every frame silently.
        raise RuntimeError(f"Could not open video writer for: {tmp_video}")

    # The container-reported frame count only sizes the progress bar; the loop
    # itself ends when the capture stops returning frames.
    with tqdm(total=n_frames) as pro_bar:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = mosaic_image(model, frame)
            out.write(frame)
            pro_bar.update(1)
finally:
    # Always release the handles so the output file is finalized even when a
    # frame fails to process.
    cap.release()
    if out is not None:
        out.release()

In [None]:
# Mux the processed frames with the extracted audio into the final file.
audio_stream = ffmpeg.input(tmp_audio)
video_stream = ffmpeg.input(tmp_video)

# The video stream is copied unchanged; the audio is encoded as AAC.
codec_options = {"vcodec": "copy", "acodec": 'aac'}
merge_job = ffmpeg.output(video_stream, audio_stream, output_video, **codec_options)
merge_job.run(overwrite_output=True)

In [None]:
# Show the result video.

# Re-encode the result with libx264 before embedding it.
# NOTE(review): presumably needed because browsers cannot play the stream
# written by the OpenCV writer directly — confirm.
# Going through the ffmpeg bindings (instead of a shell string) avoids quoting
# problems in the paths, raises on failure, and overwrites a file left over
# from a previous run instead of stopping at ffmpeg's overwrite prompt.
compressed_path = "./compressed.mp4"
ffmpeg.input(output_video).output(compressed_path, vcodec="libx264").run(overwrite_output=True)

# Embed the clip as a base64 data URL so the player needs no file server.
with open(compressed_path, 'rb') as video_file:
    mp4 = video_file.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)