In [1]:
import os
import ray_ease as rez
import cv2
import moviepy.editor
import pytube
import warnings

# Suppress every warning category so library noise does not clutter cell output
warnings.filterwarnings("ignore")

# Keep a non-empty MODEL_PATH from the environment; otherwise fall back to the
# Haar cascade file in the current working directory
os.environ["MODEL_PATH"] = os.getenv("MODEL_PATH") or "./haarcascade_frontalface_default.xml"

# Default directory main() writes the final video into
OUTPUT_DIR = "./tmp"

In [2]:
@rez.parallelize
def detect_faces(fn, start, stop):
    """Draw face-detection boxes on one sub-clip of a video and save it as an mp4.

    Args:
        fn: Path of the source video file.
        start: Start of the sub-clip, passed to ``subclip``. It is also formatted
            with ``:04d`` to build the output file name, so it must be an integer.
        stop: End of the sub-clip, passed to ``subclip``.

    Returns:
        Path of the written clip: ``./clips/<start>.mp4`` with ``start``
        zero-padded to four digits.

    Raises:
        RuntimeError: If the cascade file named by ``MODEL_PATH`` cannot be loaded.
    """
    # The context manager closes the source clip even if detection fails,
    # so each parallel task does not leave a video reader open.
    with moviepy.editor.VideoFileClip(fn) as video:
        # Extract the subclip from the video
        clip = video.subclip(start, stop)
        fps = clip.fps

        # Load face detector; fail early with a readable message instead of an
        # OpenCV assertion on the first frame
        model_path = os.getenv("MODEL_PATH")
        face_cascade = cv2.CascadeClassifier(model_path)
        if face_cascade.empty():
            raise RuntimeError(f"Could not load face cascade from {model_path!r}")

        # Run face detector on frames
        imgs = []
        for img in clip.iter_frames():
            # moviepy yields RGB frames, so the RGB (not BGR) conversion code
            # gives the correct channel weights for the grayscale image
            gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.1, 4)
            for x, y, w, h in faces:
                # Boxes are drawn in place on the frame that gets written out
                cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
            imgs.append(img)

    # Create mp4 of result (frames are already in memory, the source is closed)
    out_clip = moviepy.editor.ImageSequenceClip(imgs, fps=fps)
    out_fn = f"./clips/{start:04d}.mp4"
    out_clip.write_videofile(out_fn, logger=None)
    return out_fn

def process_video(url):
    """Download a YouTube video, run face detection on it in parallel, and return the result.

    The video is cut into consecutive 1-second intervals. Each interval is handed
    to ``detect_faces`` and the resulting clips are concatenated into
    ``./clips/out.mp4``.

    Args:
        url: URL of the YouTube video to process.

    Returns:
        A ``(name, data)`` tuple: the base name of the downloaded file and the
        raw bytes of the concatenated output video.

    Raises:
        ValueError: If the video offers no mp4 stream.
    """
    print(f"Downloading video from '{url}'")
    yt = pytube.YouTube(url)
    stream = yt.streams.filter(file_extension="mp4").first()
    if stream is None:
        # first() returns None for an empty query; report it explicitly
        raise ValueError(f"No mp4 stream available for '{url}'")
    fn = stream.download(output_path="./clips/", max_retries=5)

    # Get duration; close the probe clip right away so its reader is released
    with moviepy.editor.VideoFileClip(fn) as source_clip:
        duration = source_clip.duration

    # Create (start, stop) intervals
    # NOTE: int() truncates, so a trailing fraction of a second is not processed.
    intervals = [(fn, offset, offset + 1) for offset in range(int(duration))]

    print("Processing each range of 1s intervals in parallel using Ray")
    out_fns = rez.retrieve([detect_faces(*interval) for interval in intervals], parallel_progress=True, parallel_progress_kwargs={"desc": "1s Intervals"})

    print("Converting detections to video clips")
    out_clips = []
    final_fn = "./clips/out.mp4"
    try:
        for out_fn in out_fns:
            out_clips.append(moviepy.editor.VideoFileClip(out_fn))

        print("Concatenating results")
        final_clip = moviepy.editor.concatenate_videoclips(out_clips)
        final_clip.write_videofile(final_fn, logger=None)
    finally:
        # One clip is opened per interval; close them all, even on failure
        for out_clip in out_clips:
            out_clip.close()

    # Return the full image data
    with open(final_fn, "rb") as f:
        return os.path.basename(fn), f.read()
    
def main(youtube_url: str = "https://www.youtube.com/watch?v=dQw4w9WgXcQ", output_dir: str = OUTPUT_DIR):
    """Process a YouTube video and write the annotated result into ``output_dir``.

    Args:
        youtube_url: URL of the video passed to ``process_video``.
        output_dir: Directory the result is written to; created if missing.

    The output file is named ``<RAY_EASE>_<downloaded file name>``, where
    ``RAY_EASE`` is read from the environment (presumably set by ``rez.init`` —
    confirm; if it is unset the prefix is the literal ``None``).
    """
    fn, movie_data = process_video(youtube_url)
    # makedirs with exist_ok creates missing parent directories too and does not
    # fail if the directory appears between a check and the creation
    os.makedirs(output_dir, exist_ok=True)
    abs_fn = os.path.join(output_dir, f"{os.getenv('RAY_EASE')}_"+fn)
    print(f"writing results to {abs_fn}")
    with open(abs_fn, "wb") as f:
        f.write(movie_data)

## Running the script

Note that we don't preserve the sound in the video.

**Further directions**

As you can tell from the resulting video, this face detection model is not state of the art. It has plenty of false positives (non-faces being labeled as faces) and false negatives (real faces not being labeled). For a better model, consider a modern one based on deep learning.

In [3]:
import time

# Wall-clock timing of the whole run, including rez.init
started_at = time.time()

rez.init("ray")
main()

elapsed = time.time() - started_at
# Break the elapsed seconds down into hours / minutes / seconds for display
whole_minutes, seconds = divmod(elapsed, 60)
hours, minutes = divmod(whole_minutes, 60)
print(f"duration = {int(hours)}h {int(minutes)}min {int(seconds)}s")

2023-10-21 18:11:33,089	INFO worker.py:1633 -- Started a local Ray instance. View the dashboard at 127.0.0.1:8266


Downloading video from 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
Processing each range of 1s intervals in parallel using Ray


1s Intervals: 100%|██████████| 212/212 [00:35<00:00,  6.05it/s]


Converting detections to video clips
Concatenating results
writing results to ./tmp/ray_Never Gonna Give You Up.mp4
duration = 0h 0min 49s
