In [1]:
import pandas as pd
from moviepy.editor import *
from PIL import Image
from PIL import ImageDraw
from numpy import asarray
import os
import subprocess

In [2]:
# Tell MoviePy to invoke the `ffmpeg` command by name instead of its default
# binary setting. This is necessary for using HW acceleration.
# The override is best-effort: if it cannot be applied, MoviePy keeps its
# default setting.
try:
    from moviepy.config import change_settings
    change_settings({"FFMPEG_BINARY":"ffmpeg"})
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still propagate.
    pass

In [3]:
# Prefix used to locate the input files. Leave it empty when the video file
# and its videopipe output sit next to this notebook.
path = ""

# Base name of the video (without extension) and the suffix of the
# face-detection task whose JSON output is loaded.
v_name = "HIGH_LIGHTS_I_SNOWMAGAZINE_I_SANDER_26"
task = "_frame_face_detection_datamodel"

# Nominal frame width / height -- presumably the source video resolution.
w = 1920
h = 1080

# Reference size the bounding-box coordinates are scaled from when drawing.
RESIZE_DIM = 640

In [4]:
## read face detection json
# The detection output is expected at <path>/<v_name>/<v_name><task>.json.
# os.path.join is used so that `path` works with or without a trailing
# separator (an empty `path` still resolves relative to the working directory).
faces_json = os.path.join(path, v_name, v_name + task + '.json')
faces = pd.read_json(faces_json, lines = True)

# The first record's `data` field holds one entry per analysed frame; keep
# only the entries that contain at least one detected face.
faces_detected = [frame for frame in faces.data[0] if len(frame['faces']) > 0]

In [5]:
## read video file with moviepy
# Resolve the video relative to `path`, the same way the detection JSON is
# located, so both inputs can live outside the notebook's directory.
clip = VideoFileClip(os.path.join(path, v_name + '.mp4'))
audio = clip.audio

# Write audio to file; it is muxed back into the final video at the end,
# because the intermediate video parts are rendered without audio.
audio.write_audiofile(v_name + '_audio.mp3')

# Duration of a single frame in seconds, used to convert frame indices to
# timestamps.
fps = clip.fps
frame_duration = 1 / fps

MoviePy - Writing audio in HIGH_LIGHTS_I_SNOWMAGAZINE_I_SANDER_26_audio.mp3


                                                                        

MoviePy - Done.




In [6]:
def get_frame(clip, frame_number):
    """Return the frame at index ``frame_number`` of ``clip`` as a PIL image.

    The frame index is converted to a timestamp with the module-level
    ``frame_duration`` before the clip is queried.
    """
    timestamp = frame_number * frame_duration
    pixels = clip.get_frame(timestamp)
    return Image.fromarray(pixels)

In [7]:
# Draw bounding box on each frame with a detected face, since images are
# resized, we need to scale the bounding box coordinates.
def draw_bounding_boxes(face, img, width_ratio, height_ratio):
    """Draw a red rectangle on ``img`` for every detection in ``face``.

    Parameters:
        face: detection record for one frame; ``face['faces']`` is a sequence
            of detections, each carrying a ``'bb_faces'`` box that is unpacked
            as ``(y0, x1, y1, x0)``.
        img: PIL image to draw on; it is modified in place.
        width_ratio: factor applied to the x coordinates.
        height_ratio: factor applied to the y coordinates.

    Returns:
        The same ``img`` object, for convenience.
    """
    # One drawing context serves every box on this image, so create it once
    # instead of once per detection.
    draw = ImageDraw.Draw(img)

    for detection in face['faces']:
        y0, x1, y1, x0 = detection['bb_faces']
        # Scale into the image's pixel space; int() truncates toward zero.
        box = [
            int(x0 * width_ratio),
            int(y0 * height_ratio),
            int(x1 * width_ratio),
            int(y1 * height_ratio),
        ]
        draw.rectangle(box, outline='red')

    return img

In [8]:
# Upper bound on the number of face frames rendered into a single intermediate
# video part. Smaller values mean more parts (less memory, more disk space);
# larger values mean fewer parts (more memory, less disk space). The best
# value also depends on how many faces were detected in the video. -1 is
# meant to put every face into one part.
faces_limit = 100

# Playback position (in seconds) up to which the video has been emitted.
prev_t = 0
# Each annotated still frame is shown for exactly one source frame.
duration_t = frame_duration

# List of the rendered parts, one "file <name>" line each, consumed later by
# ffmpeg's concat step.
f = open('face_detection.txt', mode='w')

def get_face_clips(faces_detected, faces_limit=100, timestamp=0):
    """Build the clip sequence for one batch of face detections.

    For every processed detection the untouched footage between the current
    position and the detection's frame is added (if there is any), followed by
    a one-frame still of that frame with the bounding boxes drawn on it.

    Reads the module-level ``clip``, ``frame_duration``, ``duration_t`` and
    ``RESIZE_DIM``.

    Parameters:
        faces_detected: detection records still to be rendered, in playback
            order; each has a ``'dimension_idx'`` frame index and a
            ``'faces'`` list. The last element must be the last detection of
            the whole video, because reaching it appends the remaining tail
            of the video.
        faces_limit: maximum number of records to process in this call; a
            negative value processes all of them.
        timestamp: position (seconds) up to which the video has already been
            emitted.

    Returns:
        ``(clips, timestamp)``: the list of clips for this batch and the
        position reached after it.
    """
    # A negative limit means "no limit"; otherwise only the first
    # `faces_limit` records belong to this batch.
    batch = faces_detected if faces_limit < 0 else faces_detected[:faces_limit]
    last_index = len(faces_detected) - 1

    # Timestamps are floats derived from frame indices, so `timestamp` and `t`
    # for adjacent frames can differ by rounding noise. Gaps shorter than half
    # a frame (or negative ones) are treated as "no gap" instead of producing
    # a degenerate subclip.
    half_frame = frame_duration / 2

    clips = []
    for index, face in enumerate(batch):
        t = face['dimension_idx'] * frame_duration
        img = get_frame(clip, face['dimension_idx'])

        # Box coordinates are scaled from RESIZE_DIM to the real frame size
        # (presumably detection ran on RESIZE_DIM x RESIZE_DIM frames).
        img_w, img_h = img.size
        draw_bounding_boxes(face, img, img_w / RESIZE_DIM, img_h / RESIZE_DIM)

        if t - timestamp > half_frame:
            clips.append(clip.subclip(timestamp, t))
        clips.append(ImageClip(asarray(img), duration=duration_t))
        img.close()
        timestamp = t + duration_t

        # Add final clip if it is the last face. Compare positions rather than
        # dict contents so an earlier, equal-looking record cannot trigger it.
        if index == last_index:
            if clip.duration - timestamp > half_frame:
                clips.append(clip.subclip(timestamp, clip.duration))
            timestamp = clip.duration

    return clips, timestamp

# Create video clips with detected faces and concatenate them into one video.
# The detections are rendered in batches of `faces_limit`; each batch becomes
# one intermediate file that is listed in face_detection.txt.
total_faces = len(faces_detected)

# A non-positive limit (e.g. the documented -1) means "everything in one part".
chunk_size = faces_limit if faces_limit > 0 else max(total_faces, 1)

# Ceiling division: an exact multiple of the limit must not yield an extra,
# empty batch. Always render at least one part so that a video without any
# detected face is still passed through.
num_parts = max(1, (total_faces + chunk_size - 1) // chunk_size)

try:
    for i in range(num_parts):
        clips, prev_t = get_face_clips(faces_detected[i * chunk_size:], chunk_size, prev_t)

        if not clips:
            # Nothing to annotate: emit the remaining footage unchanged.
            clips = [clip.subclip(prev_t, clip.duration)]
            prev_t = clip.duration

        part_name = 'face_detection_' + str(i) + '.mp4'
        part = concatenate_videoclips(clips)

        # Try hw acceleration, else use cpu.
        try:
            part.write_videofile(part_name, codec='h264_nvenc', fps=fps, logger=None, audio=False, preset='3')
        except Exception:
            try:
                part.write_videofile(part_name, codec='libx264', fps=fps, logger=None, audio=False)
            except Exception as err:
                # Chain the original error so the real cause stays visible.
                raise Exception('An error occured while writing the video file.') from err
        f.write('file ' + part_name + '\n')
finally:
    # Close the concat list even when rendering fails.
    f.close()

In [9]:
# Remove outputs left over from a previous run so ffmpeg does not stop to ask
# whether they may be overwritten.
for stale_output in ('output.mp4', v_name + '_face_detection.mp4'):
    if os.path.exists(stale_output):
        os.remove(stale_output)

# Concatenate all the files in the face_detection.txt file into one final clip
# and write to .mp4 file. Arguments are passed as a list (no shell) so file
# names containing spaces or shell metacharacters are handled correctly, and
# check=True stops the cell before the intermediates are deleted if ffmpeg
# fails.
subprocess.run(
    ['ffmpeg', '-f', 'concat', '-safe', '0', '-i', 'face_detection.txt',
     '-c', 'copy', 'output.mp4'],
    check=True,
)

# Add audio to the final clip.
subprocess.run(
    ['ffmpeg', '-i', 'output.mp4', '-i', v_name + '_audio.mp3',
     '-c:v', 'copy', '-c:a', 'aac', '-map', '0:v:0', '-map', '1:a:0',
     '-shortest', v_name + '_face_detection.mp4'],
    check=True,
)

# Delete all the subclips. The names are read back from the concat list so
# that exactly the files that were rendered are removed.
with open('face_detection.txt') as concat_list:
    part_files = [
        line.split(' ', 1)[1].strip()
        for line in concat_list
        if line.startswith('file ')
    ]
for part_file in part_files:
    os.remove(part_file)

# Delete the face_detection.txt file.
os.remove('face_detection.txt')

# Delete the audio file.
os.remove(v_name + '_audio.mp3')