In [None]:
!pip install dlib

In [None]:
import os
# Select which GPU this process may see. Both variables are set together so the
# index below refers to the PCI bus ordering of the devices.
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",
    CUDA_VISIBLE_DEVICES="0",  # Set to 0 if want to use 1st GPU, 1 if want to use 2nd GPU, etc.
)

In [None]:
from pathlib import Path
import os
import cv2
from matplotlib import pyplot as plt
import numpy as np
from tqdm import tqdm
import shutil
import time
import dlib
from random import shuffle

In [None]:
# Pretrained dlib CNN face detector weights. Download the archive from
# https://github.com/davisking/dlib-models/raw/master/mmod_human_face_detector.dat.bz2
# and decompress it: the call below loads the plain ".dat" file through a relative
# path, so it must be present in the current working directory.
detector = dlib.cnn_face_detection_model_v1("mmod_human_face_detector.dat")

In [None]:
import bisect
def get_spaced_nums(count, req_frames):
    """Return candidate frame indices spread over a video, ordered coarse to fine.

    The interval [0, count - 1] is bisected repeatedly: the first entry is the
    middle frame (count // 2), followed by the midpoints of each half, then the
    midpoints of each quarter, and so on. Refinement stops once the number of
    interior points reaches ``factor * req_frames``, so the caller receives
    more candidates than it needs and can skip frames it rejects.

    Args:
        count: Total number of frames in the video.
        req_frames: Number of usable frames the caller wants. When it is 1 the
            oversampling factor is ``count // 4`` instead of 2.

    Returns:
        1-D integer ``numpy`` array of frame indices. Midpoints are computed as
        floats and truncated on conversion, so short videos can yield repeated
        indices.
    """
    factor = 2
    if req_frames == 1:
        factor = count // 4
    # `bounds` stays sorted and holds every split point produced so far, end points included.
    bounds = [0, count // 2, count - 1]
    # `new_bounds` records the split points in the order they were generated.
    new_bounds = [count // 2]
    while len(bounds) - 2 < factor * req_frames:
        # One refinement level: the midpoint of every adjacent pair of bounds.
        curr_bounds = [(lower + upper) / 2 for lower, upper in zip(bounds, bounds[1:])]
        new_bounds.extend(curr_bounds)
        for new_bound in curr_bounds:
            bisect.insort(bounds, new_bound)
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the dtype it aliased.
    return np.array(new_bounds, dtype=int)

In [None]:
# The video dataset must be organized in the following directory structure:
"""
DatasetName
|
|- train
|    |- real
|    |- fake
|
|- val
|   |- real
|   |- fake
"""

In [None]:
# Root folder of the input videos and root folder for the extracted face crops.
# Replace both placeholders before running. Keep the trailing "/": the following
# cells build paths by plain string concatenation (e.g. videos_path + split + label).
videos_path = 'PATH CONTAINING THE VIDEO DATASET/'
output_path = 'PATH WHERE THE FACE DATASET SHOULD GO/'

In [None]:
# Print how many .mp4 videos each split/label folder contains.
for split in ['train/','val/']:
    for label in ['real/','fake/']:
        folder = videos_path + split + label
        # Match on the file extension (case-insensitively); a substring test would
        # also count any file whose name merely contains "mp4".
        num_videos = sum(1 for f in os.listdir(folder) if f.lower().endswith('.mp4'))
        print(folder + ': ' + str(num_videos))

In [None]:
# Create the output folder tree (split/label) up front; folders that already exist are kept.
for split in ('train/', 'val/'):
    for label in ('real/', 'fake/'):
        os.makedirs(output_path + split + label, exist_ok=True)

In [None]:
# Subset of the dataset processed by this run; change these and re-run the cells
# below to process another split/label combination.
split = 'train/'
label = 'real/'

# Extraction parameters.
req_frames = 5   # number of face crops to save per video
scale = 1.3      # enlargement factor applied to the detected face box
req_size = 200   # only referenced by the commented-out resize step in the extraction loop

# Destination folder for the crops and the alphabetically sorted list of input files.
facepath = ''.join((output_path, split, label))
videos = os.listdir(''.join((videos_path, split, label)))
videos.sort()

In [None]:
# Per-video statistics filled in by the extraction loop:
# video file name -> [more_than_one, no_faces, out_of_frame, img_size, done_frames]
logs_dict = dict()

In [None]:
# For every video, walk through the candidate frames, keep those with exactly one
# detected face whose enlarged square crop lies inside the image, and save the
# crops until `req_frames` of them have been written.
for video in tqdm(videos):
    path = videos_path + split + label + video
    # Drop the real extension rather than assuming it is exactly four characters long.
    video_stem = os.path.splitext(video)[0]

    # Per-video counters stored in logs_dict at the end of the iteration.
    more_than_one = 0   # frames rejected because several faces were detected
    no_faces = 0        # frames rejected because no face was detected
    out_of_frame = 0    # frames rejected because the enlarged crop left the image
    done_frames = 0     # face crops written to disk
    img_size = -1       # crop side length; stays -1 unless req_frames crops were saved

    # Open the video once for both the frame count and the frame reads, and make
    # sure it is released even if detection or saving raises.
    cap = cv2.VideoCapture(path)
    try:
        count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        visited = set()
        for frame_number in get_spaced_nums(count, req_frames):
            # Truncation in get_spaced_nums can repeat an index on short videos;
            # processing it again would overwrite the same file and count it twice.
            frame_number = int(frame_number)
            if frame_number in visited:
                continue
            visited.add(frame_number)

            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
            res, frame = cap.read()
            if not res:
                # A failed read ends the processing of this video.
                break

            # Reverse the channel axis before detection (OpenCV decodes frames as BGR).
            faceRects = detector(frame[:,:,::-1], 0)
            if len(faceRects) == 0:
                no_faces += 1
                continue
            if len(faceRects) > 1:
                more_than_one += 1
                continue

            rect = faceRects[0].rect
            x1, y1, x2, y2 = rect.left(), rect.top(), rect.right(), rect.bottom()

            # Square box centred on the detection, with its side enlarged by `scale`.
            center = ((x1 + x2) // 2, (y1 + y2) // 2)
            length = max(x2 - x1, y2 - y1)
            half_side = int(length * scale / 2)
            x1 = center[0] - half_side
            x2 = center[0] + half_side
            y1 = center[1] - half_side
            y2 = center[1] + half_side
            if x1 < 0 or y1 < 0 or x2 > frame.shape[1] or y2 > frame.shape[0]:
                out_of_frame += 1
                continue

            final = frame[y1:y2,x1:x2,::-1]
            # Either resize images now or resize them during training
            # final = cv2.resize(final,(req_size,req_size),interpolation = cv2.INTER_CUBIC)
            plt.imsave(facepath + video_stem + '_' + str(frame_number).zfill(5) + '.jpg', final)
            done_frames += 1
            if done_frames == req_frames:
                img_size = y2-y1
                break
    finally:
        cap.release()
    logs_dict[video] = [more_than_one, no_faces, out_of_frame, img_size, done_frames]

In [None]:
import json

In [None]:
# Save the per-video extraction statistics; the file name carries the split and
# label of this run with their trailing "/" removed.
log_name = 'dlib_{}{}_logs.json'.format(split[:-1], label[:-1])
with open(log_name, 'w') as log_file:
    json.dump(logs_dict, log_file)