# Face Detection using MTCNN

We try another face detection method to see which one gives the best performance. This time we use [MTCNN](https://github.com/ipazc/mtcnn). It turns out that MTCNN detects faces better, but takes much longer than the default OpenCV method (Haar cascades). A possible compromise is to use MTCNN only for videos where Haar cascades fail to detect faces in a significant fraction of frames.

In [1]:
import numpy as np
import glob
import json
import cv2
from mtcnn import MTCNN

Using TensorFlow backend.


The initial setup is the same as before, so we simply copy it from the earlier code.

In [15]:
# Machine-specific locations: Haar-cascade directories (carried over from the
# earlier setup; not used by the MTCNN cells visible here) and data roots.
# The "_ofc" / "_mac" suffixes select which machine's paths are active below.
ana_dir_ofc = '/data/anaconda3/envs/aniket1/share/opencv4/haarcascades/'
ana_dir_mac = '/Users/aniket/anaconda3/envs/aniket1/share/OpenCV/haarcascades/'
basedir_ofc = '/data/Kaggle/DeepFake/'
basedir_mac = '/Users/aniket/KaggleData/DeepFake/'

# Switch these two lines to the "_mac" variants when running on the other machine.
basedir = basedir_ofc
ana_dir = ana_dir_ofc
#get all video filenames
# glob returns matches in arbitrary (filesystem-dependent) order; sorting keeps
# files[i] pointing at the same video on every run and on every machine.
files = sorted(glob.glob(basedir+'sample_data/train_sample_videos/*.mp4'))
#read json file
# metadata.json is indexed by video file name in the later cells
# (e.g. json_dict[name]['label']).
json_file = basedir+'sample_data/train_sample_videos/metadata.json'

with open(json_file, 'r') as f:
    json_dict = json.load(f)

nvids = len(files)
vid_fname = ""
# Build the detector once; it is reused for every frame in the loop below.
detector = MTCNN()

Now we loop over the videos. This part is going to be different.

In [23]:
# Grayscale face crops collected from every frame in which a face was detected.
face_arr = []
#loop over videos - capture image info as vectors
for i in np.arange(10, 11) :
    #start capturing images
    print('Capturing frames from : {}'.format(files[i]))
    vid_fname = files[i].rsplit('/')[-1]
    print(json_dict[vid_fname]['label'])
    cap = cv2.VideoCapture(files[i])
    print('frame size = {}, {}, fps = {}'.format(cap.get(cv2.CAP_PROP_FRAME_WIDTH),
                                                 cap.get(cv2.CAP_PROP_FRAME_HEIGHT),
                                                 cap.get(cv2.CAP_PROP_FPS)))
    c = 0           # number of frames read from this video
    gray0 = None    # last annotated frame that was displayed
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret or frame is None:
                # End of stream (or read failure): stop processing this video.
                break
            c = c+1
            # OpenCV decodes frames as BGR; the detector is given RGB, while the
            # stored crops and the preview use the grayscale version.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            ffaces = detector.detect_faces(rgb)
            if not ffaces:
                # Explicit check instead of a bare `except`, so real errors
                # (bad indices, display failures, Ctrl-C) are no longer hidden.
                print('Faces not detected!')
                continue
            # Only the first detection is used.
            print(ffaces[0]['box'])
            x, y, w, h = ffaces[0]['box']
            # Clamp the top-left corner at 0: a negative start index would wrap
            # around in NumPy slicing and produce an empty or wrong crop.
            xl, yl = max(x, 0), max(y, 0)
            xr, yr = x+w, y+h #[int(0.95*x), int(1.05*(x+w)), int(0.8*y), int(1.1*(y+h))]
            # Images are indexed [row, column] == [y, x]. Take the crop BEFORE
            # drawing the rectangle, and copy it so it does not share memory with
            # the frame that gets annotated below.
            face_arr.append(gray[yl:yr, xl:xr].copy())
            gray = cv2.rectangle(gray, (xl, yl), (xr, yr), (255, 0, 0), 2)
            cv2.imshow('frame', gray)      # - gray0 + 255
            gray0 = gray.copy()
            # waitKey(0) blocks until a key is pressed: 'q' stops this video,
            # any other key advances to the next frame.
            if cv2.waitKey(0) & 0xFF == ord('q'):
                break
    finally:
        # Always release the capture and close the preview window, even when
        # an exception interrupts the loop.
        cap.release()
        cv2.destroyAllWindows()
        cv2.waitKey(1)
    print('num frames = {}'.format(c))

Capturing frames from : /data/Kaggle/DeepFake/sample_data/train_sample_videos/adhsbajydo.mp4
FAKE
frame size = 1920.0, 1080.0, fps = 29.97
[899, 162, 103, 131]
[898, 160, 104, 132]
[898, 158, 108, 137]
[897, 156, 108, 139]
num frames = 4


In [24]:
# Show the metadata record of the video processed above ...
video_meta = json_dict['adhsbajydo.mp4']
print(video_meta)
# ... followed by the number of face crops collected from it.
n_faces = len(face_arr)
print(n_faces)

{'label': 'FAKE', 'split': 'train', 'original': 'fysyrqfguw.mp4'}
4
