In [None]:
# https://www.kaggle.com/timesler/fast-mtcnn-detector-45-fps-at-full-resolution

In [3]:
# Download the BlazeFace-PyTorch dataset archive from Kaggle (needs a valid
# Kaggle session in cookies.txt). `-O` names the output file; without it wget
# treats a trailing `blazeface.zip` as a second URL to fetch.
!wget --load-cookies cookies.txt -O blazeface.zip https://www.kaggle.com/humananalog/blazeface-pytorch/download

--2020-02-22 07:20:21--  https://www.kaggle.com/humananalog/blazeface-pytorch/download
Resolving www.kaggle.com (www.kaggle.com)... 35.244.233.98
Connecting to www.kaggle.com (www.kaggle.com)|35.244.233.98|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://storage.googleapis.com/kaggle-data-sets/458848/888125/bundle/archive.zip?GoogleAccessId=web-data@kaggle-161607.iam.gserviceaccount.com&Expires=1582615221&Signature=P%2FNZwxPQsFp1M07X7cneCRBC6URNKpVTwThLj%2BsX4NeBgT829AuLYlMWjX916sEWsBhBh7WQ9shh7t5KRgJ9NqrTsKIIkPeOTZpaY2i%2FXW8qk8w1turGy1f41vJT%2B6xE64kpRTkhYJxAu5iNapFQx19MM2BluyaBHqiK2ENRWSnzZ%2Bnd75Bpbt3p1B9yhmjS8cgCE0r1PTLMmbR3RcJDolotXqJfsO7ujC2buSuQiTqdkN3md1JDGJaaEl1tPrqdoq0U5jxyp%2F9QQh80UsmlS43jFv%2FDyQ7iygrKZKXI2x8EByWQk6mHgo47h%2BvCc%2BU21aA04k5ZcK6mflWGyvPIHA%3D%3D&response-content-disposition=attachment%3B+filename%3Dblazeface-pytorch.zip [following]
--2020-02-22 07:20:21--  https://storage.googleapis.com/kaggle-data-sets/458848/888125/b

In [4]:
from facenet_pytorch import MTCNN
from PIL import Image
import torch
from imutils.video import FileVideoStream
import cv2
import time
import glob
from tqdm.notebook import tqdm

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# glob.glob() returns paths in a filesystem-dependent order, so sort before
# slicing to make the 100-video benchmark subset reproducible across runs.
filenames = sorted(glob.glob('data/videos/*.mp4'))[:100]

In [35]:
from blazeface import BlazeFace

class FastBlaze(object):
    """Strided BlazeFace detector.

    Detection runs on every `stride`-th frame only; the detections found there
    are reused for the following `stride - 1` frames.
    """

    def __init__(self, stride, resize=1, *args, **kwargs):
        """Constructor for FastBlaze class.

        Loads the BlazeFace weights from "blazeface.pth" and the anchors from
        "anchors.npy" (paths relative to the working directory) and places the
        network on the first CUDA device when available, otherwise on the CPU.

        Arguments:
            stride (int): The detection stride. Faces will be detected every
                `stride` frames and remembered for `stride-1` frames.

        Keyword arguments:
            resize (float): Fractional frame scaling. [default: {1}]
            *args: Accepted for call compatibility; not used.
            **kwargs: Accepted for call compatibility; not used.
        """
        gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        self.net = BlazeFace().to(gpu)
        self.net.load_weights("blazeface.pth")
        self.net.load_anchors("anchors.npy")

        # Optionally change the thresholds:
        self.net.min_score_thresh = 0.75
        self.net.min_suppression_threshold = 0.3

        self.stride = stride
        self.resize = resize

    def __call__(self, frames):
        """Detect faces in frames using strided BlazeFace.

        Arguments:
            frames: Indexable, sliceable batch of frames (a list or an array
                whose first axis is the frame index).

        Returns:
            list: One entry per (frame, detection) pair, in frame order. Each
                entry is a single detection as yielded by iterating the
                per-frame result of `self.net.predict_on_batch`; frames
                between strides repeat the detections of the last detected
                frame. An empty batch yields an empty list.
        """
        # Nothing to detect; also avoids handing the network an empty batch.
        if len(frames) == 0:
            return []

        if self.resize != 1:
            # NOTE(review): this branch expects PIL-style frames (`.size` is a
            # sequence, `.resize` takes the new size). numpy frames, as passed
            # by run_detection, have an integer `.size` and would fail here.
            # Confirm the intended input type before using resize != 1.
            frames = [f.resize([int(d * self.resize) for d in f.size]) for f in frames]

        # Only every `stride`-th frame goes through the network.
        boxes = self.net.predict_on_batch(frames[::self.stride])

        faces = []
        for i in range(len(frames)):
            # Frame i reuses the detections of the most recent strided frame.
            frame_boxes = boxes[i // self.stride]
            if frame_boxes is None:
                continue
            # Collect each individual detection (the original appended the
            # whole batch result `boxes` here instead of the single `box`).
            faces.extend(frame_boxes)

        return faces

In [42]:
import numpy as np
def run_detection(blaze, filenames, batch_size=30, frame_interval=10, frame_size=(128, 128)):
    """Benchmark a face detector over a list of video files.

    Every `frame_interval`-th frame of each video (frame 0 excluded) is
    converted from BGR to RGB, resized to `frame_size` and buffered. The
    buffer is handed to `blaze` as one numpy array whenever it holds
    `batch_size` frames or the video's last frame has been read. After each
    batch a running frames-per-second figure and the cumulative detection
    count are printed on a single, overwritten line.

    Arguments:
        blaze: Callable taking a numpy array of stacked frames and returning
            a sized collection of detections.
        filenames: Iterable of video file paths.

    Keyword arguments:
        batch_size (int): Number of buffered frames that triggers a detector
            call. [default: {30}]
        frame_interval (int): Keep one frame out of every `frame_interval`.
            [default: {10}]
        frame_size (tuple): (width, height) passed to cv2.resize.
            [default: {(128, 128)}]
    """
    frames = []
    frames_processed = 0
    faces_detected = 0
    start = time.time()

    for filename in tqdm(filenames):

        v_cap = FileVideoStream(filename).start()
        try:
            v_len = int(v_cap.stream.get(cv2.CAP_PROP_FRAME_COUNT))

            for j in range(v_len):

                frame = v_cap.read()

                # Skip undecodable frames (None) instead of crashing in cvtColor.
                if frame is not None and j % frame_interval == 0 and j > 0:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frame = cv2.resize(frame, frame_size)
                    frames.append(frame)

                is_last_frame = j == v_len - 1

                # Flush on a full batch or at the end of the video, but never
                # call the detector with an empty batch.
                if frames and (len(frames) >= batch_size or is_last_frame):

                    faces = blaze(np.asarray(frames))

                    frames_processed += len(frames)
                    faces_detected += len(faces)
                    frames = []

                    print(
                        f'Frames per second: {frames_processed / (time.time() - start):.3f},',
                        f'faces detected: {faces_detected}\r',
                        end=''
                    )
        finally:
            # Always release the reader, even on errors or KeyboardInterrupt.
            v_cap.stop()

In [43]:
# Benchmark BlazeFace: run the network on every 4th sampled frame, no rescaling.
fast_blaze = FastBlaze(stride=4, resize=1)
run_detection(fast_blaze, filenames)

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Frames per second: 14.747, faces detected: 137

KeyboardInterrupt: 

In [28]:
# Benchmark the strided MTCNN detector on the same video list for comparison.
# NOTE(review): FastMTCNN is not defined in the cells shown in this notebook,
# and this cell's execution count (28) predates the FastBlaze cells. Confirm
# the class is available before a Restart & Run All.
fast_mtcnn = FastMTCNN(
    stride=4, resize=0.5,              # detect on every 4th frame at half resolution
    margin=14, factor=0.5,
    keep_all=True, device=device,
)
run_detection(fast_mtcnn, filenames)

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Frames per second: 20.919, faces detected: 823

KeyboardInterrupt: 