In [1]:
from IPython.display import Video
import cv2
import os
import random

from facenet_pytorch import MTCNN
from PIL import Image
import torch
from imutils.video import FileVideoStream
import time
import glob
from tqdm.notebook import tqdm

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
class FastMTCNN(object):
    """Fast MTCNN implementation."""
    
    def __init__(self, stride, resize=1, *args, **kwargs):
        """Constructor for FastMTCNN class.
        
        Arguments:
            stride (int): The detection stride. Faces will be detected every `stride` frames
                and remembered for `stride-1` frames.
        
        Keyword arguments:
            resize (float): Fractional frame scaling. [default: {1}]
            *args: Arguments to pass to the MTCNN constructor. See help(MTCNN).
            **kwargs: Keyword arguments to pass to the MTCNN constructor. See help(MTCNN).
        """
        self.stride = stride
        self.resize = resize
        self.mtcnn = MTCNN(*args, **kwargs)
        
    def __call__(self, frames):
        """Detect faces in frames using strided MTCNN."""
        if self.resize != 1:
            frames = [
                cv2.resize(f, (int(f.shape[1] * self.resize), int(f.shape[0] * self.resize)))
                    for f in frames
            ]
                      
        boxes, probs = self.mtcnn.detect(frames[::self.stride])

        faces = []
        for i, frame in enumerate(frames):
            box_ind = int(i / self.stride)
            if boxes[box_ind] is None:
                continue
            for box in boxes[box_ind]:
                box = [int(b) for b in box]
                center_x, center_y = (box[0] + box[2]) // 2, (box[1] + box[3]) // 2
                wide = max(box[2] - box[0], box[3] - box[1]) // 2
                face = frame[center_y - wide:center_y + wide, center_x - wide:center_x + wide]
                faces.append(face)
                #faces.append(frame[box[1] :box[3], box[0]:box[2]])
                
        
        return faces
    
fast_mtcnn = FastMTCNN(
    
    stride=1,
    resize=1,
    #image_size=1000,
    #margin=20,
    factor=0.6,
    keep_all=True,
    device=device
)

In [3]:
def generate_frame(path, frame_number):
    #create folder if it doesn't exist
    if not os.path.exists('Private_dataset/' + path):
        os.makedirs('Private_dataset/' + path)
        

    vidcap = cv2.VideoCapture('Private_dataset/{}.MOV'.format(path))

    #sample 40 frames uniformly from the video
    frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_indices = sorted(random.sample(range(frame_count), frame_number))
    vidcap.set(cv2.CAP_PROP_POS_FRAMES, frame_indices[0])
    success, image = vidcap.read()
    count = 0
    res = []
    
    for i in range(1, frame_count):
        if i == frame_indices[count]:
            #rotate the image 180 degrees
            image = cv2.rotate(image, cv2.ROTATE_180)
            # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # faces = fast_mtcnn(image)
            # cv2.imwrite(f"./Private_dataset/{format(path)}/{format(count, '04d')}.jpg", image)
            res.append(image)
            count += 1
            if count >= len(frame_indices):
                break
        success, image = vidcap.read()
        if not success:
            break
    
    print(len(res))
    res = fast_mtcnn(res)
    print(len(res))
    count = 0
    for face in res:
        cv2.imwrite(f"./Private_dataset/{format(path)}/{format(count, '04d')}.jpg", face)
        count += 1


generate_frame('Xiang', 40)
# generate_frame('Jiaxun', 40)
# generate_frame('Bowen', 40)

0


  if method is "Min":
  if method is "Min":
  if method is "Min":


ValueError: need at least one array to stack