In [1]:
import math
import os
import gc
import sys
import time
from pathlib import Path
from tqdm.notebook import tqdm

from numba import jit, njit

In [2]:
# Project root. Set the DFDC_BASE_DIR environment variable to run the notebook
# on another machine without editing this cell; the default is unchanged.
BASE_DIR = os.environ.get('DFDC_BASE_DIR', '/home/dmitry/projects/dfdc')
# Everything else is derived from the root.
SRC_DIR = os.path.join(BASE_DIR, 'src')                 # added to sys.path further down
DATA_DIR = os.path.join(BASE_DIR, 'data/dfdc-videos')   # default video location
SAVE_DIR = os.path.join(BASE_DIR, 'data/dfdc-crops')    # default output location

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2

import torch
import torchvision

# src
# SRC_DIR is pushed to the front of sys.path before the two imports below,
# presumably so that `sample` and `dataset` are picked up from the project tree.
sys.path.insert(0, SRC_DIR)
from sample.reader import VideoReader
from dataset.utils import read_labels

# Pytorch_Retinaface
# The Pytorch_Retinaface directory under BASE_DIR is inserted at position 0 as
# well, so it is searched first for `data`, `models` and `detect_utils`.
# NOTE(review): `data` is a very generic top-level name; keep this insert ahead
# of the imports so nothing else on sys.path shadows it.
sys.path.insert(0, os.path.join(BASE_DIR, 'Pytorch_Retinaface'))
from data import cfg_mnet
from models.retinaface import RetinaFace
from detect_utils import detect, load_model

In [4]:
def get_sample(idx, labels_df, reader, n_frames=10, base_dir=DATA_DIR):
    """Read frames of the video described by row ``idx`` of ``labels_df``.

    The video path is ``base_dir/<row.dir>/<row.name>``. For a row taken with
    ``iloc``, ``row.name`` is that row's index label (presumably the video
    file name -- confirm against ``read_labels``).

    Args:
        idx: positional row index into ``labels_df``.
        labels_df: table providing a ``dir`` field per row.
        reader: object exposing ``read_frames(path, n_frames)`` that returns
            a pair.
        n_frames: forwarded unchanged to ``reader.read_frames``.
        base_dir: directory joined in front of the per-row sub-directory.

    Returns:
        The first element of the pair returned by ``reader.read_frames``;
        the second element is discarded.
    """
    entry = labels_df.iloc[idx]
    video_path = os.path.join(base_dir, entry.dir, entry.name)
    frames_data, _ = reader.read_frames(video_path, n_frames)
    return frames_data


def get_text_label(idx, labels_df):
    """Return ``'FAKE'`` if row ``idx`` has a truthy ``'label'`` value, else ``'REAL'``.

    Args:
        idx: positional row index into ``labels_df``.
        labels_df: table with a ``'label'`` column.
    """
    is_fake = labels_df.iloc[idx]['label']
    if is_fake:
        return 'FAKE'
    return 'REAL'

In [5]:
@njit
def calc_axis(c0, c1, pad, cmax):
    """Widen the interval ``[c0, c1]`` by ``pad`` on each side, limited to ``[0, cmax]``.

    Returns:
        ``(lower, upper, upper - lower)``: the new bounds and their span.
    """
    lower = max(0, c0 - pad)      # never start before 0
    upper = min(cmax, c1 + pad)   # never end past cmax
    span = upper - lower
    return lower, upper, span


@njit
def expand_bbox(bbox, pct):
    """Return a copy of ``bbox`` with its coordinates scaled outwards.

    The first two entries are multiplied by ``1 - pct`` and the remaining
    entries by ``1 + pct``. The input array itself is not modified.

    Note that the coordinates themselves are scaled, so the margin added on
    each side is proportional to the coordinate value rather than to the
    width or height of the box.
    """
    shrink = 1 - pct
    grow = 1 + pct
    scaled = np.copy(bbox)
    scaled[:2] *= shrink
    scaled[2:] *= grow
    return scaled


@njit
def crop_face(img, bbox, pad_pct=0.05, square=True):
    """Cut the region described by ``bbox`` out of ``img``.

    Args:
        img: 3-D image array; axis 0 is used as height and axis 1 as width.
        bbox: array of four values ``(x0, y0, x1, y1)``; they are truncated
            to integers before use.
        pad_pct: when greater than 0 the box is first enlarged with
            ``expand_bbox(bbox, pad_pct)``.
        square: when True the shorter side of the box is widened (within the
            image) towards the longer one and the crop is then trimmed, from
            its top-left corner, to ``min(w, h)`` on both axes. When False
            the clamped box is returned as is.

    Returns:
        A slice of ``img``. It is empty when the box does not overlap the
        image, so callers should check the size before using it.
    """
    img_h, img_w, _ = img.shape

    if pad_pct > 0:
        bbox = expand_bbox(bbox, pad_pct)

    # int64 rather than int16 so large coordinates cannot wrap around.
    x0, y0, x1, y1 = bbox.astype(np.int64)

    # Clamp the box to the frame. A negative start would otherwise be taken
    # as a "count from the end" slice index and select the wrong pixels, and
    # an end past the border would make w/h larger than the pixels that are
    # really there, giving a non-square result.
    x0 = min(max(x0, 0), img_w)
    y0 = min(max(y0, 0), img_h)
    x1 = min(max(x1, x0), img_w)
    y1 = min(max(y1, y0), img_h)

    # Computed unconditionally: they used to be bound only when square=True.
    w, h = x1 - x0, y1 - y0

    if square:
        if w > h:
            y0, y1, h = calc_axis(y0, y1, (w - h) // 2, img_h)
        elif h > w:
            x0, x1, w = calc_axis(x0, x1, (h - w) // 2, img_w)

    face = img[y0:y1, x0:x1]
    if square:
        # Padding can come up short (odd difference, image border), so trim
        # both axes to the smaller side.
        size = min(w, h)
        face = face[:size, :size]
    return face

In [6]:
def round_num_faces(num_faces, frac_thresh=0.25):
    """Round the mean of per-frame face counts with a custom threshold.

    The mean is rounded up as soon as its fractional part reaches
    ``frac_thresh`` and rounded down otherwise.

    Args:
        num_faces: array (or any sequence) of per-frame counts.
        frac_thresh: fractional part at or above which the mean is rounded up.

    Returns:
        The rounded mean as a Python ``int``; ``0`` for an empty input.
    """
    counts = np.asarray(num_faces)
    if counts.size == 0:
        # The mean of nothing is NaN and int(NaN) raises ValueError.
        return 0
    fraction, integral = np.modf(counts.mean())
    if fraction >= frac_thresh:
        integral += 1
    return int(integral)

In [7]:
cfg_mnet

{'name': 'mobilenet0.25',
 'min_sizes': [[16, 32], [64, 128], [256, 512]],
 'steps': [8, 16, 32],
 'variance': [0.1, 0.2],
 'clip': False,
 'loc_weight': 2.0,
 'gpu_train': True,
 'batch_size': 32,
 'ngpu': 1,
 'epoch': 250,
 'decay1': 190,
 'decay2': 220,
 'image_size': 640,
 'pretrain': True,
 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
 'in_channel': 32,
 'out_channel': 64}

In [8]:
def init_detector(cfg, weights, use_cpu=False):
    """Build a RetinaFace network in test phase, load weights, switch to eval mode.

    Args:
        cfg: RetinaFace configuration mapping. It is copied, not modified;
            the copy has ``'pretrain'`` forced to ``False``.
        weights: checkpoint path passed to ``load_model``.
        use_cpu: forwarded to ``load_model``.

    Returns:
        The network returned by ``load_model``, after ``eval()``.
    """
    # Override on a copy so the caller's dict (for example the shared
    # cfg_mnet) keeps its original 'pretrain' value.
    cfg = {**cfg, 'pretrain': False}
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, weights, use_cpu)
    net.eval()
    return net


def mkdirs(base_dir, chunk_dirs):
    """Make sure ``base_dir/<chunk_dir>`` exists for every entry of ``chunk_dirs``.

    Missing parents (including ``base_dir`` itself) are created, and
    directories that already exist are left alone.

    Args:
        base_dir: parent directory.
        chunk_dirs: iterable of sub-directory names.
    """
    for chunk_dir in chunk_dirs:
        # makedirs + exist_ok replaces the isdir()/mkdir() pair, which failed
        # when base_dir was missing and could race with another process.
        os.makedirs(os.path.join(base_dir, chunk_dir), exist_ok=True)


def prepare_data(
        start=0, end=None, 
        num_frames_fake=30, num_frames_real=120,
        use_cpu=False, bs=32,
        base_dir=BASE_DIR, data_dir=DATA_DIR, save_dir=SAVE_DIR,
        write_crops=False):
    """Detect faces in a range of labelled videos and crop them.

    For every row ``start <= idx < end`` of the label table the video frames
    are read, the face detector is run on them, and up to
    ``round_num_faces(...)`` detections per frame are cropped with
    ``crop_face``.

    Args:
        start: first row (positional) to process.
        end: one past the last row; ``None`` or anything beyond the table
            means "to the end of the table".
        num_frames_fake: frames requested for rows with a truthy label.
        num_frames_real: frames requested for rows with a falsy label.
        use_cpu: run the detector on CPU instead of CUDA.
        bs: value stored under ``'batch_size'`` in the detector config.
        base_dir: project root; the detector weights are looked up below it.
        data_dir: directory the label table and the videos are read from.
        save_dir: directory under which per-video crop folders are created.
        write_crops: when True each crop is written as PNG. The default is
            False because the write call was commented out in this notebook,
            so by default only the folders are created.

    File names (when ``write_crops`` is True): the first face of frame ``f``
    goes to ``'%03d.png' % f``; further faces of the same frame get
    ``'%03d_%d.png' % (f, face_no)`` so they do not overwrite each other.
    """
    df = read_labels(data_dir)
    mkdirs(save_dir, df['dir'].unique())

    reader = VideoReader()
    device = torch.device("cpu" if use_cpu else "cuda")
    weights_mnet = os.path.join(base_dir, 'data/weights/mobilenet0.25_Final.pth')
    cfg = {**cfg_mnet, 'batch_size': bs}
    detector = init_detector(cfg, weights_mnet, use_cpu).to(device)

    # Clamp so an over-large `end` does not run off the table.
    end = len(df) if end is None else min(end, len(df))

    for idx in tqdm(range(start, end), total=max(end - start, 0)):
        meta = df.iloc[idx]
        fake = bool(meta['label'])

        # Folder name is the video file name without its extension.
        sample_dir = os.path.join(save_dir, meta.dir, os.path.splitext(meta.name)[0])
        os.makedirs(sample_dir, exist_ok=True)
        print(sample_dir)

        num_frames = num_frames_fake if fake else num_frames_real
        # Forward data_dir: get_sample would otherwise read from its own
        # default directory and ignore the argument given to this function.
        sample = get_sample(idx, df, reader, n_frames=num_frames, base_dir=data_dir)
        if sample is None or len(sample) == 0:
            # Nothing could be read for this video; move on.
            continue

        # Inference only, so no autograd bookkeeping is needed. `cfg` (not the
        # module-level cfg_mnet) is passed so the `bs` override is visible.
        # NOTE(review): the recorded run of this notebook died here with a
        # CUDA out-of-memory error; if `detect` does not honour
        # cfg['batch_size'] the frames may have to be fed in chunks -- confirm
        # against detect_utils.
        with torch.no_grad():
            detections = detect(sample, detector, cfg, device)

        # The reader may return fewer frames than requested.
        frames_done = min(len(sample), len(detections))
        if frames_done > 0:
            num_faces = np.array([len(d) for d in detections])
            max_faces_per_frame = round_num_faces(num_faces, frac_thresh=0.25)

            for f in range(frames_done):
                for face_no, det in enumerate(detections[f][:max_faces_per_frame]):
                    face = crop_face(sample[f], det[:4])
                    if not write_crops or face.size == 0:
                        continue
                    if face_no == 0:
                        file_name = '%03d.png' % f
                    else:
                        file_name = '%03d_%d.png' % (f, face_no)
                    file_path = os.path.join(sample_dir, file_name)
                    # Frames are RGB here while cv2.imwrite expects BGR.
                    cv2.imwrite(file_path, cv2.cvtColor(face, cv2.COLOR_RGB2BGR))

        # Drop the per-video buffers before the next video is loaded.
        sample = None
        detections = None
        gc.collect()

    print('DONE')

In [9]:
%%time
# Collect garbage first, then process label rows 2..2499 with the default
# settings (CUDA detector).
# NOTE: the run recorded below printed one sample directory and then aborted
# with "CUDA out of memory".
gc.collect()
prepare_data(start=2, end=2500)

Loading pretrained model from /home/dmitry/projects/dfdc/data/weights/mobilenet0.25_Final.pth
remove prefix 'module.'
Missing keys:0
Unused checkpoint keys:0
Used keys:300


HBox(children=(FloatProgress(value=0.0, max=2498.0), HTML(value='')))

/home/dmitry/projects/dfdc/data/dfdc-crops/dfdc_train_part_22/jcwkemycdm


RuntimeError: CUDA out of memory. Tried to allocate 3.71 GiB (GPU 0; 10.91 GiB total capacity; 6.49 GiB already allocated; 3.46 GiB free; 6.49 GiB reserved in total by PyTorch)