In [1]:
import os
import cv2
import dlib
import argparse
import pandas as pd
from tqdm import tqdm
from multiprocessing import Pool

In [2]:
# Landmark model file handed to dlib's shape predictor; the resulting
# `predictor` object is used by the alignment code further down.
PREDICTOR_PATH = '/home/zarana/Downloads/CNN2ELM/shape_predictor_5_face_landmarks.dat'
predictor = dlib.shape_predictor(PREDICTOR_PATH)

In [13]:
# Face detector shared by every call made inside one process. It is built
# lazily so that each worker process creates its own instance on first use.
_FACE_DETECTOR = None


def _get_face_detector():
    """Return the per-process dlib frontal face detector, creating it once."""
    global _FACE_DETECTOR
    if _FACE_DETECTOR is None:
        _FACE_DETECTOR = dlib.get_frontal_face_detector()
    return _FACE_DETECTOR


def align_and_save(path: str,
                   src_dir: str = '/home/zarana/cnn2ranking/chalearn_15/Train',
                   res_dir: str = '/home/zarana/cnn2ranking/chalearn_15/Train_aligned') -> int:
    """
    Get aligned face and save to disk

    The image is read from ``src_dir/path``; when exactly one face is
    detected, the aligned face chip is written to ``res_dir/path``.

    Parameters
    ----------
    path : string
        path to image, relative to ``src_dir`` (may contain sub-directories)
    src_dir : string, optional
        directory holding the original images
    res_dir : string, optional
        directory receiving the aligned images

    Returns
    -------
    integer
        1 if the aligned image already exists or was written successfully,
        0 if the image does not contain exactly one face or the write failed
    """
    out_path = os.path.join(res_dir, path)
    # Already produced by a previous run: skip the expensive detection.
    if os.path.exists(out_path):
        return 1

    image = dlib.load_rgb_image(os.path.join(src_dir, path))
    rects = _get_face_detector()(image, 0)
    # Only images with exactly one detected face are aligned and kept.
    if len(rects) != 1:
        return 0

    shape = predictor(image, rects[0])
    chip = dlib.get_face_chip(image, shape, padding=0.4, size=150)

    # `path` may include sub-directories, so create the output file's own
    # parent directory rather than only the result root.
    os.makedirs(os.path.dirname(out_path) or '.', exist_ok=True)

    # The image was loaded as RGB while cv2.imwrite expects BGR channel order.
    bgr_chip = cv2.cvtColor(chip, cv2.COLOR_RGB2BGR)
    # cv2.imwrite reports failure through its return value; do not flag an
    # image as available when nothing was written.
    return 1 if cv2.imwrite(out_path, bgr_chip) else 0



In [11]:
def main(workers: int = 2,
         src_csv: str = '/home/zarana/cnn2ranking/chalearn_15_train_ageGrp.csv',
         dst_csv: str = '/home/zarana/cnn2ranking/chalearn_15_train_ageGrp_cleaned.csv'):
    """
    Align every image listed in a CSV and write a cleaned copy of the CSV

    Each entry of the ``full_path`` column is passed to ``align_and_save``
    in a worker pool. Rows whose image was flagged 1 are written to
    ``dst_csv`` with the same columns as the input.

    Parameters
    ----------
    workers : integer, optional
        number of worker processes in the pool
    src_csv : string, optional
        CSV file to read; must contain a ``full_path`` column
    dst_csv : string, optional
        CSV file receiving only the successfully aligned rows
    """
    data = pd.read_csv(src_csv)
    paths = data['full_path'].values

    print('Run face alignment...')
    with Pool(processes=workers) as pool:
        # imap yields results in input order, so flags line up with the rows.
        # A single progress bar with an explicit total tracks the run.
        flags = list(tqdm(pool.imap(align_and_save, paths), total=len(paths)))

    # Filter with a boolean mask instead of adding and then dropping a
    # temporary column, so the input columns are written back untouched.
    detected = pd.Series(flags, index=data.index, dtype=int) == 1
    data.loc[detected].to_csv(dst_csv, index=False)

In [12]:
# Entry point: start the face-alignment pipeline when this code is executed
# directly (as a script or as a notebook cell).
if __name__ == '__main__':
    main()

Run face alignment...


  0%|          | 0/1998 [00:00<?, ?it/s]
  0%|          | 1/1998 [00:00<16:15,  2.05it/s]
  0%|          | 3/1998 [00:00<13:42,  2.43it/s]
  0%|          | 5/1998 [00:01<11:55,  2.78it/s]
  0%|          | 7/1998 [00:01<10:23,  3.19it/s]
  0%|          | 9/1998 [00:02<09:16,  3.58it/s]
  1%|          | 11/1998 [00:02<08:39,  3.82it/s]
  1%|          | 13/1998 [00:03<08:22,  3.95it/s]
  1%|          | 15/1998 [00:03<08:35,  3.85it/s]
  1%|          | 17/1998 [00:04<08:06,  4.07it/s]
  1%|          | 19/1998 [00:04<08:04,  4.09it/s]
  1%|          | 21/1998 [00:05<08:06,  4.07it/s]
  1%|          | 23/1998 [00:05<07:55,  4.16it/s]
  1%|▏         | 25/1998 [00:06<08:21,  3.93it/s]
  1%|▏         | 27/1998 [00:06<08:30,  3.86it/s]
  1%|▏         | 29/1998 [00:07<08:13,  3.99it/s]
  2%|▏         | 31/1998 [00:07<07:44,  4.24it/s]
  2%|▏         | 33/1998 [00:08<07:45,  4.22it/s]
  2%|▏         | 35/1998 [00:08<07:46,  4.21it/s]
  2%|▏         | 37/1998 [00:08<07:23,  4.43it/s]
  2%|▏        

KeyboardInterrupt: 