In [1]:
import os
import cv2
import dlib
import argparse
import pandas as pd
from tqdm import tqdm
from multiprocessing import Pool

In [2]:
# Landmark model file handed to dlib; the file name indicates the 5-point face model.
LANDMARK_MODEL_PATH = '/home/zarana/Downloads/CNN2ELM/shape_predictor_5_face_landmarks.dat'
# Loaded once at module level; align_and_save reads this global to locate landmarks.
predictor = dlib.shape_predictor(LANDMARK_MODEL_PATH)

In [9]:
def _get_face_detector():
    """Return dlib's frontal face detector, constructing it only once per process."""
    cached = getattr(_get_face_detector, '_detector', None)
    if cached is None:
        cached = dlib.get_frontal_face_detector()
        _get_face_detector._detector = cached
    return cached


def align_and_save(path: str,
                   src_dir: str = '/home/zarana/cnn2ranking/chalearn_15/Train',
                   res_dir: str = '/home/zarana/cnn2ranking/chalearn_15/Train_aligned') -> int:
    """
    Get aligned face and save to disk

    Uses the module-level ``predictor`` (dlib shape predictor) to locate the
    landmarks of the single detected face, crops an aligned 150x150 chip and
    writes it under ``res_dir`` with the same relative path as the input.

    Parameters
    ----------
    path : string
        path to image, relative to ``src_dir``
    src_dir : string, optional
        directory containing the original images
    res_dir : string, optional
        directory that receives the aligned images

    Returns
    -------
    integer
        flag to mark. 1 if an aligned image exists on disk after the call
        (freshly written or left over from an earlier run), 0 if the image
        could not be loaded, did not contain exactly one face, or could not
        be written
    """
    out_path = os.path.join(res_dir, path)
    # Already produced by a previous run: skip the expensive detection.
    if os.path.exists(out_path):
        return 1

    try:
        image = dlib.load_rgb_image(os.path.join(src_dir, path))
    except RuntimeError:
        # NOTE(review): dlib's Python bindings are expected to report missing or
        # unreadable images as RuntimeError - confirm for the installed version.
        # One bad file should be flagged as a failure, not abort the whole pool.
        return 0

    rects = _get_face_detector()(image, 0)
    # Only unambiguous images are kept: zero or several faces count as failure.
    if len(rects) != 1:
        return 0

    shape = predictor(image, rects[0])
    chip = dlib.get_face_chip(image, shape, padding=0.4, size=150)

    # `path` may contain sub-folders, so create the file's own parent directory.
    os.makedirs(os.path.dirname(out_path) or '.', exist_ok=True)
    # dlib works in RGB while cv2.imwrite expects BGR channel order.
    chip_bgr = cv2.cvtColor(chip, cv2.COLOR_RGB2BGR)
    # imwrite signals failure through its return value; do not report success
    # for a file that was never written.
    return 1 if cv2.imwrite(out_path, chip_bgr) else 0



In [10]:
def main(csv_path: str = '/home/zarana/cnn2ranking/chalearn_15_train_ageGrp.csv',
         out_path: str = '/home/zarana/cnn2ranking/chalearn_15_train_ageGrp_cleaned.csv',
         workers: int = 2) -> None:
    """
    Align every face listed in a CSV and write a cleaned copy of that CSV

    Reads ``csv_path``, runs ``align_and_save`` on each value of its
    ``full_path`` column in a process pool, and writes to ``out_path`` only
    the rows for which ``align_and_save`` returned 1.

    Parameters
    ----------
    csv_path : string, optional
        input CSV; must contain a ``full_path`` column
    out_path : string, optional
        destination of the filtered CSV (written without the index)
    workers : integer, optional
        number of worker processes in the pool
    """
    data = pd.read_csv(csv_path)
    paths = data['full_path'].values

    print('Run face alignment...')
    with Pool(processes=workers) as pool:
        # A single progress bar with a known total. imap yields results in
        # input order, so flags[k] belongs to row k of `data`; the list must be
        # materialised while the pool is still open because imap is lazy.
        flags = list(tqdm(pool.imap(align_and_save, paths), total=len(paths)))

    # create new db with only successfully detected face; the mask is kept
    # separate so the original columns are written unchanged.
    keep = pd.Series(flags, index=data.index, dtype=int) == 1
    data.loc[keep].to_csv(out_path, index=False)

In [8]:
# Entry point: run the alignment pipeline only when this code is executed as
# the main module (not when it is imported).
if __name__ == '__main__':
    main()

Run face alignment...


  0%|          | 0/1079 [00:00<?, ?it/s]
  0%|          | 1/1079 [00:00<11:07,  1.62it/s]
  0%|          | 2/1079 [00:00<08:32,  2.10it/s]
  0%|          | 3/1079 [00:01<07:45,  2.31it/s]
  0%|          | 4/1079 [00:01<07:45,  2.31it/s]
  0%|          | 5/1079 [00:01<06:44,  2.66it/s]
  1%|          | 6/1079 [00:01<05:26,  3.28it/s]
  1%|          | 7/1079 [00:02<05:52,  3.04it/s]
  1%|          | 9/1079 [00:02<05:12,  3.42it/s]
  1%|          | 10/1079 [00:02<04:20,  4.11it/s]
  1%|          | 11/1079 [00:03<04:16,  4.16it/s]
  1%|          | 12/1079 [00:03<04:16,  4.16it/s]
  1%|          | 13/1079 [00:03<03:46,  4.70it/s]
  1%|▏         | 14/1079 [00:03<04:31,  3.92it/s]
  1%|▏         | 15/1079 [00:05<09:51,  1.80it/s]
  2%|▏         | 18/1079 [00:05<07:04,  2.50it/s]
  2%|▏         | 20/1079 [00:05<06:12,  2.84it/s]
  2%|▏         | 21/1079 [00:06<07:55,  2.23it/s]
  2%|▏         | 23/1079 [00:06<07:07,  2.47it/s]
  2%|▏         | 25/1079 [00:07<06:38,  2.64it/s]
  3%|▎         | 

 39%|███▉      | 425/1079 [02:27<03:50,  2.84it/s]
 39%|███▉      | 426/1079 [02:28<05:05,  2.14it/s]
 40%|███▉      | 428/1079 [02:28<04:41,  2.31it/s]
 40%|███▉      | 430/1079 [02:29<04:10,  2.59it/s]
 40%|████      | 432/1079 [02:29<03:48,  2.83it/s]
 40%|████      | 433/1079 [02:30<03:02,  3.54it/s]
 40%|████      | 434/1079 [02:30<03:17,  3.27it/s]
 40%|████      | 435/1079 [02:30<03:20,  3.22it/s]
 40%|████      | 436/1079 [02:31<03:40,  2.92it/s]
 41%|████      | 437/1079 [02:31<03:13,  3.31it/s]
 41%|████      | 438/1079 [02:31<03:21,  3.18it/s]
 41%|████      | 439/1079 [02:31<03:22,  3.16it/s]
 41%|████      | 440/1079 [02:32<03:29,  3.05it/s]
 41%|████      | 441/1079 [02:32<03:45,  2.83it/s]
 41%|████      | 442/1079 [02:32<03:03,  3.48it/s]
 41%|████      | 443/1079 [02:33<03:32,  3.00it/s]
 41%|████      | 444/1079 [02:33<03:08,  3.37it/s]
 41%|████      | 445/1079 [02:33<03:23,  3.11it/s]
 41%|████▏     | 446/1079 [02:34<02:59,  3.52it/s]
 41%|████▏     | 447/1079 [02:3

 77%|███████▋  | 832/1079 [04:39<01:21,  3.02it/s]
 77%|███████▋  | 834/1079 [04:40<01:18,  3.12it/s]
 77%|███████▋  | 836/1079 [04:40<01:12,  3.34it/s]
 78%|███████▊  | 837/1079 [04:40<01:04,  3.75it/s]
 78%|███████▊  | 838/1079 [04:41<01:22,  2.91it/s]
 78%|███████▊  | 839/1079 [04:41<01:12,  3.33it/s]
 78%|███████▊  | 840/1079 [04:42<01:25,  2.80it/s]
 78%|███████▊  | 841/1079 [04:42<01:18,  3.04it/s]
 78%|███████▊  | 842/1079 [04:42<01:27,  2.72it/s]
 78%|███████▊  | 843/1079 [04:43<01:20,  2.93it/s]
 78%|███████▊  | 844/1079 [04:43<01:15,  3.09it/s]
 78%|███████▊  | 845/1079 [04:43<01:12,  3.22it/s]
 78%|███████▊  | 846/1079 [04:44<01:15,  3.09it/s]
 78%|███████▊  | 847/1079 [04:44<01:13,  3.18it/s]
 79%|███████▊  | 848/1079 [04:44<01:07,  3.41it/s]
 79%|███████▊  | 849/1079 [04:45<01:22,  2.77it/s]
 79%|███████▉  | 850/1079 [04:45<01:24,  2.71it/s]
 79%|███████▉  | 851/1079 [04:45<01:23,  2.72it/s]
 79%|███████▉  | 852/1079 [04:46<01:26,  2.61it/s]
 79%|███████▉  | 853/1079 [04:4

 96%|█████████▌| 1038/1079 [05:44<00:11,  3.60it/s]
 96%|█████████▋| 1039/1079 [05:44<00:12,  3.13it/s]
 96%|█████████▋| 1041/1079 [05:45<00:11,  3.35it/s]
 97%|█████████▋| 1042/1079 [05:45<00:10,  3.63it/s]
 97%|█████████▋| 1043/1079 [05:45<00:09,  3.74it/s]
 97%|█████████▋| 1044/1079 [05:46<00:11,  3.16it/s]
 97%|█████████▋| 1045/1079 [05:47<00:17,  1.95it/s]
 97%|█████████▋| 1048/1079 [05:47<00:13,  2.38it/s]
 97%|█████████▋| 1050/1079 [05:48<00:11,  2.58it/s]
 97%|█████████▋| 1052/1079 [05:49<00:09,  2.82it/s]
 98%|█████████▊| 1054/1079 [05:49<00:08,  3.07it/s]
 98%|█████████▊| 1056/1079 [05:50<00:08,  2.85it/s]
 98%|█████████▊| 1057/1079 [05:50<00:06,  3.22it/s]
 98%|█████████▊| 1058/1079 [05:51<00:07,  2.91it/s]
 98%|█████████▊| 1059/1079 [05:51<00:06,  3.24it/s]
 98%|█████████▊| 1060/1079 [05:52<00:08,  2.34it/s]
 98%|█████████▊| 1062/1079 [05:52<00:07,  2.43it/s]
 99%|█████████▊| 1064/1079 [05:53<00:05,  2.90it/s]
 99%|█████████▊| 1065/1079 [05:53<00:04,  3.34it/s]
 99%|███████