In [1]:
import os
import sys
import random
import argparse
import numpy as np
from PIL import Image, ImageFile
import face_recognition
import glob
from shutil import copyfile
from tqdm import tqdm

ROOT_DIR = os.path.abspath(os.curdir)

# Set seed to sample same set of images each time
random.seed(61)

# We need nose bridge and chin to fit mask on a face
KEY_FACIAL_FEATURES = {'nose_bridge', 'chin'}
MODEL = 'cnn' # cnn or hog cnn is slower than hog but more accurate in terms of face detection 

In [2]:
def create_masked_face(image_path, mask_path):
    # Convert image into format that face_recognition library understands 
    face_image_np = face_recognition.load_image_file(image_path)
    
    # Recognize face boundaries from an image 
    face_locations = face_recognition.face_locations(face_image_np, model=MODEL)
    
    # Find facial landmarks from the recognized face to fit mask
    face_landmarks = face_recognition.face_landmarks(face_image_np, face_locations)
    has_key_face_landmarks = check_face_landmarks(face_landmarks)
    
    if has_key_face_landmarks:
        face_img = Image.fromarray(face_image_np)
        mask_img = Image.open(mask_path)
        face_mask_img = mask_face(face_img, mask_img, face_landmarks[0])
        cropped_face_mask_img = crop_image(face_mask_img, face_locations[0])
        return cropped_face_mask_img
    else:
        return None

def check_face_landmarks(face_landmarks):
    # Check whether there is a face_landmark
    if len(face_landmarks) > 0:
        # Check whether face_landmarks include all key facial features to fit mask
        if face_landmarks[0].keys() >= KEY_FACIAL_FEATURES:
            return True
        else:
            return False
    else:
        return False

def mask_face(face_img, mask_img, face_landmark):
    nose_bridge = face_landmark['nose_bridge']
    nose_point = nose_bridge[len(nose_bridge) * 1 // 4]
    nose_v = np.array(nose_point)

    chin = face_landmark['chin']
    chin_len = len(chin)
    chin_bottom_point = chin[chin_len // 2]
    chin_bottom_v = np.array(chin_bottom_point)
    chin_left_point = chin[chin_len // 8]
    chin_right_point = chin[chin_len * 7 // 8]

    # split mask and resize
    width = mask_img.width
    height = mask_img.height
    width_ratio = 1.2
    new_height = int(np.linalg.norm(nose_v - chin_bottom_v))

    # left
    mask_left_img = mask_img.crop((0, 0, width // 2, height))
    mask_left_width = get_distance_from_point_to_line(chin_left_point, nose_point, chin_bottom_point)
    mask_left_width = int(mask_left_width * width_ratio)
    mask_left_img = mask_left_img.resize((mask_left_width, new_height))

    # right
    mask_right_img = mask_img.crop((width // 2, 0, width, height))
    mask_right_width = get_distance_from_point_to_line(chin_right_point, nose_point, chin_bottom_point)
    mask_right_width = int(mask_right_width * width_ratio)
    mask_right_img = mask_right_img.resize((mask_right_width, new_height))

    # merge mask
    size = (mask_left_img.width + mask_right_img.width, new_height)
    mask_img = Image.new('RGBA', size)
    mask_img.paste(mask_left_img, (0, 0), mask_left_img)
    mask_img.paste(mask_right_img, (mask_left_img.width, 0), mask_right_img)

    # rotate mask
    angle = np.arctan2(chin_bottom_point[1] - nose_point[1], chin_bottom_point[0] - nose_point[0])
    rotated_mask_img = mask_img.rotate(angle, expand=True)

    # calculate mask location
    center_x = (nose_point[0] + chin_bottom_point[0]) // 2
    center_y = (nose_point[1] + chin_bottom_point[1]) // 2

    offset = mask_img.width // 2 - mask_left_img.width
    radian = angle * np.pi / 180
    box_x = center_x + int(offset * np.cos(radian)) - rotated_mask_img.width // 2
    box_y = center_y + int(offset * np.sin(radian)) - rotated_mask_img.height // 2

    # add mask
    face_img.paste(mask_img, (box_x, box_y), mask_img)
    return face_img

def get_distance_from_point_to_line(point, line_point1, line_point2):
    distance = np.abs((line_point2[1] - line_point1[1]) * point[0] +
                      (line_point1[0] - line_point2[0]) * point[1] +
                      (line_point2[0] - line_point1[0]) * line_point1[1] +
                      (line_point1[1] - line_point2[1]) * line_point1[0]) / \
               np.sqrt((line_point2[1] - line_point1[1]) * (line_point2[1] - line_point1[1]) +
                       (line_point1[0] - line_point2[0]) * (line_point1[0] - line_point2[0]))
    return int(distance)

def save(save_dir, fname, face_img):
    dest_path = os.path.join(save_dir, fname)
    face_img.save(dest_path)
    
def crop_image(img, face_location):
    top, right, bottom, left = face_location
    return img.crop((left, top, right, bottom))

In [3]:
available_masks = glob.glob(os.path.join('images', 'mask-images', "*.png"))

available_sample_face_imgs = glob.glob(os.path.join('images', 'sampled_face_images', "*.jpg"))
random.shuffle(available_sample_face_imgs)
target_n_masked_face_imgs = len(available_sample_face_imgs) / 2
n_masked_images = 0

# Create a directory to save masked / not masked images
masked_img_dir = os.path.join(ROOT_DIR, 'images', 'train', 'masked')
not_masked_img_dir = os.path.join(ROOT_DIR, 'images', 'train', 'not_masked')
os.makedirs(os.path.join(ROOT_DIR, 'images', 'train'), exist_ok=True)
os.makedirs(masked_img_dir, exist_ok=True)
os.makedirs(not_masked_img_dir, exist_ok=True)

pbar = tqdm(total=target_n_masked_face_imgs)
while n_masked_images < target_n_masked_face_imgs:
    image_path = available_sample_face_imgs.pop()
    fname  = os.path.basename(image_path)
    
    random_mask_path = random.choice(available_masks)
    masked_face = create_masked_face(image_path, random_mask_path)
    
    if masked_face is not None:
        save(masked_img_dir, fname, masked_face)
        n_masked_images += 1
        pbar.update(1)
pbar.close()

100%|██████████| 1250/1250.0 [15:55<00:00,  1.31it/s]
 20%|█▉        | 245/1235 [02:43<14:13,  1.16it/s]

IndexError: list index out of range

In [5]:
for remaining_img_path in tqdm(available_sample_face_imgs):
    # Convert image into format that face_recognition library understands 
    face_image_np = face_recognition.load_image_file(remaining_img_path)
    
    # Recognize face boundaries from an image 
    face_locations = face_recognition.face_locations(face_image_np, model=MODEL)

    img = Image.open(remaining_img_path)
    if len(face_locations) > 0:
        cropped_img = crop_image(img, face_locations[0])
        fname = os.path.basename(remaining_img_path)
        save(not_masked_img_dir, fname, cropped_img)



  0%|          | 0/1235 [00:00<?, ?it/s][A[A

  0%|          | 1/1235 [00:00<08:51,  2.32it/s][A[A

  0%|          | 2/1235 [00:01<13:27,  1.53it/s][A[A

  0%|          | 3/1235 [00:02<13:21,  1.54it/s][A[A

  0%|          | 4/1235 [00:03<14:14,  1.44it/s][A[A

  0%|          | 5/1235 [00:03<13:52,  1.48it/s][A[A

  0%|          | 6/1235 [00:04<14:51,  1.38it/s][A[A

  1%|          | 7/1235 [00:05<13:37,  1.50it/s][A[A

  1%|          | 8/1235 [00:06<15:22,  1.33it/s][A[A

  1%|          | 9/1235 [00:06<13:52,  1.47it/s][A[A

  1%|          | 10/1235 [00:07<15:27,  1.32it/s][A[A

  1%|          | 11/1235 [00:07<12:55,  1.58it/s][A[A

  1%|          | 12/1235 [00:08<12:24,  1.64it/s][A[A

  1%|          | 13/1235 [00:09<13:16,  1.53it/s][A[A

  1%|          | 14/1235 [00:09<12:56,  1.57it/s][A[A

  1%|          | 15/1235 [00:10<13:42,  1.48it/s][A[A

  1%|▏         | 16/1235 [00:11<17:06,  1.19it/s][A[A

  1%|▏         | 17/1235 [00:11<13:14,  1.53it/

 12%|█▏        | 154/1235 [01:50<09:03,  1.99it/s][A[A

 13%|█▎        | 155/1235 [01:51<08:10,  2.20it/s][A[A

 13%|█▎        | 156/1235 [01:51<09:58,  1.80it/s][A[A

 13%|█▎        | 157/1235 [01:52<10:27,  1.72it/s][A[A

 13%|█▎        | 158/1235 [01:52<08:22,  2.14it/s][A[A

 13%|█▎        | 159/1235 [01:53<07:39,  2.34it/s][A[A

 13%|█▎        | 160/1235 [01:53<06:36,  2.71it/s][A[A

 13%|█▎        | 161/1235 [01:54<11:26,  1.56it/s][A[A

 13%|█▎        | 162/1235 [01:54<10:15,  1.74it/s][A[A

 13%|█▎        | 163/1235 [01:55<09:34,  1.86it/s][A[A

 13%|█▎        | 164/1235 [01:56<14:08,  1.26it/s][A[A

 13%|█▎        | 165/1235 [01:56<10:25,  1.71it/s][A[A

 13%|█▎        | 166/1235 [01:57<09:34,  1.86it/s][A[A

 14%|█▎        | 167/1235 [01:57<09:47,  1.82it/s][A[A

 14%|█▎        | 168/1235 [02:00<19:05,  1.07s/it][A[A

 14%|█▎        | 169/1235 [02:01<17:53,  1.01s/it][A[A

 14%|█▍        | 170/1235 [02:01<13:51,  1.28it/s][A[A

 14%|█▍       

 25%|██▌       | 309/1235 [03:33<19:21,  1.25s/it][A[A

 25%|██▌       | 310/1235 [03:33<14:19,  1.08it/s][A[A

 25%|██▌       | 311/1235 [03:34<13:58,  1.10it/s][A[A

 25%|██▌       | 312/1235 [03:35<16:30,  1.07s/it][A[A

 25%|██▌       | 313/1235 [03:36<12:12,  1.26it/s][A[A

 25%|██▌       | 314/1235 [03:36<12:32,  1.22it/s][A[A

 26%|██▌       | 315/1235 [03:37<09:51,  1.56it/s][A[A

 26%|██▌       | 316/1235 [03:37<10:28,  1.46it/s][A[A

 26%|██▌       | 317/1235 [03:39<13:58,  1.09it/s][A[A

 26%|██▌       | 318/1235 [03:39<10:44,  1.42it/s][A[A

 26%|██▌       | 319/1235 [03:40<10:32,  1.45it/s][A[A

 26%|██▌       | 320/1235 [03:41<13:33,  1.13it/s][A[A

 26%|██▌       | 321/1235 [03:43<18:52,  1.24s/it][A[A

 26%|██▌       | 322/1235 [03:45<20:12,  1.33s/it][A[A

 26%|██▌       | 323/1235 [03:47<22:51,  1.50s/it][A[A

 26%|██▌       | 324/1235 [03:48<19:54,  1.31s/it][A[A

 26%|██▋       | 325/1235 [03:48<15:58,  1.05s/it][A[A

 26%|██▋      

 38%|███▊      | 466/1235 [05:11<04:40,  2.74it/s][A[A

 38%|███▊      | 467/1235 [05:12<06:00,  2.13it/s][A[A

 38%|███▊      | 468/1235 [05:12<04:54,  2.60it/s][A[A

 38%|███▊      | 469/1235 [05:14<10:58,  1.16it/s][A[A

 38%|███▊      | 471/1235 [05:15<09:46,  1.30it/s][A[A

 38%|███▊      | 472/1235 [05:18<17:23,  1.37s/it][A[A

 38%|███▊      | 473/1235 [05:19<16:27,  1.30s/it][A[A

 38%|███▊      | 474/1235 [05:19<11:54,  1.06it/s][A[A

 38%|███▊      | 475/1235 [05:19<09:04,  1.40it/s][A[A

 39%|███▊      | 476/1235 [05:20<10:13,  1.24it/s][A[A

 39%|███▊      | 477/1235 [05:21<08:54,  1.42it/s][A[A

 39%|███▊      | 478/1235 [05:28<31:33,  2.50s/it][A[A

 39%|███▉      | 479/1235 [05:28<24:05,  1.91s/it][A[A

 39%|███▉      | 480/1235 [05:31<26:51,  2.13s/it][A[A

 39%|███▉      | 481/1235 [05:32<22:46,  1.81s/it][A[A

 39%|███▉      | 482/1235 [05:33<21:59,  1.75s/it][A[A

 39%|███▉      | 483/1235 [05:34<18:13,  1.45s/it][A[A

 39%|███▉     

 50%|█████     | 619/1235 [07:33<14:40,  1.43s/it][A[A

 50%|█████     | 620/1235 [07:33<11:03,  1.08s/it][A[A

 50%|█████     | 621/1235 [07:34<10:24,  1.02s/it][A[A

 50%|█████     | 623/1235 [07:35<08:18,  1.23it/s][A[A

 51%|█████     | 624/1235 [07:35<06:10,  1.65it/s][A[A

 51%|█████     | 625/1235 [07:36<08:23,  1.21it/s][A[A

 51%|█████     | 626/1235 [07:37<06:47,  1.49it/s][A[A

 51%|█████     | 627/1235 [07:37<05:16,  1.92it/s][A[A

 51%|█████     | 628/1235 [07:37<05:01,  2.02it/s][A[A

 51%|█████     | 629/1235 [07:38<05:15,  1.92it/s][A[A

 51%|█████     | 631/1235 [07:39<05:13,  1.93it/s][A[A

 51%|█████     | 632/1235 [07:39<04:35,  2.19it/s][A[A

 51%|█████▏    | 633/1235 [07:39<03:38,  2.75it/s][A[A

 51%|█████▏    | 634/1235 [07:41<06:21,  1.58it/s][A[A

 51%|█████▏    | 635/1235 [07:41<04:59,  2.00it/s][A[A

 51%|█████▏    | 636/1235 [07:42<06:31,  1.53it/s][A[A

 52%|█████▏    | 637/1235 [07:42<05:07,  1.94it/s][A[A

 52%|█████▏   

 63%|██████▎   | 777/1235 [09:21<02:05,  3.66it/s][A[A

 63%|██████▎   | 779/1235 [09:21<01:46,  4.29it/s][A[A

 63%|██████▎   | 780/1235 [09:23<05:17,  1.43it/s][A[A

 63%|██████▎   | 782/1235 [09:23<03:55,  1.92it/s][A[A

 63%|██████▎   | 783/1235 [09:24<03:37,  2.08it/s][A[A

 63%|██████▎   | 784/1235 [09:24<03:06,  2.41it/s][A[A

 64%|██████▎   | 785/1235 [09:24<03:03,  2.45it/s][A[A

 64%|██████▎   | 786/1235 [09:25<02:47,  2.68it/s][A[A

 64%|██████▎   | 787/1235 [09:25<02:15,  3.30it/s][A[A

 64%|██████▍   | 788/1235 [09:25<02:52,  2.58it/s][A[A

 64%|██████▍   | 789/1235 [09:26<02:50,  2.61it/s][A[A

 64%|██████▍   | 790/1235 [09:26<02:32,  2.92it/s][A[A

 64%|██████▍   | 791/1235 [09:26<02:16,  3.26it/s][A[A

 64%|██████▍   | 792/1235 [09:26<01:57,  3.78it/s][A[A

 64%|██████▍   | 793/1235 [09:27<02:39,  2.77it/s][A[A

 64%|██████▍   | 794/1235 [09:28<03:42,  1.98it/s][A[A

 64%|██████▍   | 795/1235 [09:28<03:27,  2.12it/s][A[A

 64%|██████▍  

 75%|███████▌  | 932/1235 [10:50<03:44,  1.35it/s][A[A

 76%|███████▌  | 933/1235 [10:50<02:45,  1.82it/s][A[A

 76%|███████▌  | 934/1235 [10:51<02:48,  1.79it/s][A[A

 76%|███████▌  | 935/1235 [10:52<03:07,  1.60it/s][A[A

 76%|███████▌  | 936/1235 [10:52<02:38,  1.88it/s][A[A

 76%|███████▌  | 937/1235 [10:52<02:02,  2.43it/s][A[A

 76%|███████▌  | 938/1235 [10:53<02:02,  2.42it/s][A[A

 76%|███████▌  | 939/1235 [10:53<01:42,  2.90it/s][A[A

 76%|███████▌  | 940/1235 [10:53<02:05,  2.35it/s][A[A

 76%|███████▌  | 941/1235 [10:54<01:38,  2.99it/s][A[A

 76%|███████▋  | 942/1235 [10:56<04:04,  1.20it/s][A[A

 76%|███████▋  | 943/1235 [10:56<03:21,  1.45it/s][A[A

 77%|███████▋  | 945/1235 [10:57<03:03,  1.58it/s][A[A

 77%|███████▋  | 946/1235 [10:57<02:49,  1.71it/s][A[A

 77%|███████▋  | 947/1235 [10:57<02:06,  2.27it/s][A[A

 77%|███████▋  | 948/1235 [10:58<01:38,  2.92it/s][A[A

 77%|███████▋  | 949/1235 [10:58<01:31,  3.13it/s][A[A

 77%|███████▋ 

 88%|████████▊ | 1087/1235 [12:23<02:02,  1.21it/s][A[A

 88%|████████▊ | 1088/1235 [12:24<01:48,  1.36it/s][A[A

 88%|████████▊ | 1089/1235 [12:27<03:11,  1.31s/it][A[A

 88%|████████▊ | 1091/1235 [13:23<22:34,  9.40s/it][A[A

 88%|████████▊ | 1092/1235 [13:25<16:36,  6.97s/it][A[A

 89%|████████▊ | 1094/1235 [13:25<11:39,  4.96s/it][A[A

 89%|████████▊ | 1095/1235 [13:25<08:12,  3.52s/it][A[A

 89%|████████▉ | 1097/1235 [13:26<05:48,  2.53s/it][A[A

 89%|████████▉ | 1098/1235 [13:27<05:06,  2.24s/it][A[A

 89%|████████▉ | 1099/1235 [13:27<03:36,  1.60s/it][A[A

 89%|████████▉ | 1100/1235 [13:29<03:56,  1.75s/it][A[A

 89%|████████▉ | 1101/1235 [13:30<02:52,  1.29s/it][A[A

 89%|████████▉ | 1102/1235 [13:30<02:09,  1.03it/s][A[A

 89%|████████▉ | 1103/1235 [13:31<01:56,  1.13it/s][A[A

 89%|████████▉ | 1104/1235 [13:31<01:26,  1.52it/s][A[A

 89%|████████▉ | 1105/1235 [13:31<01:07,  1.94it/s][A[A

 90%|████████▉ | 1106/1235 [13:32<01:14,  1.73it/s][A[