In [1]:
import cv2
import numpy as np
from glob import glob
from tqdm import tqdm

import os
import re

In [2]:
# create glob object for both train and test directories
train_img_glob = glob('data/train/*/*.jpg')
test_img_glob = glob('data/test/*/*.jpg')

# train and test directories
base = os.path.dirname('')
TRAIN_DIR = os.path.join(base + 'data/train')
TEST_DIR = os.path.join(base + 'data/test')

# initiate model and weights for extracting faces
prototxt_file = os.path.join(base + 'data/deploy.prototxt.txt')
caffemodel_file = os.path.join(base + 'data/weights.caffemodel')

In [3]:
# count total images from both directories
total = len(train_img_glob) + len(test_img_glob)
percent_train = int(len(train_img_glob)/total * 100)
percent_test = int(len(test_img_glob)/total * 100)

print("Total images:", total)
print("Total number of training images before preprocessing:", len(train_img_glob), "(", percent_train, "%)")
print("Total number of training images before preprocessing:", len(test_img_glob), "(", percent_test, "%)")

Total images: 1810
Total number of training images before preprocessing: 1480 ( 81 %)
Total number of training images before preprocessing: 330 ( 18 %)


In [4]:
# use cv2 built-in method cv2.dnn.readNetFromCaffe to extract faces
face_model = cv2.dnn.readNetFromCaffe(prototxt_file, caffemodel_file)

In [5]:
print('Initiating face extraction process (Training)...')

SAVE = './data/faces/train/'
CONFIDENCE = 0.5
train_skipped = 0

count = 0
last = 'dahyun'
# training image
print('[STATUS] Reading & extracting image...')
for img in tqdm(train_img_glob):
    
    label = re.findall('\\\\[a-z].*\\\\', img)[0]
    label = label[1:-1]
    
    if not os.path.isdir(os.path.join(SAVE, label)):
        os.mkdir(os.path.join(SAVE, label))
    
    if label != last:
        count = 0
        last = label
    else:
        count = count + 1
    
    # read in the image
    a = cv2.imread(img)
    (h,w) = a.shape[:2]
    # create a blob object
    blob = cv2.dnn.blobFromImage(a, scalefactor=1.0, size=(224, 224), 
                                 mean=(104.0, 177.0, 123.0), swapRB=False, crop=False)
    face_model.setInput(blob)
    # detect face
    detector = face_model.forward() 
    
    # if any face is detected
    if len(detector) > 0:
        # get the index of the face
        i = np.argmax(detector[0,0,:,2])
        confidence = detector[0,0,i,2]
        
        # if the confidence is higher than the threshold we set earlier
        if confidence > CONFIDENCE:
            # extract face from a rectangle
            rect = detector[0,0,i,3:7] * np.array([w,h,w,h])
            start_x, start_y, end_x, end_y = rect.astype('int')
            
            face = a[start_y:end_y, start_x:end_x]
            
            # skip it if there is no face
            if face.size == 0:
                print('[INFO] Skipping... No face detected:', img)
                train_skipped += 1
                continue
            else:
                img_file_name = label + '_face_' + str(count) + '.jpg'
#                 print(SAVE + label + '/' + img_file_name)
#                 print(os.path.join(SAVE, label, img_file_name))
                cv2.imwrite(SAVE + label + '/' + img_file_name, face)
        
        # skip otherwise
        else:
            print('[INFO] Skipping... Confidence below threshold:', img)
            train_skipped += 1
            continue

print('[STATUS] Face extraction process completed!')

  0%|                                                                                         | 0/1480 [00:00<?, ?it/s]

Initiating face extraction process (Training)...
[STATUS] Reading & extracting image...


  2%|█▊                                                                              | 33/1480 [00:00<00:55, 25.93it/s]

[INFO] Skipping... No face detected: data/train\dahyun\RCPC_members-1209682000044445697-20191225_104718-img3.jpg


  4%|███▏                                                                            | 60/1480 [00:02<00:57, 24.55it/s]

[INFO] Skipping... No face detected: data/train\dahyun\RCPC_members-1235880708607885312-20200306_175137-img1.jpg
[INFO] Skipping... Confidence below threshold: data/train\dahyun\RCPC_members-1235880708607885312-20200306_175137-img2.jpg


  9%|███████                                                                        | 133/1480 [00:04<00:55, 24.46it/s]

[INFO] Skipping... Confidence below threshold: data/train\dahyun\RCPC_members-1259693901591408642-20200511_105644-img1.jpg
[INFO] Skipping... Confidence below threshold: data/train\dahyun\RCPC_members-1259693901591408642-20200511_105644-img2.jpg


 12%|█████████▍                                                                     | 177/1480 [00:07<01:13, 17.63it/s]

[INFO] Skipping... Confidence below threshold: data/train\dahyun\RCPC_members-1274210773338710017-20200620_122136-img4.jpg


 13%|█████████▉                                                                     | 187/1480 [00:07<01:17, 16.66it/s]

[INFO] Skipping... No face detected: data/train\dahyun\RCPC_members-1278613608243449856-20200702_155654-img3.jpg


 23%|██████████████████                                                             | 339/1480 [00:12<00:37, 30.78it/s]

[INFO] Skipping... Confidence below threshold: data/train\dahyun\RCPC_members-1309423017290641408-20200925_162229-img2.jpg


 25%|███████████████████▉                                                           | 374/1480 [00:13<00:24, 45.39it/s]

[INFO] Skipping... Confidence below threshold: data/train\juri\RCPC_members-1194528907459850240-20191113_151419-img3.jpg


 27%|█████████████████████                                                          | 395/1480 [00:13<00:24, 44.44it/s]

[INFO] Skipping... Confidence below threshold: data/train\juri\RCPC_members-1209747487277215745-20191225_150731-img3.jpg
[INFO] Skipping... Confidence below threshold: data/train\juri\RCPC_members-1227447059273469960-20200212_111918-img1.jpg


 27%|█████████████████████▌                                                         | 405/1480 [00:13<00:23, 45.91it/s]

[INFO] Skipping... Confidence below threshold: data/train\juri\RCPC_members-1227940282827542530-20200213_195912-img2.jpg
[INFO] Skipping... No face detected: data/train\juri\RCPC_members-1231183271473532929-20200222_184540-img2.jpg


 28%|██████████████████████▍                                                        | 421/1480 [00:14<00:22, 47.71it/s]

[INFO] Skipping... Confidence below threshold: data/train\juri\RCPC_members-1247466331043008512-20200407_170844-img4.jpg


 30%|███████████████████████▋                                                       | 444/1480 [00:14<00:21, 49.04it/s]

[INFO] Skipping... No face detected: data/train\juri\RCPC_members-1276446925558956038-20200626_162716-img1.jpg
[INFO] Skipping... No face detected: data/train\juri\RCPC_members-1276446925558956038-20200626_162716-img2.jpg
[INFO] Skipping... Confidence below threshold: data/train\juri\RCPC_members-1276446925558956038-20200626_162716-img3.jpg
[INFO] Skipping... No face detected: data/train\juri\RCPC_members-1286271951598624770-20200723_190825-img1.jpg


 31%|████████████████████████▏                                                      | 454/1480 [00:14<00:21, 46.95it/s]

[INFO] Skipping... Confidence below threshold: data/train\juri\RCPC_members-1289384316514926597-20200801_091551-img3.jpg


 32%|█████████████████████████▎                                                     | 475/1480 [00:15<00:21, 47.62it/s]

[INFO] Skipping... Confidence below threshold: data/train\juri\RCPC_members-1294962227716341764-20200816_184028-img2.jpg


 35%|███████████████████████████▎                                                   | 511/1480 [00:15<00:21, 46.05it/s]

[INFO] Skipping... Confidence below threshold: data/train\juri\RCPC_members-1304714924044607489-20200912_163412-img2.jpg


 39%|██████████████████████████████▋                                                | 576/1480 [00:17<00:19, 46.19it/s]

[INFO] Skipping... Confidence below threshold: data/train\sohee\RCPC_members-1294117874835963904-20200814_104519-img2.jpg
[INFO] Skipping... Confidence below threshold: data/train\sohee\RCPC_members-1294117874835963904-20200814_104519-img3.jpg
[INFO] Skipping... Confidence below threshold: data/train\sohee\RCPC_members-1294282849529425922-20200814_214052-img1.jpg


 41%|████████████████████████████████▏                                              | 602/1480 [00:18<00:18, 48.05it/s]

[INFO] Skipping... Confidence below threshold: data/train\sohee\sohee10.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1193437052534018048-20191110_145540-img1.jpg


 43%|██████████████████████████████████▎                                            | 643/1480 [00:19<00:22, 37.53it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1224632055251652608-20200204_165329-img1.jpg


 45%|███████████████████████████████████▏                                           | 659/1480 [00:20<00:32, 25.48it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1236927187900456965-20200309_150957-img2.jpg


 45%|███████████████████████████████████▊                                           | 670/1480 [00:20<00:29, 27.65it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1252515077103607809-20200421_153039-img1.jpg
[INFO] Skipping... No face detected: data/train\suyun\RCPC_members-1253273361347731456-20200423_174348-img3.jpg


 47%|████████████████████████████████████▊                                          | 689/1480 [00:21<00:26, 30.27it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1260480082734432257-20200513_150044-img1.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1260480082734432257-20200513_150044-img2.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1260480082734432257-20200513_150044-img3.jpg
[INFO] Skipping... No face detected: data/train\suyun\RCPC_members-1260879449752350720-20200514_172741-img1.jpg


 47%|█████████████████████████████████████▏                                         | 697/1480 [00:21<00:25, 30.87it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1262006740901486593-20200517_200708-img2.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1263339729694494721-20200521_122357-img1.jpg


 48%|█████████████████████████████████████▋                                         | 705/1480 [00:21<00:26, 29.32it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1263339729694494721-20200521_122357-img2.jpg


 48%|█████████████████████████████████████▉                                         | 710/1480 [00:21<00:24, 31.54it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1265520694315581447-20200527_125020-img4.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1266572214372974592-20200530_102842-img1.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1267051548497997825-20200531_181324-img2.jpg


 49%|██████████████████████████████████████▎                                        | 718/1480 [00:22<00:24, 31.70it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1267051548497997825-20200531_181324-img3.jpg


 50%|███████████████████████████████████████▋                                       | 744/1480 [00:22<00:20, 35.97it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1277929985727062017-20200630_184026-img1.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1277929985727062017-20200630_184026-img2.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1277929985727062017-20200630_184026-img3.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1278311264385552384-20200701_195529-img3.jpg


 52%|████████████████████████████████████████▊                                      | 764/1480 [00:23<00:21, 33.01it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1289013296675188736-20200731_084133-img2.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1289013296675188736-20200731_084133-img4.jpg


 52%|█████████████████████████████████████████▍                                     | 776/1480 [00:23<00:20, 33.95it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1290295485798875142-20200803_213631-img2.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1290295485798875142-20200803_213631-img4.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1291744014299881472-20200807_213227-img1.jpg


 53%|█████████████████████████████████████████▉                                     | 785/1480 [00:23<00:18, 37.40it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1291744014299881472-20200807_213227-img2.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1292775367422251009-20200810_175040-img3.jpg


 54%|██████████████████████████████████████████▎                                    | 793/1480 [00:24<00:20, 32.79it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1293892086509395969-20200813_194807-img3.jpg


 54%|██████████████████████████████████████████▊                                    | 801/1480 [00:24<00:23, 29.05it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1295921910614638592-20200819_101355-img2.jpg


 55%|███████████████████████████████████████████▌                                   | 817/1480 [00:24<00:20, 32.91it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1297880148205740040-20200824_195515-img4.jpg


 56%|████████████████████████████████████████████▌                                  | 834/1480 [00:25<00:20, 32.27it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1301861566925611008-20200904_193559-img3.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1301861566925611008-20200904_193559-img4.jpg


 58%|█████████████████████████████████████████████▌                                 | 854/1480 [00:26<00:18, 33.09it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1305037865278042112-20200913_135728-img1.jpg


 58%|██████████████████████████████████████████████                                 | 862/1480 [00:26<00:18, 33.82it/s]

[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1306851542503362561-20200918_140422-img2.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1306851542503362561-20200918_140422-img3.jpg
[INFO] Skipping... Confidence below threshold: data/train\suyun\RCPC_members-1306851542503362561-20200918_140422-img4.jpg


 59%|██████████████████████████████████████████████▋                                | 874/1480 [00:26<00:18, 32.84it/s]

[INFO] Skipping... No face detected: data/train\suyun\RCPC_members-1310112297394741248-20200927_140127-img1.jpg
[INFO] Skipping... No face detected: data/train\suyun\RCPC_members-1310112297394741248-20200927_140127-img4.jpg


 63%|█████████████████████████████████████████████████▊                             | 933/1480 [00:28<00:17, 31.23it/s]

[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1238449909377798144-20200313_200042-img4.jpg


 64%|██████████████████████████████████████████████████▏                            | 941/1480 [00:28<00:18, 28.82it/s]

[INFO] Skipping... No face detected: data/train\yeonhee\RCPC_members-1249661506569957377-20200413_183135-img3.jpg
[INFO] Skipping... No face detected: data/train\yeonhee\RCPC_members-1259462995492524033-20200510_193912-img1.jpg


 64%|██████████████████████████████████████████████████▊                            | 953/1480 [00:28<00:17, 30.65it/s]

[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1276385382100119555-20200626_122243-img3.jpg
[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1276771580220018688-20200627_135720-img4.jpg
[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1276772364475228160-20200627_140027-img1.jpg


 66%|███████████████████████████████████████████████████▉                           | 974/1480 [00:29<00:15, 33.41it/s]

[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1278973120041238528-20200703_154528-img3.jpg
[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1278973120041238528-20200703_154528-img4.jpg


 66%|████████████████████████████████████████████████████▌                          | 984/1480 [00:29<00:12, 39.25it/s]

[INFO] Skipping... No face detected: data/train\yeonhee\RCPC_members-1286272910634565633-20200723_191214-img1.jpg
[INFO] Skipping... No face detected: data/train\yeonhee\RCPC_members-1286272910634565633-20200723_191214-img2.jpg


 68%|████████████████████████████████████████████████████▊                         | 1002/1480 [00:30<00:16, 29.13it/s]

[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1290870909809516545-20200805_114302-img2.jpg


 68%|█████████████████████████████████████████████████████                         | 1006/1480 [00:30<00:15, 30.24it/s]

[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1291747270426112000-20200807_214523-img1.jpg


 69%|█████████████████████████████████████████████████████▊                        | 1020/1480 [00:31<00:15, 29.97it/s]

[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1294613030089551873-20200815_193253-img3.jpg


 69%|██████████████████████████████████████████████████████▏                       | 1028/1480 [00:31<00:13, 33.17it/s]

[INFO] Skipping... No face detected: data/train\yeonhee\RCPC_members-1295704768950571008-20200818_195104-img1.jpg
[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1296761770292310021-20200821_175113-img1.jpg


 70%|██████████████████████████████████████████████████████▌                       | 1036/1480 [00:31<00:14, 31.34it/s]

[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1296761770292310021-20200821_175113-img4.jpg


 71%|███████████████████████████████████████████████████████▍                      | 1052/1480 [00:32<00:14, 29.47it/s]

[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1298600891415588864-20200826_193914-img2.jpg
[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1298600891415588864-20200826_193914-img3.jpg
[INFO] Skipping... No face detected: data/train\yeonhee\RCPC_members-1299625549736812544-20200829_153051-img1.jpg
[INFO] Skipping... No face detected: data/train\yeonhee\RCPC_members-1299625549736812544-20200829_153051-img2.jpg


 71%|███████████████████████████████████████████████████████▊                      | 1058/1480 [00:32<00:16, 25.49it/s]

[INFO] Skipping... No face detected: data/train\yeonhee\RCPC_members-1299934906760359936-20200830_120008-img4.jpg


 72%|████████████████████████████████████████████████████████▏                     | 1067/1480 [00:32<00:16, 24.39it/s]

[INFO] Skipping... No face detected: data/train\yeonhee\RCPC_members-1300401341462839296-20200831_185334-img2.jpg
[INFO] Skipping... No face detected: data/train\yeonhee\RCPC_members-1300401341462839296-20200831_185334-img4.jpg


 73%|████████████████████████████████████████████████████████▌                     | 1074/1480 [00:33<00:15, 26.87it/s]

[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1301040070145708033-20200902_131139-img3.jpg


 73%|█████████████████████████████████████████████████████████▏                    | 1086/1480 [00:33<00:14, 28.02it/s]

[INFO] Skipping... No face detected: data/train\yeonhee\RCPC_members-1303660563944235009-20200909_184433-img3.jpg


 75%|██████████████████████████████████████████████████████████▏                   | 1103/1480 [00:34<00:12, 29.13it/s]

[INFO] Skipping... No face detected: data/train\yeonhee\RCPC_members-1306121089269903363-20200916_134148-img4.jpg


 75%|██████████████████████████████████████████████████████████▊                   | 1115/1480 [00:34<00:12, 30.15it/s]

[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1309370946633916416-20200925_125535-img1.jpg
[INFO] Skipping... Confidence below threshold: data/train\yeonhee\RCPC_members-1309370946633916416-20200925_125535-img2.jpg
[INFO] Skipping... No face detected: data/train\yeonhee\yeonhee10.jpg


 76%|███████████████████████████████████████████████████████████▍                  | 1127/1480 [00:34<00:11, 30.77it/s]

[INFO] Skipping... No face detected: data/train\yeonhee\yeonhee4.jpg


 83%|█████████████████████████████████████████████████████████████████             | 1235/1480 [00:38<00:08, 27.78it/s]

[INFO] Skipping... Confidence below threshold: data/train\yunkyoung\RCPC_members-1263023632038547456-20200520_152754-img3.jpg


 85%|██████████████████████████████████████████████████████████████████▏           | 1255/1480 [00:39<00:07, 28.44it/s]

[INFO] Skipping... No face detected: data/train\yunkyoung\RCPC_members-1267715837143642112-20200602_141303-img1.jpg


 87%|███████████████████████████████████████████████████████████████████▌          | 1283/1480 [00:40<00:06, 28.62it/s]

[INFO] Skipping... No face detected: data/train\yunkyoung\RCPC_members-1276118741844062208-20200625_184311-img3.jpg


 87%|███████████████████████████████████████████████████████████████████▉          | 1289/1480 [00:40<00:06, 28.72it/s]

[INFO] Skipping... Confidence below threshold: data/train\yunkyoung\RCPC_members-1277218408086425600-20200628_193252-img2.jpg
[INFO] Skipping... Confidence below threshold: data/train\yunkyoung\RCPC_members-1277581915113918465-20200629_193719-img3.jpg


 89%|█████████████████████████████████████████████████████████████████████▏        | 1313/1480 [00:41<00:04, 33.90it/s]

[INFO] Skipping... No face detected: data/train\yunkyoung\RCPC_members-1288139593556963329-20200728_224946-img3.jpg


 89%|█████████████████████████████████████████████████████████████████████▌        | 1321/1480 [00:41<00:04, 31.90it/s]

[INFO] Skipping... No face detected: data/train\yunkyoung\RCPC_members-1289939073516232704-20200802_220015-img1.jpg


 90%|██████████████████████████████████████████████████████████████████████▎       | 1333/1480 [00:41<00:04, 30.96it/s]

[INFO] Skipping... Confidence below threshold: data/train\yunkyoung\RCPC_members-1290664356976513026-20200804_220216-img2.jpg


 91%|██████████████████████████████████████████████████████████████████████▋       | 1341/1480 [00:42<00:04, 32.00it/s]

[INFO] Skipping... No face detected: data/train\yunkyoung\RCPC_members-1292436361744588800-20200809_192335-img3.jpg
[INFO] Skipping... No face detected: data/train\yunkyoung\RCPC_members-1292436361744588800-20200809_192335-img4.jpg


 93%|████████████████████████████████████████████████████████████████████████▌     | 1377/1480 [00:43<00:03, 29.80it/s]

[INFO] Skipping... Confidence below threshold: data/train\yunkyoung\RCPC_members-1297449867434835971-20200823_152528-img1.jpg
[INFO] Skipping... Confidence below threshold: data/train\yunkyoung\RCPC_members-1297449867434835971-20200823_152528-img2.jpg


 95%|██████████████████████████████████████████████████████████████████████████▎   | 1411/1480 [00:44<00:02, 27.27it/s]

[INFO] Skipping... No face detected: data/train\yunkyoung\RCPC_members-1300722940200153090-20200901_161129-img3.jpg


 96%|██████████████████████████████████████████████████████████████████████████▉   | 1422/1480 [00:44<00:01, 30.42it/s]

[INFO] Skipping... Confidence below threshold: data/train\yunkyoung\RCPC_members-1302499576176238594-20200906_135112-img3.jpg
[INFO] Skipping... Confidence below threshold: data/train\yunkyoung\RCPC_members-1302499576176238594-20200906_135112-img4.jpg


 98%|████████████████████████████████████████████████████████████████████████████▍ | 1451/1480 [00:45<00:00, 32.95it/s]

[INFO] Skipping... Confidence below threshold: data/train\yunkyoung\RCPC_members-1307653534494519297-20200920_191112-img1.jpg
[INFO] Skipping... Confidence below threshold: data/train\yunkyoung\RCPC_members-1307653534494519297-20200920_191112-img2.jpg
[INFO] Skipping... Confidence below threshold: data/train\yunkyoung\RCPC_members-1307653534494519297-20200920_191112-img3.jpg
[INFO] Skipping... Confidence below threshold: data/train\yunkyoung\RCPC_members-1307653534494519297-20200920_191112-img4.jpg


100%|██████████████████████████████████████████████████████████████████████████████| 1480/1480 [00:46<00:00, 31.67it/s]

[STATUS] Face extraction process completed!





In [6]:
print('Initiating face extraction process (Validation)...')

SAVE = './data/faces/valid/'
CONFIDENCE = 0.5
valid_skipped = 0

count = 0
last = 'dahyun'
# training image
print('[STATUS] Reading & extracting image...')
for img in tqdm(test_img_glob):
    
    label = re.findall('\\\\[a-z].*\\\\', img)[0]
    label = label[1:-1]
    
    if not os.path.isdir(os.path.join(SAVE, label)):
        os.mkdir(os.path.join(SAVE, label))
    
    if label != last:
        count = 0
        last = label
    else:
        count = count + 1
    
    # read in the image
    a = cv2.imread(img)
    (h,w) = a.shape[:2]
    # create a blob object
    blob = cv2.dnn.blobFromImage(a, scalefactor=1.0, size=(224, 224), 
                                 mean=(104.0, 177.0, 123.0), swapRB=False, crop=False)
    face_model.setInput(blob)
    # detect face
    detector = face_model.forward() 
    
    # if any face is detected
    if len(detector) > 0:
        # get the index of the face
        i = np.argmax(detector[0,0,:,2])
        confidence = detector[0,0,i,2]
        
        # if the confidence is higher than the threshold we set earlier
        if confidence > CONFIDENCE:
            # extract face from a rectangle
            rect = detector[0,0,i,3:7] * np.array([w,h,w,h])
            start_x, start_y, end_x, end_y = rect.astype('int')
            
            face = a[start_y:end_y, start_x:end_x]
            
            # skip it if there is no face
            if face.size == 0:
                print('[INFO] Skipping... No face detected:', img)
                valid_skipped += 1
                continue
            else:
                img_file_name = label + '_face_' + str(count) + '_valid.jpg'
#                 print(SAVE + label + '/' + img_file_name)
#                 print(os.path.join(SAVE, label, img_file_name))
                cv2.imwrite(SAVE + label + '/' + img_file_name, face)
        
        # skip otherwise
        else:
            print('[INFO] Skipping... Confidence below threshold:', img)
            valid_skipped += 1
            continue

print('[STATUS] Face extraction process completed!')

  2%|█▏                                                                                | 5/330 [00:00<00:06, 46.51it/s]

Initiating face extraction process (Validation)...
[STATUS] Reading & extracting image...
[INFO] Skipping... Confidence below threshold: data/test\dahyun\RCPC_members-1205115119753023488-20191212_202009-img1.jpg


  8%|██████▍                                                                          | 26/330 [00:00<00:10, 28.88it/s]

[INFO] Skipping... Confidence below threshold: data/test\dahyun\RCPC_members-1232620519469400064-20200226_175647-img4.jpg


 22%|██████████████████▏                                                              | 74/330 [00:02<00:07, 36.12it/s]

[INFO] Skipping... Confidence below threshold: data/test\juri\RCPC_members-1213385880552886273-20200104_160512-img1.jpg
[INFO] Skipping... Confidence below threshold: data/test\juri\RCPC_members-1213385880552886273-20200104_160512-img2.jpg
[INFO] Skipping... Confidence below threshold: data/test\juri\RCPC_members-1226857875592503298-20200210_201806-img1.jpg


 24%|███████████████████▋                                                             | 80/330 [00:02<00:06, 39.53it/s]

[INFO] Skipping... Confidence below threshold: data/test\juri\RCPC_members-1227447059273469960-20200212_111918-img3.jpg
[INFO] Skipping... Confidence below threshold: data/test\juri\RCPC_members-1227447059273469960-20200212_111918-img4.jpg
[INFO] Skipping... Confidence below threshold: data/test\juri\RCPC_members-1230087961154224129-20200219_181318-img3.jpg


 32%|█████████████████████████▋                                                      | 106/330 [00:03<00:05, 43.69it/s]

[INFO] Skipping... Confidence below threshold: data/test\juri\RCPC_members-1242438394031955969-20200324_200931-img3.jpg
[INFO] Skipping... Confidence below threshold: data/test\juri\RCPC_members-1244923497077325826-20200331_164425-img1.jpg
[INFO] Skipping... No face detected: data/test\juri\RCPC_members-1248550533792591872-20200410_165658-img2.jpg


 43%|██████████████████████████████████▏                                             | 141/330 [00:03<00:04, 45.12it/s]

[INFO] Skipping... Confidence below threshold: data/test\sohee\RCPC_members-1223891941626007553-20200202_155232-img2.jpg
[INFO] Skipping... Confidence below threshold: data/test\sohee\RCPC_members-1223891941626007553-20200202_155232-img3.jpg
[INFO] Skipping... Confidence below threshold: data/test\sohee\RCPC_members-1226066028297252869-20200208_155135-img1.jpg
[INFO] Skipping... Confidence below threshold: data/test\sohee\RCPC_members-1226066028297252869-20200208_155135-img2.jpg
[INFO] Skipping... Confidence below threshold: data/test\sohee\RCPC_members-1226066028297252869-20200208_155135-img3.jpg


 50%|████████████████████████████████████████▏                                       | 166/330 [00:04<00:05, 29.79it/s]

[INFO] Skipping... Confidence below threshold: data/test\suyun\RCPC_members-1226877943437090817-20200210_213750-img4.jpg


 63%|██████████████████████████████████████████████████▍                             | 208/330 [00:06<00:04, 26.12it/s]

[INFO] Skipping... Confidence below threshold: data/test\suyun\RCPC_members-1245333141486624769-20200401_195212-img1.jpg


 66%|████████████████████████████████████████████████████▌                           | 217/330 [00:06<00:03, 32.20it/s]

[INFO] Skipping... Confidence below threshold: data/test\suyun\RCPC_members-1308614258796630016-20200923_104846-img1.jpg
[INFO] Skipping... Confidence below threshold: data/test\yeonhee\RCPC_members-1214137383932719105-20200106_175124-img1.jpg
[INFO] Skipping... Confidence below threshold: data/test\yeonhee\RCPC_members-1214137383932719105-20200106_175124-img2.jpg


 70%|████████████████████████████████████████████████████████                        | 231/330 [00:07<00:04, 24.51it/s]

[INFO] Skipping... No face detected: data/test\yeonhee\RCPC_members-1227837273321816064-20200213_130952-img4.jpg


 73%|██████████████████████████████████████████████████████████▏                     | 240/330 [00:07<00:04, 21.41it/s]

[INFO] Skipping... Confidence below threshold: data/test\yeonhee\RCPC_members-1232948837431406592-20200227_154124-img4.jpg


 76%|█████████████████████████████████████████████████████████████                   | 252/330 [00:07<00:02, 28.80it/s]

[INFO] Skipping... Confidence below threshold: data/test\yeonhee\RCPC_members-1238450293248905216-20200313_200213-img4.jpg
[INFO] Skipping... Confidence below threshold: data/test\yeonhee\RCPC_members-1242348803266506752-20200324_141331-img3.jpg
[INFO] Skipping... Confidence below threshold: data/test\yeonhee\RCPC_members-1242349932142407680-20200324_141800-img1.jpg
[INFO] Skipping... Confidence below threshold: data/test\yeonhee\RCPC_members-1242794451778433025-20200325_194421-img1.jpg


 80%|████████████████████████████████████████████████████████████████                | 264/330 [00:08<00:02, 31.82it/s]

[INFO] Skipping... No face detected: data/test\yeonhee\RCPC_members-1249661214176587776-20200413_183025-img1.jpg
[INFO] Skipping... No face detected: data/test\yeonhee\RCPC_members-1249661214176587776-20200413_183025-img2.jpg


 83%|██████████████████████████████████████████████████████████████████▏             | 273/330 [00:08<00:01, 31.61it/s]

[INFO] Skipping... No face detected: data/test\yeonhee\RCPC_members-1279269204831506432-20200704_112200-img1.jpg


 90%|████████████████████████████████████████████████████████████████████████        | 297/330 [00:09<00:01, 19.63it/s]

[INFO] Skipping... Confidence below threshold: data/test\yunkyoung\RCPC_members-1232996082944077826-20200227_184908-img1.jpg
[INFO] Skipping... Confidence below threshold: data/test\yunkyoung\RCPC_members-1232996082944077826-20200227_184908-img2.jpg


 98%|██████████████████████████████████████████████████████████████████████████████▎ | 323/330 [00:10<00:00, 24.59it/s]

[INFO] Skipping... Confidence below threshold: data/test\yunkyoung\RCPC_members-1246761904338423813-20200405_182936-img1.jpg
[INFO] Skipping... Confidence below threshold: data/test\yunkyoung\RCPC_members-1263341003185909760-20200521_122901-img1.jpg


100%|████████████████████████████████████████████████████████████████████████████████| 330/330 [00:10<00:00, 30.02it/s]

[STATUS] Face extraction process completed!





In [7]:
train_face_glob = glob('data/faces/train/*/*.jpg')
valid_face_glob = glob('data/faces/valid/*/*.jpg')
print('-----Training Images-----')
print('Skipped images:', train_skipped)
print('Extracted faces:', len(train_face_glob))
print('-----Validation Images-----')
print('Skipped images:', valid_skipped)
print('Extracted faces:', len(valid_face_glob))

-----Training Images-----
Skipped images: 112
Extracted faces: 1368
-----Validation Images-----
Skipped images: 34
Extracted faces: 296
