In [1]:
import pandas as pd
import os
import shutil
import cv2
import sys
import dlib
import math
from IPython.display import clear_output, display
import time
from tqdm import tqdm

In [2]:
# Side length, in pixels, of the square images written by the final resize step;
# it is also embedded in the names of the output folders.
images_dimensions = 128

In [3]:
# Label of the image set being processed; it is embedded in the names of the
# input and output folders.
attribute = 'initial-set'

In [4]:
# Root folder that holds every image set (trailing slash is required: the
# other paths are built by plain string concatenation).
IMAGES_FOLDER = './images/'

# '<side>x<side>' tag appended to the names of the generated folders.
folderDimensionsSufix = f'{images_dimensions}x{images_dimensions}'

# Input images.
ORIGINALS_FOLDER = f'{IMAGES_FOLDER}originals--{attribute}/'
# Grayscale intermediates first, final resized faces at the end of the run.
padronizedFolder = f'{IMAGES_FOLDER}padronized--{attribute}--{folderDimensionsSufix}/'
# Face crops produced by the detection step.
croppedsFolder = f'{IMAGES_FOLDER}croppeds--{attribute}--{folderDimensionsSufix}/'

In [5]:
# Timestamp taken before processing starts; the last cell subtracts it from
# the current time to report the total run time.
lastTime = time.time()

In [6]:
# Start from an empty output folder: drop whatever a previous run left behind,
# then recreate it.
try:
    shutil.rmtree(padronizedFolder)
except FileNotFoundError:
    # First run: there is nothing to delete. Any other failure (e.g. a
    # permission error) is left to propagate so the real cause is visible.
    pass
os.makedirs(padronizedFolder, exist_ok=True)

In [7]:
# Start from an empty folder for the face crops: drop whatever a previous run
# left behind, then recreate it.
try:
    shutil.rmtree(croppedsFolder)
except FileNotFoundError:
    # First run: there is nothing to delete. Any other failure (e.g. a
    # permission error) is left to propagate so the real cause is visible.
    pass
os.makedirs(croppedsFolder, exist_ok=True)

In [8]:
# Sorted names of the entries directly inside the originals folder, leaving
# out sub-folders. os.scandir raises FileNotFoundError when the folder is
# missing, and no throwaway `_` variable is assigned (in IPython `_` holds
# the last cell output).
with os.scandir( ORIGINALS_FOLDER ) as entries:
    files = sorted( entry.name for entry in entries if not entry.is_dir() )
print( f'Total of images: {len( files )}' )

Total of images: 97609


### Convert to grayscale

In [9]:
# Convert every original image to grayscale and store it, under the same file
# name, in the padronized folder.
for file in tqdm(files):

    # Load image in memory. cv2.imread does not raise on failure: it returns
    # None when the file is missing or cannot be decoded.
    loadedImage = cv2.imread(ORIGINALS_FOLDER + file)
    if loadedImage is None:
        # tqdm.write prints without breaking the progress bar
        tqdm.write( f'Skipping unreadable image: {file}' )
        continue

    # Convert to gray scale
    grayImage = cv2.cvtColor(loadedImage, cv2.COLOR_BGR2GRAY)

    # Save gray scale image; cv2.imwrite reports failure through its return value
    if not cv2.imwrite( padronizedFolder + file, grayImage ):
        tqdm.write( f'Could not write gray scale image: {file}' )

print( '\n----------------------------------------------------' )
print( 'Finalized' )

100%|██████████| 97609/97609 [08:45<00:00, 185.84it/s]


----------------------------------------------------
Finalized





### Crop function: crop and save the face

In [10]:
from PIL import Image

def crop( image_path, coords, saved_location ):
    """Cut a rectangle out of an image file and save it as a new file.

    Parameters
    ----------
    image_path
        Path of the image to read.
    coords
        Box to keep, passed unchanged to PIL's ``Image.crop``
        (a ``(left, upper, right, lower)`` tuple in pixels).
    saved_location
        Path the cropped image is written to; the output format is chosen
        by PIL from the file extension.
    """
    # The context manager closes the source file as soon as the crop has been
    # written, instead of leaving the handle open until garbage collection.
    with Image.open( image_path ) as image_obj:
        cropped_image = image_obj.crop( coords )
        cropped_image.save( saved_location )

### Find faces with HOG: detect each face and crop it with the crop function

In [11]:
# Sorted names of the entries directly inside the padronized folder, leaving
# out sub-folders. os.scandir raises FileNotFoundError when the folder is
# missing, and no throwaway `_` variable is assigned (in IPython `_` holds
# the last cell output).
with os.scandir( padronizedFolder ) as entries:
    files = sorted( entry.name for entry in entries if not entry.is_dir() )
print( f'Total of images: {len( files )}' )

Total of images: 97609


In [12]:
# https://www.learnopencv.com/face-detection-opencv-dlib-and-deep-learning-c-python/
hogFaceDetector = dlib.get_frontal_face_detector()

# For every grayscale image: detect faces, keep one bounding box and save that
# region, under the same file name, in the croppeds folder. Images without a
# detection produce no output file.
for file in tqdm(files):

    # Load image in memory. cv2.imread returns None (it does not raise) when
    # the file is missing or cannot be decoded.
    loadedImage = cv2.imread( padronizedFolder + file )
    if loadedImage is None:
        tqdm.write( f'Skipping unreadable image: {file}' )
        continue

    imageHeight, imageWidth = loadedImage.shape[:2]

    # Second argument is the number of upsampling passes (0 = none)
    facesRects = hogFaceDetector( loadedImage, 0 )
    if len( facesRects ) == 0:
        continue

    # Every crop of an image goes to the same output path, so only one face
    # can be kept. Pick the largest box explicitly instead of letting each
    # detection overwrite the previous one.
    d = max( facesRects,
             key=lambda r: ( r.right() - r.left() ) * ( r.bottom() - r.top() ) )

    # Bounding box, clamped to the image on all four sides. The detector may
    # return boxes that extend past the border; cropping outside the image
    # would pad the result with black pixels.
    left = max( d.left(), 0 )
    top = max( d.top(), 0 )
    right = min( d.right(), imageWidth )
    bottom = min( d.bottom(), imageHeight )
    if right <= left or bottom <= top:
        continue

    crop( padronizedFolder + file,
          ( left, top, right, bottom ),
          croppedsFolder + file )

print( '\n----------------------------------------------------' )
print( 'Finalized' )

100%|██████████| 97609/97609 [19:23<00:00, 83.87it/s]


----------------------------------------------------
Finalized





### Resize cropped faces

In [13]:
# Sorted names of the entries directly inside the croppeds folder, leaving
# out sub-folders. os.scandir raises FileNotFoundError when the folder is
# missing, and no throwaway `_` variable is assigned (in IPython `_` holds
# the last cell output).
with os.scandir( croppedsFolder ) as entries:
    files = sorted( entry.name for entry in entries if not entry.is_dir() )
print( f'Total of images: {len( files )}' )

Total of images: 96241


In [14]:
# Empty the padronized folder again: the resize step below writes the final
# images into it, replacing the grayscale intermediates.
try:
    shutil.rmtree(padronizedFolder)
except FileNotFoundError:
    # Nothing to delete. Any other failure (e.g. a permission error) is left
    # to propagate so the real cause is visible.
    pass
os.makedirs(padronizedFolder, exist_ok=True)

In [15]:
# Bring every face crop to images_dimensions x images_dimensions pixels and
# store it, under the same file name, in the padronized folder.
for file in tqdm(files):

    # Load image in memory. cv2.imread returns None (it does not raise) when
    # the file is missing or cannot be decoded.
    loadedImage = cv2.imread( croppedsFolder + file )
    if loadedImage is None:
        tqdm.write( f'Skipping unreadable image: {file}' )
        continue

    originalHeight, originalWidth = loadedImage.shape[:2]

    # First choice: make the width match the target, keeping the aspect ratio
    width = images_dimensions
    height = int( originalHeight * width / originalWidth )

    # If that leaves the image too short, match the height instead, so both
    # sides are at least images_dimensions and the crop below is always square.
    if height < images_dimensions:
        height = images_dimensions
        width = int( originalWidth * height / originalHeight )

    # Resize once, with the final size (cv2 expects (width, height))
    resizedImage = cv2.resize( loadedImage, (width, height) )

    # Keep the top-left images_dimensions x images_dimensions square
    croppedImage = resizedImage[0:images_dimensions, 0:images_dimensions]

    # Save the padronized image; cv2.imwrite reports failure through its return value
    if not cv2.imwrite( padronizedFolder + file, croppedImage ):
        tqdm.write( f'Could not write resized image: {file}' )

print( '\n----------------------------------------------------' )
print( 'Finalized' )

100%|██████████| 96241/96241 [08:00<00:00, 200.22it/s]


----------------------------------------------------
Finalized





In [16]:
# Total wall-clock time since lastTime was recorded, reported in minutes
now = time.time()
interval = now - lastTime
print( f'Proccess Time: {interval / 60:.2f} min' )

Proccess Time: 36.38 min
