# Preprocess

In [1]:
""" Handle the necessary imports"""
import cv2
import math
import numpy as np
from random import randrange
import os
import glob
from matplotlib import pyplot as plt

In [2]:
patch_size=64 #60*60*3
num_training_images=10000
num_testing_images=1000



##This Notebook extracts faces from multiple datasets, resizes them according to patch size and saves them in the corresponding folders.

In [3]:

"""For FDDB Preprocessing"""

def ellipse_to_bbox(obj):
    # From proj1 reference code
    maj_rad = obj[0]
    min_rad = obj[1]
    angle = obj[2]
    xcenter = obj[3]
    ycenter = obj[4]

    cosin = math.cos(math.radians(-angle))
    sin = math.sin(math.radians(-angle))

    x1 = cosin * (-min_rad) - sin * (-maj_rad) + xcenter
    y1 = sin * (-min_rad) + cosin * (-maj_rad) + ycenter
    x2 = cosin * (min_rad) - sin * (-maj_rad) + xcenter
    y2 = sin * (min_rad) + cosin * (-maj_rad) + ycenter
    x3 = cosin * (min_rad) - sin * (maj_rad) + xcenter
    y3 = sin * (min_rad) + cosin * (maj_rad) + ycenter
    x4 = cosin * (-min_rad) - sin * (maj_rad) + xcenter
    y4 = sin * (-min_rad) + cosin * (maj_rad) + ycenter
    wid = [x1, x2, x3, x4]
    hei = [y1, y2, y3, y4]
    xmin_ = int(min(wid))
    xmax_ = int(max(wid))
    ymin_ = int(min(hei))
    ymax_ = int(max(hei))

    return xmin_, ymin_, xmax_, ymax_


num_folds=10
image_index=1


# to create directory if it doesn't exist
def check_and_create_directory(directory):
    if not os.path.exists(directory):
        os.mkdir(directory)
        print("Created Directory",directory)
    

# to generate random patch that does not overlap with face bbox
def generate_random_patch(region,total_size,patch_size):
    # region - xmin, ymin,xmax,ymax
    # total_size - xlim, ylim, 3
    xlim=total_size[0]
    ylim=total_size[1]
    x_range_face=range(region[0],region[2])
    y_range_face=range(region[1],region[3])
    x=[i for i in range(xlim) if i not in x_range_face]
    y=[i for i in range(ylim) if i not in y_range_face]
    # we have the ranges that are not part of the face
    if(len(x)>patch_size and len(y)>patch_size):
        return x[0],y[0],x[patch_size],y[patch_size] 
    

    
    
check_and_create_directory("Dataset//extracted_faces//")
check_and_create_directory("Dataset//extracted_nonfaces//")


for fold in range(1,num_folds+1):
    txt_file = ("Data//FDDB-folds//FDDB-fold-"+str(fold).zfill(2)+"-ellipseList.txt")
    with open(txt_file) as f: lines = [line.rstrip('\n') for line in f]
    i=0
    while(i<len(lines)):
            p=lines[i]
            path=p
            #print(p)
            i=i+1
            p=lines[i]
            k=int(p)
            #print(k)
            i=i+1
            #print("Ellipse")
            im1=cv2.imread("Data//originalPics//"+path+".jpg") #Read in the image
            im2=np.zeros_like(im1)
            bboxes=[]
            while(k>0):
                p=lines[i]
                #print(p)
                i=i+1
                k=k-1
                #Read the ellipse coordinates
                """<major_axis_radius minor_axis_radius angle center_x center_y 1>."""
                fields=p.split(' ')
                fields = [float(fields[i]) for i in range(5)]
                bbox=ellipse_to_bbox(fields)
                bboxes.append(bbox)
                #print(bbox)
                
            for bbox in bboxes:    
                face=im1[bbox[1]:bbox[3],bbox[0]:bbox[2]] #cropped face
                bbox_rand=generate_random_patch(bbox,im1.shape,patch_size)
                # bbox[3]  # randrange(0,im1.shape[0])
                # bbox[2]  # randrange(0,im1.shape[1])
                if(bbox_rand!=None):
                    non_face=im1[bbox_rand[1]:bbox_rand[3],bbox_rand[0]:bbox_rand[2]] #cropped nonface
                if(non_face.size>0):
                    resized_nonface=cv2.resize(non_face,(patch_size,patch_size),cv2.INTER_LANCZOS4) #resize it
                    cv2.imwrite('Dataset//extracted_nonfaces//nonface_'+str(image_index)+'.jpg',resized_nonface)  # save the image to disk 
                if(face.size>0):
                    resized_image=cv2.resize(face,(patch_size,patch_size),cv2.INTER_LANCZOS4) #resize it
                    cv2.imwrite('Dataset//extracted_faces//face_'+str(image_index)+'.jpg',resized_image)  # save the image to disk 
                    
                    image_index=image_index+1
                    #print(image_index)
                #plt.imshow(res)
                #plt.show()
                """
                #ellipse mask
    
                fields=p.split(' ')
                major_axis_radius=round(float(fields[0]))
                minor_axis_radius=round(float(fields[1]))
                angle=(float(fields[2]))*180.0/np.pi
                center_x=round(float(fields[3]))
                center_y=round(float(fields[4]))
                cv2.ellipse(im2,(center_x,center_y),(major_axis_radius,minor_axis_radius),angle,0,360,(0,0,255),-1)
    
            #res= cv2.bitwise_and(im1,im2) # has the face and black background.
            """
        
print("FDDB Done")


FDDB Done


##Now for AFLW and LFW

In [4]:
print(image_index)

4825


In [5]:
""" AFLW and LFW Preprocessing"""
#aflw_faces = []
#lfw_faces = []

t=4825
# faces AFLW
for img in glob.glob("Data//AFLW//positive_faces//*.png"):
    n= cv2.imread(img)# Read the face
    resized_image=cv2.resize(n,(patch_size,patch_size),cv2.INTER_LANCZOS4) #resize it
    if(resized_image.size==(patch_size,patch_size)):
        cv2.imwrite('Dataset//extracted_faces//face_'+str(t)+'.jpg',resized_image)  # save the image to disk 
        t=t+1          
    #aflw_faces.append(n)
    
#print(" Faces in AFLW",len(aflw_faces))

t=4825
# Non-Faces AFLW
for img in glob.glob("Data//AFLW//negative_faces//*.png"):
    n=cv2.imread(img)# Read the nonface
    resized_image=cv2.resize(n,(patch_size,patch_size),cv2.INTER_LANCZOS4) #resize it
    if(resized_image.size==(patch_size,patch_size)):
        cv2.imwrite('Dataset//extracted_nonfaces//nonface_'+str(t)+'.jpg',resized_image)  # save the image to disk 
        t=t+1        
        
# Positive faces LFW
"""
for img in glob.glob("Data//LFW//*//*.jpg"):
    n= cv2.imread(img)
    lfw_faces.append(n)
    
print("Faces in LFW",len(lfw_faces))
"""
print("Total Number of Images",t)


Total Number of Images 4825


Now that we have tested load and display of the images,we can proceed with resizing and the training and test data split.

In [13]:
index=0
while index<num_training_images:
    loaded_image=cv2.imread('Data//AFLW//positive_faces//positive_face_'+str(index)+'.png')  # load the image from disk 
    resized_image=cv2.resize(loaded_image,(patch_size,patch_size),cv2.INTER_LANCZOS4) #resize it
    cv2.imwrite('Data//AFLW//extracted_faces//train//train_face'+str(index)+'.png',resized_image)  # save the image to disk 
    #repeat for non-faces 
    #loaded_image=cv2.imread('Data//WIDER_train//images//0--Parade'+str(index)+'.png')  # load the image from disk 
    #resized_image=cv2.resize(loaded_image,(patch_size,patch_size),cv2.INTER_LANCZOS4) #resize it
    #cv2.imwrite('Data//AFLW//extracted_faces//train_face'+str(index)+'.png',resized_image)  # save the image to disk
    index+=1 #increment
    #cv2.imshow("Loaded Image",resized_image) # show image
    #cv2.waitKey(1000)

Similarly process for testing images.

In [15]:
index=1000
while index<num_testing_images+num_training_images:
    loaded_image=cv2.imread('Data//AFLW//positive_faces//positive_face_'+str(index)+'.png')  # load the image from disk 
    resized_image=cv2.resize(loaded_image,(patch_size,patch_size),cv2.INTER_LANCZOS4) #resize it
    cv2.imwrite('Data//AFLW//extracted_faces//test//test_face'+str(index-num_training_images)+'.png',resized_image)  # save the image to disk 
    #repeat for non-faces 
    #loaded_image=cv2.imread('Data//WIDER_train//images//0--Parade'+str(index)+'.png')  # load the image from disk 
    #resized_image=cv2.resize(loaded_image,(patch_size,patch_size),cv2.INTER_LANCZOS4) #resize it
    #cv2.imwrite('Data//AFLW//extracted_faces//train_face'+str(index)+'.png',resized_image)  # save the image to disk
    index+=1 #increment
    #cv2.imshow("Loaded Image",resized_image) # show image
    #cv2.waitKey(1000)

Now we can proceed with training.


## PREPARE TRAINING DATA

(1000, 10800)
(1000, 10800)
