First, we import the libraries used throughout the notebook: NumPy for numerical computation, OpenCV for image processing, Matplotlib for plotting, and `os`, `shutil`, and `pathlib.Path` for managing files and directories.

In [20]:
import numpy as np
import cv2
import matplotlib
from matplotlib import pyplot as plt
import os
from pathlib import Path
import shutil

Next we load a sample image from the test_data folder using OpenCV's imread function, and then convert the image to grayscale. Converting to grayscale is a common preprocessing step for facial detection algorithms because it simplifies the image data.

In [21]:
# Load the sample image. OpenCV returns the pixels in BGR channel order.
img = cv2.imread('./test_data/cary_elwes.jpg')
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # rather than raising, so fail here with a clear message.
    raise FileNotFoundError("Could not read image './test_data/cary_elwes.jpg'")

# Single-channel version of the image, used as detector input further down.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Matplotlib interprets 3-channel arrays as RGB, so convert for display only;
# img itself stays in BGR for the OpenCV calls that follow.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

This cell runs face and eye detection on the sample image. We first copy the original image so that it can be restored afterwards. Then we load the Haar cascade classifiers for face and eye detection. We detect faces in the grayscale image, and for each detected face, we look for eyes inside the face region. Rectangles are drawn around each face and eye, and the annotated image is displayed in a separate OpenCV window for up to three seconds. Finally, `img` is reset to the unannotated copy.

In [22]:
# Keep a pristine copy: the rectangles below are drawn directly onto img.
img_orig = img.copy()

# Haar cascade classifiers for frontal faces and for eyes, loaded from XML.
face_cascade = cv2.CascadeClassifier(
    './haar_cascades_classifier/haarcascade_frontalface_default.xml'
)
eye_cascade = cv2.CascadeClassifier(
    './haar_cascades_classifier/haarcascade_eye.xml'
)

# Each detection is an (x, y, width, height) box in image coordinates.
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)

for (x, y, w, h) in faces:
    # NumPy slices are views, so drawing on face_region_color also marks img.
    face_region_color = img[y:y + h, x:x + w]
    face_region_gray = gray[y:y + h, x:x + w]

    # Outline the face on the full image (colour tuple is in BGR order).
    face_top_left = (x, y)
    face_bottom_right = (x + w, y + h)
    cv2.rectangle(img, face_top_left, face_bottom_right, (255, 0, 0), 2)

    # Eye boxes are relative to the face crop, hence drawn on the crop view.
    eyes = eye_cascade.detectMultiScale(face_region_gray)
    for (x_eye, y_eye, w_eye, h_eye) in eyes:
        eye_top_left = (x_eye, y_eye)
        eye_bottom_right = (x_eye + w_eye, y_eye + h_eye)
        eyes_img = cv2.rectangle(
            face_region_color, eye_top_left, eye_bottom_right, (0, 255, 0), 2
        )

# Show the annotated image for up to 3000 ms, then close the window.
cv2.imshow('img', img)
cv2.waitKey(3000)
cv2.destroyAllWindows()

# Drop the annotations by pointing img back at the untouched copy.
img = img_orig

Here we define a helper function that takes an image path and attempts to detect face regions containing at least two eyes. The function returns a list of cropped face images, which will later be used for training or visualization.

In [23]:
def extract_face_regions(img_path, silent=True):
    """Return colour crops of the faces in an image that show at least two eyes.

    The image is read with OpenCV, faces are located with the module-level
    ``face_cascade`` and each face is kept only if ``eye_cascade`` finds two
    or more eyes inside it.

    Args:
        img_path: Path of the image file to analyse.
        silent: When false, print a message if the image cannot be read or
            if no qualifying face is found.

    Returns:
        A list of BGR image arrays, one per accepted face. Each array is a
        slice (view) of the loaded image. The list is empty when the file
        cannot be read as an image or no face qualifies.
    """
    face_regions = []  # clearly visible faces (faces with 2 eyes)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising for missing, unreadable
        # or non-image files; passing None on would crash inside cvtColor.
        if not silent:
            print('Could not read image: ' + str(img_path))
        return face_regions

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)

    for (x, y, w, h) in faces:
        face_region_gray = gray[y:y + h, x:x + w]   # face area, grayscale
        face_region_color = img[y:y + h, x:x + w]   # face area, colour

        # Eyes are searched for only inside the detected face rectangle.
        eyes = eye_cascade.detectMultiScale(face_region_gray)

        if len(eyes) >= 2:
            face_regions.append(face_region_color)

    if not face_regions and not silent:
        print('No clear faces found.')

    return face_regions

Let's see the function in action! Here is what it does to the image of Cary Elwes we saw before.

In [26]:
# Crop the faces from the sample image, reporting if none is found.
a = extract_face_regions('./test_data/cary_elwes.jpg', silent = False)
if len(a) > 0:
    # The crops are in OpenCV's BGR order; Matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(a[0], cv2.COLOR_BGR2RGB))

In [8]:
# Load a test image with several people in it (BGR channel order).
img = cv2.imread('./test_data/multiple_people.jpg')
if img is None:
    # cv2.imread returns None rather than raising when the file is unreadable.
    raise FileNotFoundError("Could not read image './test_data/multiple_people.jpg'")

# Convert to RGB for display only; img itself is left in BGR.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

In [9]:
# Show every accepted face crop from the multi-person image in its own figure.
cr_faces = extract_face_regions('./test_data/multiple_people.jpg')
for cr_face in cr_faces:
    plt.figure()
    # The crops are in OpenCV's BGR order; Matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(cr_face, cv2.COLOR_BGR2RGB))

In [10]:
path_raw_data = "./raw_data/"
path_cr_data = "./cr_data/"

# Every sub-directory of the raw data folder is treated as one actor.
# os.scandir yields entries in arbitrary order, so sort by name to make the
# actor order reproducible; the with-block closes the directory iterator.
with os.scandir(path_raw_data) as raw_entries:
    actor_dirs = sorted(
        (entry for entry in raw_entries if entry.is_dir()),
        key=lambda entry: entry.name,
    )

# Parallel lists: img_folders[i] is the raw image folder of actor_names[i].
img_folders = [entry.path for entry in actor_dirs]
actor_names = [entry.name for entry in actor_dirs]

# If the cropped data folder was made in a previous run, we want to remove it and remake it
cr_data_folder = Path(path_cr_data)
if cr_data_folder.exists() and cr_data_folder.is_dir():
    shutil.rmtree(cr_data_folder)

os.mkdir(path_cr_data)

# One empty output folder per actor inside the cropped data folder.
for name in actor_names:
    path_to_cr_actor = os.path.join(path_cr_data, name)
    os.makedirs(path_to_cr_actor, exist_ok=True)
    

In [11]:
# Maps each actor name to the list of cropped face files written for them.
cropped_img_dictionary = {}

# Walk over every actor that was found instead of a fixed count, so the cell
# works for any number of actor folders.
for actor_ind, (actor_name, img_folder) in enumerate(zip(actor_names, img_folders)):
    print(actor_ind)  # progress indicator

    path_to_cr_actor = path_cr_data + actor_name
    cr_face_paths = []
    cropped_photo_ind = 0  # running number used in the output file names

    with os.scandir(img_folder) as images:
        for image in images:
            # Sub-directories and other non-files cannot be read as images.
            if not image.is_file():
                continue

            path_to_image = img_folder + '/' + image.name
            cr_faces = extract_face_regions(path_to_image, silent = True)

            for cr_face in cr_faces:
                cropped_photo_ind += 1

                cr_face_path = path_to_cr_actor + '/' + str(actor_name) + str(cropped_photo_ind) + '.jpg'

                # cv2.imwrite returns False when the file could not be saved;
                # only record paths that actually exist on disk.
                if cv2.imwrite(cr_face_path, cr_face):
                    cr_face_paths.append(cr_face_path)
                else:
                    print('Could not write ' + cr_face_path)

    cropped_img_dictionary[actor_name] = cr_face_paths
        