In [1]:
# import mtcnn
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2 # opencv
from mtcnn.mtcnn import MTCNN
from matplotlib import pyplot as plt
import os
from PIL import Image

In [2]:
# Function to align the eyes horizontally
def align_eyes_horizontally(image, keypoints):
    """Rotate ``image`` so that the line joining the two eyes is horizontal.

    Parameters
    ----------
    image : numpy.ndarray
        Image array whose first two axes are (height, width).
    keypoints : dict
        Mapping with ``'left_eye'`` and ``'right_eye'`` entries, each an
        ``(x, y)`` pair expressed in the pixel coordinates of ``image``.

    Returns
    -------
    numpy.ndarray
        The rotated image, with the same width and height as ``image``.
        Regions that rotate in from outside the frame are filled by OpenCV's
        default border handling.
    """
    left_eye = keypoints['left_eye']
    right_eye = keypoints['right_eye']

    # Work in plain Python floats: the detector may hand back NumPy scalars,
    # and some OpenCV builds refuse to parse those as the rotation centre.
    left_x, left_y = float(left_eye[0]), float(left_eye[1])
    right_x, right_y = float(right_eye[0]), float(right_eye[1])

    # Angle (in degrees) of the eye line relative to the horizontal axis
    dY = right_y - left_y
    dX = right_x - left_x
    angle = float(np.degrees(np.arctan2(dY, dX)))

    # True midpoint between the eyes; floor division would discard the
    # half-pixel and shift the pivot.
    eyes_center = ((left_x + right_x) / 2.0, (left_y + right_y) / 2.0)

    # Rotate the image around the midpoint of the eyes, keeping the scale at 1
    M = cv2.getRotationMatrix2D(eyes_center, angle, 1.0)
    aligned_image = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))

    return aligned_image

In [3]:
def _get_default_detector():
    """Return a shared MTCNN detector, constructing it on first use."""
    detector = getattr(_get_default_detector, '_instance', None)
    if detector is None:
        detector = MTCNN()
        _get_default_detector._instance = detector
    return detector


# extract a single face from a given photograph
def extract_face(filename, required_size=(160, 160), detector=None):
    """Detect, crop, eye-align and resize the first face found in an image file.

    Parameters
    ----------
    filename : str
        Path of the image to read.
    required_size : tuple of int
        Target ``(width, height)`` passed to ``PIL.Image.resize``.
    detector : object, optional
        Anything exposing ``detect_faces(pixels)`` with MTCNN's result format.
        When omitted, a single shared ``MTCNN()`` instance is reused across
        calls instead of rebuilding the detector for every image.

    Returns
    -------
    numpy.ndarray or None
        RGB array of the aligned face resized to ``required_size``, or
        ``None`` when no face is detected or the detected box lies entirely
        outside the image.
    """
    # Load as RGB and release the file handle as soon as the pixels are copied
    with Image.open(filename) as image:
        pixels = np.asarray(image.convert('RGB'))

    if detector is None:
        detector = _get_default_detector()

    # detect faces in the image
    results = detector.detect_faces(pixels)

    if len(results) == 0:
        print('No face DETECTED. Returning None')
        return None  # No face detected, return None

    first = results[0]

    # The box is (x, y, width, height); the origin can be negative when the
    # face touches the image border.
    x1, y1, width, height = (int(v) for v in first['box'])
    # Compute the far corner from the *reported* origin, then clamp the origin
    # to the image. Taking abs() instead would slide the whole window.
    x2, y2 = x1 + width, y1 + height
    x1, y1 = max(x1, 0), max(y1, 0)

    # extract the face (slicing clamps the far corner to the image size)
    face = pixels[y1:y2, x1:x2]
    if face.shape[0] == 0 or face.shape[1] == 0:
        print('Detected face box is empty. Returning None')
        return None

    # Landmarks are reported in full-image coordinates; shift them into the
    # crop's coordinate frame so the rotation pivots on the eyes in the crop.
    keypoints = {
        name: (int(point[0]) - x1, int(point[1]) - y1)
        for name, point in first['keypoints'].items()
    }
    aligned_face = align_eyes_horizontally(face, keypoints)

    # resize pixels to the model size
    resized = Image.fromarray(aligned_face).resize(required_size)
    return np.asarray(resized)

In [4]:
def load_face(dir):
    """Extract one face from every regular file directly inside ``dir``.

    Parameters
    ----------
    dir : str
        Directory holding the images of a single class. A trailing path
        separator is accepted but not required.

    Returns
    -------
    list
        Face arrays as returned by ``extract_face``, in sorted filename order.
        Files for which ``extract_face`` returns ``None`` are skipped, so the
        list never contains ``None``.
    """
    faces = list()
    # Sort so that repeated runs produce the samples in the same order
    for filename in sorted(os.listdir(dir)):
        path = os.path.join(dir, filename)
        # Nested folders and other non-file entries are not images
        if not os.path.isfile(path):
            continue
        face = extract_face(path)
        # A None placeholder would prevent stacking the faces into one array
        if face is None:
            continue
        faces.append(face)
    return faces

def load_dataset(dir):
    """Load every class sub-folder of ``dir`` into parallel face/label arrays.

    Each immediate sub-directory is treated as one class and its name is used
    as the label for every face loaded from it.

    Parameters
    ----------
    dir : str
        Root directory containing one sub-directory per class. A trailing path
        separator is accepted but not required.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X`` holds the face arrays and ``y`` the matching
        class names, in sorted class order.
    """
    # list for faces and labels
    X, y = list(), list()
    # Sort so that repeated runs produce the classes in the same order
    for subdir in sorted(os.listdir(dir)):
        # Stray files next to the class folders are not classes
        if not os.path.isdir(os.path.join(dir, subdir)):
            continue
        # The empty last component keeps a trailing separator on the path
        path = os.path.join(dir, subdir, '')
        # Drop failed extractions so faces and labels stay aligned
        faces = [face for face in load_face(path) if face is not None]
        labels = [subdir] * len(faces)
        print("loaded %d sample for class: %s" % (len(faces), subdir))  # print progress
        X.extend(faces)
        y.extend(labels)
    return np.asarray(X), np.asarray(y)

# Training split: one sub-folder per class under the train directory
train_root = 'lfw-deepfunneled/data/train/'
trainX, trainy = load_dataset(train_root)
print(trainX.shape, trainy.shape)

# Test split: same folder layout under the test directory
test_root = 'lfw-deepfunneled/data/test/'
testX, testy = load_dataset(test_root)
print(testX.shape, testy.shape)

# Persist all four arrays in one compressed archive for later notebooks.
# They are passed positionally, so NumPy stores them as arr_0 .. arr_3
# in the order given here.
archive_name = 'lfw_train_test_compressed_aligned.npz'
np.savez_compressed(archive_name, trainX, trainy, testX, testy)

loaded 213 sample for class: Colin_Powell
loaded 109 sample for class: Donald_Rumsfeld
loaded 477 sample for class: George_W_Bush
loaded 99 sample for class: Gerhard_Schroeder
loaded 130 sample for class: Tony_Blair
(1028, 160, 160, 3) (1028,)
loaded 23 sample for class: Colin_Powell
loaded 12 sample for class: Donald_Rumsfeld
loaded 53 sample for class: George_W_Bush
loaded 10 sample for class: Gerhard_Schroeder
loaded 14 sample for class: Tony_Blair
(112, 160, 160, 3) (112,)
