In [1]:
from imutils import paths
import numpy as np
import imutils
import pickle
import cv2
import os, sys

sys.path.append('../utilities')
from alignFaces import *

In [2]:
# Central configuration for this notebook: every path and threshold the later
# cells read lives here so it can be tuned in one place.
data_params = {
    # path to input directory of faces and images
    "dataset": "../../facial_recognition_data/faces_dataset/",
    # path to output of database of facial embeddings
    "embeddings": "../../output/embeddings.pickle",
    # path to OpenCV's deep learning face detector
    "detector": "../../facial_detection/models/",
    # path to OpenCV's deep learning face embedding model
    "embedding_model": "../models/openface.nn4.small2.v1.t7",
    # min probability to filter weak detections
    "confidence": 0.5,
}

In [3]:
# load our serialized face detector from disk
print("[INFO] loading face detector...")
# os.path.join (rather than os.path.sep.join) copes with the trailing "/" on
# the configured detector directory, so no doubled or mixed separator ends up
# in the path handed to OpenCV.
protoPath = os.path.join(data_params["detector"], "deploy.prototxt")
modelPath = os.path.join(data_params["detector"], "res10_300x300_ssd_iter_140000.caffemodel")
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# load our serialized face embedding model from disk
print("[INFO] loading face recognizer...")
embedder = cv2.dnn.readNetFromTorch(data_params["embedding_model"])

[INFO] loading face detector...
[INFO] loading face recognizer...


In [4]:
# Enumerate every image file found under the configured dataset directory
print("[INFO] quantifying faces...")
datasetDir = data_params["dataset"]
imagePaths = list(paths.list_images(datasetDir))

# show the discovered files, one path per line
print("\n".join(imagePaths))

# accumulators filled by the embedding loop: one embedding vector and one
# person name per accepted face, plus a running count of faces processed
knownEmbeddings, knownNames = [], []
total = 0

[INFO] quantifying faces...
../../facial_recognition_data/faces_dataset/andrew\andrew_frame0.jpg
../../facial_recognition_data/faces_dataset/andrew\andrew_frame1001.jpg
../../facial_recognition_data/faces_dataset/andrew\andrew_frame1008.jpg
../../facial_recognition_data/faces_dataset/andrew\andrew_frame1015.jpg
../../facial_recognition_data/faces_dataset/andrew\andrew_frame1022.jpg
../../facial_recognition_data/faces_dataset/andrew\andrew_frame1029.jpg
../../facial_recognition_data/faces_dataset/andrew\andrew_frame1036.jpg
../../facial_recognition_data/faces_dataset/andrew\andrew_frame1043.jpg
../../facial_recognition_data/faces_dataset/andrew\andrew_frame105.jpg
../../facial_recognition_data/faces_dataset/andrew\andrew_frame1050.jpg
../../facial_recognition_data/faces_dataset/andrew\andrew_frame1057.jpg
../../facial_recognition_data/faces_dataset/andrew\andrew_frame1064.jpg
../../facial_recognition_data/faces_dataset/andrew\andrew_frame1071.jpg
../../facial_recognition_data/faces_data

In [5]:
# Loop over the image paths and, for each image, align it, localize the most
# confident face, crop it, and append its embedding and person name to the
# accumulators (knownEmbeddings / knownNames / total).
# NOTE: the accumulators are not reset here, so re-running this cell without
# re-running the cell that initializes them appends every face a second time.
af = AlignFaces()
for (i, imagePath) in enumerate(imagePaths):
    print("[INFO] processing image {}/{}".format(i + 1, len(imagePaths)))

    # The person name is the directory that directly contains the image.
    # os.path handles the platform's separators (including the mixed
    # "dataset/andrew\andrew_frame0.jpg" form seen on Windows), whereas manual
    # slicing on '/' and '\\' yields the file name on all-forward-slash paths.
    name = os.path.basename(os.path.dirname(imagePath))

    image = cv2.imread(imagePath)  # Load the image
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it rather than crash part-way through the dataset.
        print("[WARN] could not read image {}; skipping".format(imagePath))
        continue
    image = af.align_faces(image)  # Align the image
    image = imutils.resize(image, width=600)  # Resize the image (while maintaining aspect ratio)
    (h, w) = image.shape[:2]  # Grab the image dimensions.

    # construct a blob from the image
    imageBlob = cv2.dnn.blobFromImage(
        cv2.resize(image, (300, 300)), 1.0, (300, 300),
        (104.0, 177.0, 123.0), swapRB=False, crop=False)

    # apply OpenCV's deep learning-based face detector to localize
    # faces in the input image
    detector.setInput(imageBlob)
    detections = detector.forward()

    # Ensure at least one candidate was returned. Candidates are indexed along
    # axis 2 (detections[0, 0, k, ...]); len(detections) only measures the
    # leading axis, and np.argmax raises on an empty candidate list.
    if detections.shape[2] == 0:
        continue

    # we're making the assumption that each image has only ONE
    # face, so find the bounding box with the largest probability
    # (kept in its own variable so the loop index `i` is not overwritten)
    best = int(np.argmax(detections[0, 0, :, 2]))
    confidence = detections[0, 0, best, 2]

    # ensure that the detection with the largest probability also
    # meets our minimum probability test (thus helping filter out
    # weak detections)
    if confidence <= data_params["confidence"]:
        continue

    # compute the (x, y)-coordinates of the bounding box for the face by
    # scaling the detector's box values by the image width/height
    box = detections[0, 0, best, 3:7] * np.array([w, h, w, h])
    (startX, startY, endX, endY) = box.astype("int")

    # Clamp the box to the image: a negative start coordinate would otherwise
    # be interpreted by the slice below as an offset from the far edge.
    startX, startY = max(0, startX), max(0, startY)
    endX, endY = min(w, endX), min(h, endY)

    # extract the face ROI and grab the ROI dimensions
    face = image[startY:endY, startX:endX]
    (fH, fW) = face.shape[:2]

    # ensure the face width and height are sufficiently large
    if fW < 20 or fH < 20:
        continue

    # construct a blob for the face ROI, then pass the blob
    # through our face embedding model to obtain the 128-d
    # quantification of the face
    faceBlob = cv2.dnn.blobFromImage(face, 1.0 / 255,
        (96, 96), (0, 0, 0), swapRB=True, crop=False)
    embedder.setInput(faceBlob)
    vec = embedder.forward()

    # add the name of the person + corresponding face
    # embedding to their respective lists
    knownNames.append(name)
    knownEmbeddings.append(vec.flatten())
    total += 1

[INFO] processing image 1/1000
[INFO] processing image 2/1000
[INFO] processing image 3/1000
[INFO] processing image 4/1000
[INFO] processing image 5/1000
[INFO] processing image 6/1000
[INFO] processing image 7/1000
[INFO] processing image 8/1000
[INFO] processing image 9/1000
[INFO] processing image 10/1000
[INFO] processing image 11/1000
[INFO] processing image 12/1000
[INFO] processing image 13/1000
[INFO] processing image 14/1000
[INFO] processing image 15/1000
[INFO] processing image 16/1000
[INFO] processing image 17/1000
[INFO] processing image 18/1000
[INFO] processing image 19/1000
[INFO] processing image 20/1000
[INFO] processing image 21/1000
[INFO] processing image 22/1000
[INFO] processing image 23/1000
[INFO] processing image 24/1000
[INFO] processing image 25/1000
[INFO] processing image 26/1000
[INFO] processing image 27/1000
[INFO] processing image 28/1000
[INFO] processing image 29/1000
[INFO] processing image 30/1000
[INFO] processing image 31/1000
[INFO] processing

[INFO] processing image 238/1000
[INFO] processing image 239/1000
[INFO] processing image 240/1000
[INFO] processing image 241/1000
[INFO] processing image 242/1000
[INFO] processing image 243/1000
[INFO] processing image 244/1000
[INFO] processing image 245/1000
[INFO] processing image 246/1000
[INFO] processing image 247/1000
[INFO] processing image 248/1000
[INFO] processing image 249/1000
[INFO] processing image 250/1000
[INFO] processing image 251/1000
[INFO] processing image 252/1000
[INFO] processing image 253/1000
[INFO] processing image 254/1000
[INFO] processing image 255/1000
[INFO] processing image 256/1000
[INFO] processing image 257/1000
[INFO] processing image 258/1000
[INFO] processing image 259/1000
[INFO] processing image 260/1000
[INFO] processing image 261/1000
[INFO] processing image 262/1000
[INFO] processing image 263/1000
[INFO] processing image 264/1000
[INFO] processing image 265/1000
[INFO] processing image 266/1000
[INFO] processing image 267/1000
[INFO] pro

[INFO] processing image 419/1000
[INFO] processing image 420/1000
[INFO] processing image 421/1000
[INFO] processing image 422/1000
[INFO] processing image 423/1000
[INFO] processing image 424/1000
[INFO] processing image 425/1000
[INFO] processing image 426/1000
[INFO] processing image 427/1000
[INFO] processing image 428/1000
[INFO] processing image 429/1000
[INFO] processing image 430/1000
[INFO] processing image 431/1000
[INFO] processing image 432/1000
[INFO] processing image 433/1000
[INFO] processing image 434/1000
[INFO] processing image 435/1000
[INFO] processing image 436/1000
[INFO] processing image 437/1000
[INFO] processing image 438/1000
[INFO] processing image 439/1000
[INFO] processing image 440/1000
[INFO] processing image 441/1000
[INFO] processing image 442/1000
[INFO] processing image 443/1000
[INFO] processing image 444/1000
[INFO] processing image 445/1000
[INFO] processing image 446/1000
[INFO] processing image 447/1000
[INFO] processing image 448/1000
[INFO] pro

[INFO] processing image 656/1000
[INFO] no detections found in image. Returning original.
[INFO] processing image 657/1000
[INFO] processing image 658/1000
[INFO] processing image 659/1000
[INFO] processing image 660/1000
[INFO] processing image 661/1000
[INFO] processing image 662/1000
[INFO] processing image 663/1000
[INFO] no detections found in image. Returning original.
[INFO] processing image 664/1000
[INFO] processing image 665/1000
[INFO] processing image 666/1000
[INFO] processing image 667/1000
[INFO] processing image 668/1000
[INFO] no detections found in image. Returning original.
[INFO] processing image 669/1000
[INFO] processing image 670/1000
[INFO] processing image 671/1000
[INFO] processing image 672/1000
[INFO] processing image 673/1000
[INFO] processing image 674/1000
[INFO] processing image 675/1000
[INFO] processing image 676/1000
[INFO] processing image 677/1000
[INFO] processing image 678/1000
[INFO] processing image 679/1000
[INFO] processing image 680/1000
[INF

[INFO] processing image 875/1000
[INFO] processing image 876/1000
[INFO] processing image 877/1000
[INFO] processing image 878/1000
[INFO] processing image 879/1000
[INFO] processing image 880/1000
[INFO] processing image 881/1000
[INFO] processing image 882/1000
[INFO] processing image 883/1000
[INFO] processing image 884/1000
[INFO] processing image 885/1000
[INFO] processing image 886/1000
[INFO] processing image 887/1000
[INFO] processing image 888/1000
[INFO] processing image 889/1000
[INFO] processing image 890/1000
[INFO] processing image 891/1000
[INFO] processing image 892/1000
[INFO] processing image 893/1000
[INFO] no detections found in image. Returning original.
[INFO] processing image 894/1000
[INFO] processing image 895/1000
[INFO] processing image 896/1000
[INFO] processing image 897/1000
[INFO] processing image 898/1000
[INFO] processing image 899/1000
[INFO] processing image 900/1000
[INFO] processing image 901/1000
[INFO] processing image 902/1000
[INFO] processing i

In [6]:
# dump the facial embeddings + names to disk
print("[INFO] serializing {} encodings...".format(total))
data = {"embeddings": knownEmbeddings, "names": knownNames}

# Make sure the destination directory exists so a missing output folder does
# not throw away the results of the embedding loop.
outputDir = os.path.dirname(data_params["embeddings"])
if outputDir:
    os.makedirs(outputDir, exist_ok=True)

# The context manager closes the file even if pickling or writing raises.
with open(data_params["embeddings"], "wb") as f:
    f.write(pickle.dumps(data))

[INFO] serializing 755 encodings...
