1. Encoding faces via deep learning

In [1]:
from imutils import paths
import face_recognition
import pickle
import cv2

# directory that is scanned for input images
dataset = "./dataset"
# path of the pickle file the encodings are written to
encodings_file = "./encodings.pickle"
# detector model handed to face_recognition.face_locations
detection_method = "cnn"

# grab the paths to the input images in our dataset, then initialize
# our data list (which we'll soon populate)
print("[INFO] quantifying faces...")
imagePaths = list(paths.list_images(dataset))
data = []

for (i, imagePath) in enumerate(imagePaths):
    print("[INFO] processing image {}/{}".format(i + 1,
        len(imagePaths)))
    print(imagePath)

    # load the input image; cv2.imread signals an unreadable or
    # corrupt file by returning None instead of raising, so skip such
    # files rather than letting cvtColor abort the whole run
    image = cv2.imread(imagePath)
    if image is None:
        print("[WARN] could not read image, skipping: {}".format(
            imagePath))
        continue

    # convert from BGR (OpenCV ordering) to RGB (dlib ordering)
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # detect the bounding boxes corresponding to each face in the
    # input image
    boxes = face_recognition.face_locations(rgb,
        model=detection_method)

    # compute the facial embedding for each detected face
    encodings = face_recognition.face_encodings(rgb, boxes)

    # build one record per detected face holding the image path,
    # bounding box location, and facial encoding
    d = [{"imagePath": imagePath, "loc": box, "encoding": enc}
        for (box, enc) in zip(boxes, encodings)]
    data.extend(d)

# dump the facial encodings data to disk
print("[INFO] serializing encodings...")
with open(encodings_file, "wb") as f:
    pickle.dump(data, f)


[INFO] quantifying faces...
[INFO] processing image 1/21
./dataset/10.jpeg
[INFO] processing image 2/21
./dataset/0.jpeg
[INFO] processing image 3/21
./dataset/1.jpeg
[INFO] processing image 4/21
./dataset/IMG_9875.jpeg
[INFO] processing image 5/21
./dataset/IMG_9859.jpeg
[INFO] processing image 6/21
./dataset/IMG_8946.jpeg
[INFO] processing image 7/21
./dataset/6.jpeg
[INFO] processing image 8/21
./dataset/7.jpeg
[INFO] processing image 9/21
./dataset/IMG_9858.jpeg
[INFO] processing image 10/21
./dataset/IMG_8875.jpeg
[INFO] processing image 11/21
./dataset/8.jpeg
[INFO] processing image 12/21
./dataset/4.jpeg
[INFO] processing image 13/21
./dataset/IMG_9657.jpeg
[INFO] processing image 14/21
./dataset/IMG_9856.jpeg
[INFO] processing image 15/21
./dataset/5.jpeg
[INFO] processing image 16/21
./dataset/9.jpeg
[INFO] processing image 17/21
./dataset/IMG_3542.jpeg
[INFO] processing image 18/21
./dataset/2.jpeg
[INFO] processing image 19/21
./dataset/IMG_3509.jpeg
[INFO] processing image 

2. Clustering faces

In [None]:
from sklearn.cluster import DBSCAN
from imutils import build_montages
from helper import draw
import numpy as np
import pickle
import cv2

# path of the pickle file holding the face records to cluster
encodings_file = "./encodings.pickle"
# num of parallel jobs to run (-1 will use all CPUs)
num_of_jobs = -1

# load the serialized face encodings + bounding box locations from
# disk, then extract the set of encodings so we can cluster on them
# NOTE(review): unpickling can execute arbitrary code -- only load an
# encodings file that was produced by a trusted run
print("[INFO] loading encodings...")
# the context manager closes the file handle once loading finishes
with open(encodings_file, "rb") as f:
    data = pickle.load(f)
data = np.array(data)
encodings = [d["encoding"] for d in data]

# cluster the embeddings
print("[INFO] clustering...")
clt = DBSCAN(metric="euclidean", n_jobs=num_of_jobs)
clt.fit(encodings)

# determine the total number of unique faces found in the dataset;
# labels greater than -1 are counted, the -1 label is shown later as
# "Unknown Faces"
labelIDs = np.unique(clt.labels_)
numUniqueFaces = len(np.where(labelIDs > -1)[0])
print("[INFO] # unique faces: {}".format(numUniqueFaces))

for labelID in labelIDs:
    # find all indexes into the `data` array that belong to the
    # current label ID, then randomly sample a maximum of 25 indexes
    # from the set (without replacement)
    print("[INFO] faces for face ID: {}".format(labelID))
    idxs = np.where(clt.labels_ == labelID)[0]
    idxs = np.random.choice(idxs, size=min(25, len(idxs)), replace=False)

    # initialize the list of faces to include in the montage
    faces = []

    for i in idxs:
        # load the input image; cv2.imread returns None when the file
        # can no longer be read (e.g. moved since encoding), so skip
        # it instead of failing on the slice below
        image = cv2.imread(data[i]["imagePath"])
        if image is None:
            print("[WARN] could not read image, skipping: {}".format(
                data[i]["imagePath"]))
            continue

        # extract the face ROI from the stored bounding box
        (top, right, bottom, left) = data[i]["loc"]
        face = image[top:bottom, left:right]

        # force resize the face ROI to 96x96 and then add it to the
        # faces montage list
        face = cv2.resize(face, (96, 96))
        faces.append(face)

    # nothing to show if every sampled image was unreadable; indexing
    # the montage result with [0] would fail on an empty list
    if not faces:
        continue

    # create a montage using 96x96 "tiles" with 5 rows and 5 columns
    montage = build_montages(faces, (96, 96), (5, 5))[0]

    # show the output montage; label -1 is titled "Unknown Faces"
    title = "Face ID #{}".format(labelID)
    title = "Unknown Faces" if labelID == -1 else title
    draw(title, montage)


