In [30]:
# USAGE
# python cluster_faces.py --encodings encodings.pickle

# import the necessary packages
import argparse
import pickle

import cv2
import face_recognition
import numpy as np
from imutils import build_montages
from sklearn.cluster import DBSCAN

In [37]:
class ClusterEncoding:
    """Cluster pickled face encodings with DBSCAN and preview each cluster.

    Attributes set by ``__init__``:
        data: NumPy array built from the unpickled records.
        encodings: list with the ``"encoding"`` value of every record.
    """

    def __init__(self, encodingFile):
        """Load the serialized face records stored at ``encodingFile``.

        Every record is indexed with the key ``"encoding"`` here; ``show``
        additionally reads the keys ``"imagePath"`` and ``"loc"``.

        Args:
            encodingFile: path of the pickle file to read.
        """
        # load the serialized face encodings + bounding box locations from
        # disk, then extract the set of encodings so we can cluster on them
        print("[INFO] loading encodings...")
        # NOTE: unpickling can execute arbitrary code; only load files that
        # come from a trusted source.
        # The context manager guarantees the file handle is closed.
        with open(encodingFile, "rb") as fh:
            records = pickle.load(fh)
        self.data = np.array(records)
        self.encodings = [d["encoding"] for d in self.data]

    def clusterEncodingDBSCAN(self, jobs=-1):
        """Fit DBSCAN with the Euclidean metric on the loaded encodings.

        Args:
            jobs: number of parallel jobs, passed to DBSCAN as ``n_jobs``
                (-1 will use all CPUs). Defaults to -1.

        Returns:
            The fitted ``DBSCAN`` object.

        Raises:
            ValueError: if there are no encodings to cluster.
        """
        print("[INFO] clustering...")
        # fail early with a clear message instead of an estimator error
        if len(self.encodings) == 0:
            raise ValueError("no face encodings available to cluster")
        dbscan_clt = DBSCAN(metric="euclidean", n_jobs=jobs)
        dbscan_clt.fit(self.encodings)
        return dbscan_clt

    @staticmethod
    def _loadFace(record):
        """Return the 96x96 face crop for one record, or None if unusable."""
        image = cv2.imread(record["imagePath"])
        # cv2.imread does not raise on a missing/unreadable file
        if image is None:
            print("[WARN] could not read image: {}".format(
                record["imagePath"]))
            return None

        (top, right, bottom, left) = record["loc"]
        face = image[top:bottom, left:right]
        # an empty crop cannot be resized
        if face.size == 0:
            print("[WARN] empty face region in image: {}".format(
                record["imagePath"]))
            return None

        # force resize the face ROI to 96x96
        return cv2.resize(face, (96, 96))

    def show(self, clt):
        """Display a montage of sampled faces for every cluster label.

        For each unique value in ``clt.labels_`` at most 25 member records
        are sampled at random without replacement, their face regions are
        cropped and resized to 96x96, and a 5x5 montage is shown in a window
        until a key is pressed. Records whose image cannot be read or whose
        crop is empty are skipped with a warning. All windows are destroyed
        when the method finishes, even if an error occurs.

        Args:
            clt: clustering object exposing a ``labels_`` array, e.g. the
                value returned by ``clusterEncodingDBSCAN``.
        """
        # determine the total number of unique faces found in the dataset
        # (label -1 is excluded from the count and titled "Unknown Faces")
        labelIDs = np.unique(clt.labels_)
        numUniqueFaces = int(np.count_nonzero(labelIDs > -1))
        print("[INFO] # unique faces: {}".format(numUniqueFaces))

        try:
            # loop over the unique face integers
            for labelID in labelIDs:
                # find all indexes into the `data` array that belong to the
                # current label ID, then randomly sample a maximum of 25
                # indexes from the set
                print("[INFO] faces for face ID: {}".format(labelID))
                idxs = np.where(clt.labels_ == labelID)[0]
                idxs = np.random.choice(idxs, size=min(25, len(idxs)),
                    replace=False)

                # collect the usable face crops for the montage
                faces = []
                for i in idxs:
                    face = self._loadFace(self.data[i])
                    if face is not None:
                        faces.append(face)

                # nothing to display for this label
                if not faces:
                    print("[WARN] no displayable faces for face ID: {}".format(
                        labelID))
                    continue

                # create a montage using 96x96 "tiles" with 5 rows and
                # 5 columns
                montage = build_montages(faces, (96, 96), (5, 5))[0]

                # show the output montage
                title = "Face ID #{}".format(labelID)
                title = "Unknown Faces" if labelID == -1 else title
                cv2.imshow(title, montage)
                cv2.waitKey(0)
        finally:
            # always release the GUI windows, even after an exception
            cv2.destroyAllWindows()

In [38]:
# Build the clustering helper from the pickled encodings on disk.
obj = ClusterEncoding(
    encodingFile='/home/dai/Documents/pgdai/project/FFR/embedding/clstr_encoding.pickle'
)

[INFO] loading encodings...


In [39]:
# Run DBSCAN on the loaded encodings; -1 will use all CPUs.
cluster = obj.clusterEncodingDBSCAN(jobs=-1)

[INFO] clustering...


In [16]:
# Open one montage window per cluster label found by DBSCAN.
obj.show(clt=cluster)

[INFO] # unique faces: 4
[INFO] faces for face ID: -1
[INFO] faces for face ID: 0
[INFO] faces for face ID: 1
[INFO] faces for face ID: 2
[INFO] faces for face ID: 3


In [None]:
# Command-line interface: declare the options, then parse them into a dict.
ap = argparse.ArgumentParser()

# -e/--encodings: mandatory path to the pickled encodings database
ap.add_argument(
    "-e",
    "--encodings",
    required=True,
    help="path to serialized db of facial encodings",
)

# -j/--jobs: optional integer degree of parallelism
ap.add_argument(
    "-j",
    "--jobs",
    type=int,
    default=-1,
    help="# of parallel jobs to run (-1 will use all CPUs)",
)

# convert the parsed namespace into a plain dict keyed by option name
args = vars(ap.parse_args())

In [36]:
# IPython help lookup (`?` suffix) for dlib.chinese_whispers_clustering.
# NOTE(review): `dlib` is not imported in the visible import cell — TODO
# confirm whether this exploratory cell should be kept.
dlib.chinese_whispers_clustering?

In [28]:
# IPython help lookup (`?` suffix) on the `components_` attribute of `cluster`.
# NOTE(review): looks like leftover exploration — confirm whether to keep it.
cluster.components_?