# Build Face Database

Prepare the database of faces to recognize.

Download a data set under a ./data directory.  

This example uses Labeled Faces in the Wild (http://vis-www.cs.umass.edu/lfw/)

In [1]:
import os
import sys
import cv2
import time
import numpy as np
import pandas as pd
from scipy import misc

import tensorflow as tf
print(tf.__version__)

# Check out facenet into the same parent directory as this repository.
sys.path.append("../facenet/src")
import facenet
import align.detect_face

  from ._conv import register_converters as _register_converters


1.6.0-rc0


In [2]:
# Path of the serialized graph (.pb) that is imported under the 'enet' name scope.
MODEL_FILE = "./20170512-110547/20170512-110547.pb"

facenet_graph = tf.Graph()
with facenet_graph.as_default():
    # Read the raw bytes first; parsing and importing do not need the file open.
    with tf.gfile.GFile(MODEL_FILE, 'rb') as fid:
        serialized_graph = fid.read()

    facenet_graph_def = tf.GraphDef()
    facenet_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(facenet_graph_def, name='enet')

    # The same session is used by the embedding call below and by the MTCNN networks.
    sess = tf.Session()
    with sess.as_default():
        def enet(img):
            """Feed `img` to 'enet/input:0' (phase_train off) and return 'enet/embeddings:0'."""
            feeds = {'enet/input:0': img, 'enet/phase_train:0': False}
            return sess.run('enet/embeddings:0', feed_dict=feeds)

        pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [3]:
# Face Detection constants. All three are passed unchanged to
# align.detect_face.detect_face() by the import functions below.
MIN_FACE_SIZE = 20                     # minimum size of the face for the MTCNN (presumably pixels -- TODO confirm)
DETECT_THRESHOLDS = [ 0.6, 0.7, 0.7 ]  # threshold values for the three stages of the MTCNN (one value per stage)
SCALE_FACTOR = 0.709                   # MTCNN scale factor

# Face Embedding constants.
# Width and height every cropped face is resized to before run_facenet() embeds it.
INPUT_IMAGE_SIZE = 160

# This function normalizes the image before generating the embedding.
def run_facenet(image):
    """Scale `image` to the 0..1 range, add a leading batch axis and embed it.

    Args:
        image: numeric array holding one image; values are divided by 255.0.

    Returns:
        Whatever `enet` returns for the single-image batch.
    """
    normalized = np.around(image / 255.0, decimals=12)
    # `enet` is fed a batch, so wrap the single image in a batch of one.
    batch = normalized[np.newaxis, ...]
    return enet(batch)

def import_dataset(input_dir, output_dir, csv_path="faces.csv"):
    """Align every single-face image of a dataset and record its embedding.

    Each image listed by ``facenet.get_dataset(input_dir)`` is run through the
    MTCNN detector.  When exactly one face is found, the face is cropped,
    resized to ``INPUT_IMAGE_SIZE`` x ``INPUT_IMAGE_SIZE``, embedded with
    ``run_facenet``, appended to ``csv_path`` as one row (``id`` followed by
    the embedding values) and saved as
    ``<output_dir>/<class name>/<image name>.png``.

    Images whose PNG already exists are skipped, so an interrupted run can be
    resumed simply by calling the function again.

    Args:
        input_dir: dataset root handed to ``facenet.get_dataset``.
        output_dir: directory receiving the aligned PNGs (created if missing).
        csv_path: CSV file the embedding rows are appended to.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataset = facenet.get_dataset(input_dir)

    for cls in dataset:
        output_class_dir = os.path.join(output_dir, cls.name)

        if not os.path.exists(output_class_dir):
            os.makedirs(output_class_dir)

        for image_path in cls.image_paths:
            filename = os.path.splitext(os.path.split(image_path)[1])[0]
            output_filename = os.path.join(output_class_dir, filename + '.png')

            # Print what name we are on to give some idea of progress.
            print(cls.name)

            # Already handled by an earlier (possibly interrupted) run.
            if os.path.exists(output_filename):
                continue

            try:
                image = misc.imread(image_path)
            except (IOError, ValueError, IndexError) as e:
                print('{}: {}'.format(image_path, e))
                continue

            if image.ndim < 3:
                print('Unable to align "%s"' % image_path)
                continue

            # remove alpha
            image = image[:, :, 0:3]
            height, width = image.shape[0:2]

            bb, _ = align.detect_face.detect_face(
                image, MIN_FACE_SIZE, pnet, rnet, onet, DETECT_THRESHOLDS, SCALE_FACTOR)

            # Only images with exactly one detected face are imported.
            if bb.shape[0] != 1:
                continue

            # Clamp the detected box to the image bounds (values truncate to int32).
            box = np.zeros(4, dtype=np.int32)
            box[0] = np.maximum(bb[0, 0], 0)
            box[1] = np.maximum(bb[0, 1], 0)
            box[2] = np.minimum(bb[0, 2], width)
            box[3] = np.minimum(bb[0, 3], height)

            cropped = image[box[1]:box[3], box[0]:box[2], :]

            # A degenerate box leaves nothing to resize; skip instead of crashing.
            if cropped.size == 0:
                print('Unable to align "%s"' % image_path)
                continue

            scaled = misc.imresize(cropped, (INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE), interp='bilinear')
            embedding = run_facenet(scaled)

            row = pd.concat(
                [pd.DataFrame([cls.name], columns=["id"]), pd.DataFrame(embedding)],
                axis=1)

            # Append just this row.  Re-reading the CSV and concatenating would mix
            # the string column labels produced by read_csv ('0', '1', ...) with the
            # integer labels of a fresh row, and rewriting the whole file for every
            # image grows quadratically with the dataset size.
            write_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
            row.to_csv(csv_path, mode="a", header=write_header, index=False)

            # Written last: the PNG doubles as the "already processed" marker.
            misc.imsave(output_filename, scaled)

In [6]:
# Make the faiss checkout at ../faiss importable.
sys.path.append("../faiss")
import faiss

df_test = pd.read_csv("faces_test.csv")

# Column 'id' holds the identity label; the 128 columns after it hold the embedding.
# Position i in face_identities names the vector stored at position i in face_index.
face_identities = df_test['id'].tolist()
face_index = faiss.IndexFlatL2(128)

# Add all embeddings in one call instead of one row at a time.  `.values` replaces
# the deprecated `.as_matrix()`; the array is made float32 and C-contiguous before
# it is handed to the index.
gallery_embeddings = np.ascontiguousarray(
    df_test.iloc[:, 1:129].values.astype('float32'))

if len(gallery_embeddings):
    face_index.add(gallery_embeddings)

Failed to load GPU Faiss: No module named swigfaiss_gpu
Faiss falling back to CPU-only.


In [117]:
def load_image(image_path):
    """Read an image with OpenCV and hand it back with its channels reversed (RGB).

    Args:
        image_path: path of the image file to read.

    Returns:
        A ``(img, errorMessage)`` pair.  On success ``img`` is the image array
        and ``errorMessage`` is ``''``; on failure ``img`` is ``None`` and
        ``errorMessage`` describes the problem.
    """
    loaded = cv2.imread(image_path)

    if loaded is None:
        return None, '{}: failed to load'.format(image_path)

    _, _, num_channels = loaded.shape

    if num_channels < 3:
        return None, '{}: less than three dimensions'.format(image_path)

    # Keep the first three channels (drops alpha), then flip their order to RGB.
    three_channels = loaded[:, :, 0:3]
    rgb = three_channels[..., ::-1]

    return rgb, ''



def import_dataset2(input_dir, output_dir, log_filename, csv_path="faces.csv"):
    """Align one face per image of a dataset and record its embedding.

    Each image listed by ``facenet.get_dataset(input_dir)`` is loaded with
    ``load_image`` and run through the MTCNN detector.  When several faces are
    found, the one with the best size-versus-centering score is kept.  The
    face is cropped, resized to ``INPUT_IMAGE_SIZE`` x ``INPUT_IMAGE_SIZE``,
    embedded with ``run_facenet``, appended to ``csv_path`` as one row (``id``
    followed by the embedding values) and saved as
    ``<output_dir>/<class name>/<image name>.png``.

    Images whose PNG already exists are skipped, so an interrupted run can be
    resumed by calling the function again.  Images that cannot be loaded or
    contain no face are reported on stdout and in the log file, one per line.

    Args:
        input_dir: dataset root handed to ``facenet.get_dataset``.
        output_dir: directory receiving the aligned PNGs and the log file
            (``~`` is expanded; created if missing).
        log_filename: name of the log file created inside ``output_dir``
            (overwritten on every call).
        csv_path: CSV file the embedding rows are appended to.
    """
    output_dir = os.path.expanduser(output_dir)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataset = facenet.get_dataset(input_dir)

    images_total = 0
    successfully_aligned = 0

    # `with` guarantees the log is flushed and closed even if an image raises.
    with open(os.path.join(output_dir, log_filename), "w") as log_file:
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)

            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)

            for image_path in cls.image_paths:
                images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, filename + '.png')

                print(image_path)

                # Already handled by an earlier (possibly interrupted) run.
                if os.path.exists(output_filename):
                    continue

                img, errorMessage = load_image(image_path)

                if img is None:
                    print(errorMessage)
                    log_file.write(errorMessage + "\n")
                    continue

                bounding_boxes, _ = align.detect_face.detect_face(
                    img, MIN_FACE_SIZE, pnet, rnet, onet, DETECT_THRESHOLDS, SCALE_FACTOR)

                faces = bounding_boxes.shape[0]

                if faces == 0:
                    errorMessage = '{}: no faces'.format(image_path)
                    print(errorMessage)
                    log_file.write(errorMessage + "\n")
                    continue

                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(img.shape)[0:2]

                chosen = 0
                if faces > 1:
                    # Prefer large boxes close to the image center.
                    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                    img_center = img_size / 2
                    offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                         (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    # some extra weight on the centering
                    chosen = np.argmax(bounding_box_size - offset_dist_squared * 2.0)

                face_box = det[chosen, :]

                # Clamp the box to the image bounds (values truncate to int32).
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(face_box[0], 0)
                bb[1] = np.maximum(face_box[1], 0)
                bb[2] = np.minimum(face_box[2], img_size[1])
                bb[3] = np.minimum(face_box[3], img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]

                # A degenerate box leaves nothing to resize; log it and move on.
                if cropped.size == 0:
                    errorMessage = '{}: empty face crop'.format(image_path)
                    print(errorMessage)
                    log_file.write(errorMessage + "\n")
                    continue

                scaled = cv2.resize(cropped, (INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE),
                                    interpolation=cv2.INTER_LINEAR)
                embedding = run_facenet(scaled)

                row = pd.concat(
                    [pd.DataFrame([cls.name], columns=["id"]), pd.DataFrame(embedding)],
                    axis=1)

                # Append just this row.  Re-reading the CSV and concatenating would
                # mix the string column labels produced by read_csv ('0', '1', ...)
                # with the integer labels of a fresh row, and rewriting the whole
                # file for every image grows quadratically with the dataset size.
                write_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
                row.to_csv(csv_path, mode="a", header=write_header, index=False)

                successfully_aligned += 1
                # Channels are reversed again because `scaled` is in RGB order
                # (see load_image).  Written last: the PNG doubles as the
                # "already processed" marker.
                cv2.imwrite(output_filename, scaled[..., ::-1])

    print('Total number of images: %d' % images_total)
    print('Number of successfully aligned images: %d' % successfully_aligned)


In [118]:
# Run the import over the dataset in ./data/lfw. import_dataset2 writes the
# aligned face PNGs and "logfile.txt" under output_dir and prints each image
# path as it goes.
input_dir = "./data/lfw"
output_dir = "./data/lfw-test-output"
import_dataset2(input_dir, output_dir, "logfile.txt")

./data/lfw/AJ_Cook/AJ_Cook_0001.jpg
./data/lfw/AJ_Lamas/AJ_Lamas_0001.jpg
./data/lfw/Aaron_Eckhart/Aaron_Eckhart_0001.jpg
./data/lfw/Aaron_Eckhart/Aaron_Eckhart_0002.jpg
./data/lfw/Aaron_Eckhart/Aaron_Eckhart_0002.jpg: failed to load
./data/lfw/Aaron_Guiel/Aaron_Guiel_0001.jpg
./data/lfw/Aaron_Patterson/Aaron_Patterson_0001.jpg
./data/lfw/Aaron_Peirsol/Aaron_Peirsol_0001.jpg
./data/lfw/Aaron_Peirsol/Aaron_Peirsol_0002.jpg
./data/lfw/Aaron_Peirsol/Aaron_Peirsol_0003.jpg
./data/lfw/Aaron_Peirsol/Aaron_Peirsol_0004.jpg
./data/lfw/Aaron_Pena/Aaron_Pena_0001.jpg
./data/lfw/Aaron_Sorkin/Aaron_Sorkin_0001.jpg
./data/lfw/Aaron_Sorkin/Aaron_Sorkin_0002.jpg
./data/lfw/Aaron_Tippin/Aaron_Tippin_0001.jpg
./data/lfw/Abba_Eban/Abba_Eban_0001.jpg
./data/lfw/Abbas_Kiarostami/Abbas_Kiarostami_0001.jpg
./data/lfw/Abdel_Aziz_Al-Hakim/Abdel_Aziz_Al-Hakim_0001.jpg
./data/lfw/Abdel_Madi_Shabneh/Abdel_Madi_Shabneh_0001.jpg
./data/lfw/Abdel_Nasser_Assidi/Abdel_Nasser_Assidi_0001.jpg
./data/lfw/Abdel_Nasser_As

In [119]:
# Rows OFFSET .. OFFSET + TEST_FACES - 1 of faces.csv are used as probe faces.
OFFSET=10
TEST_FACES = 15

df_test = pd.read_csv("faces.csv")

# Slice the probe rows in one step (column 0 is 'id', the next 128 columns are the
# embedding).  `.values` replaces the deprecated `.as_matrix()`.  When the CSV has
# fewer rows than requested, only the rows that exist are searched, so no all-zero
# placeholder vectors end up in the query.
embeddings = np.ascontiguousarray(
    df_test.iloc[OFFSET:OFFSET + TEST_FACES, 1:129].values.astype('float32'))

# Look up the two nearest gallery vectors for every probe.
distances, indicies = face_index.search(embeddings, 2)
print(indicies)
print(distances)

# Name of the closest gallery face for each probe.
for best_match in indicies[:, 0]:
    print(face_identities[best_match])

[[10 11]
 [11 10]
 [12 32]
 [13 85]
 [14 91]
 [15 27]
 [16  5]
 [17 18]
 [18 17]
 [19 22]
 [20 22]
 [21 92]
 [22 20]
 [23 45]
 [24 29]]
[[6.3201631e-05 7.1960950e-01]
 [5.4959943e-05 7.2201103e-01]
 [5.8500544e-05 1.0370395e+00]
 [6.6950524e-05 1.3488963e+00]
 [6.1983919e-05 1.2586166e+00]
 [1.2931286e-04 9.5449734e-01]
 [8.4476116e-05 1.2845230e+00]
 [4.1195308e-05 1.4957476e-01]
 [4.7756308e-05 1.4767630e-01]
 [5.9083090e-05 4.8276648e-01]
 [5.6836077e-05 2.9722852e-01]
 [5.1570346e-04 5.0957000e-01]
 [5.0823026e-05 2.9537436e-01]
 [3.6490237e-05 1.0283061e+00]
 [5.9899612e-05 1.1914573e+00]]
Aaron_Sorkin
Aaron_Sorkin
Aaron_Tippin
Abba_Eban
Abbas_Kiarostami
Abdel_Aziz_Al-Hakim
Abdel_Madi_Shabneh
Abdel_Nasser_Assidi
Abdel_Nasser_Assidi
Abdoulaye_Wade
Abdoulaye_Wade
Abdoulaye_Wade
Abdoulaye_Wade
Abdul_Majeed_Shobokshi
Abdul_Rahman
