# Build Face Database

Prepare the database of known faces that will later be used for recognition.

Download a data set into a `./data` directory.

This example uses Labeled Faces in the Wild (http://vis-www.cs.umass.edu/lfw/), which the notebook reads from `./data/lfw`.

In [1]:
import os
import sys
import cv2
import numpy as np
import pandas as pd
from scipy import misc
from tqdm import tqdm

In [2]:
# NOTE(review): the cells below use TF 1.x APIs (tf.Session, tf.GraphDef,
# tf.gfile); the recorded run of this notebook printed 1.6.0-rc0.
import tensorflow as tf
print(tf.__version__)

  from ._conv import register_converters as _register_converters


1.6.0-rc0


In [3]:
# Add ../facenet/src (resolved against the current working directory) to the
# import path before importing. `facenet` and `align.detect_face` are
# presumably loaded from there -- TODO confirm the checkout location.
sys.path.append("../facenet/src")
import facenet
import align.detect_face

In [4]:
# Face Detection
MIN_FACE_SIZE = 20                     # minimum size of the face for the MTCNN
DETECT_THRESHOLDS = [ 0.6, 0.7, 0.7 ]  # threshold values for the three stages of the MTCNN
SCALE_FACTOR = 0.709                    # MTCNN scale factor

# Face Alignment/Sizing
MARGIN = 0  # total padding added around the detected box before cropping (half on each side)
INPUT_IMAGE_SIZE = 160  # side length of the square image each cropped face is resized to

In [5]:
# Build the MTCNN detector networks in a graph of their own, separate from the
# default graph. The session is not closed here; presumably pnet/rnet/onet keep
# running against it when detect_face is called later -- TODO confirm.
with tf.Graph().as_default():
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    with sess.as_default():
        # NOTE(review): the second argument is the model path; None presumably
        # selects the weights bundled with align.detect_face -- confirm.
        pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [6]:
# Location of the frozen FaceNet model and the tensor names used to drive it.
MODEL_FILE = "./models/facenet/facenet.pb"
INPUT_TENSOR = "input:0"
OUTPUT_TENSOR = "embeddings:0"
PHASE_TRAIN = "phase_train:0"

# The embedding network gets a dedicated graph.
facenet_graph = tf.Graph()
facenet_graph_def = tf.GraphDef()

# Step 1: pull the serialized GraphDef bytes off disk.
with tf.gfile.GFile(MODEL_FILE, 'rb') as fid:
    serialized_graph = fid.read()

# Step 2: decode the bytes into the GraphDef message.
facenet_graph_def.ParseFromString(serialized_graph)

# Step 3: import the nodes into facenet_graph with their original names
# (name='' means no prefix is added).
with facenet_graph.as_default():
    tf.import_graph_def(facenet_graph_def, name='')

In [7]:
def run_facenet(image):
    """Run one face image through the FaceNet graph and return its embedding.

    The image is scaled by 1/255 (so 8-bit pixel intensities are presumably
    expected -- TODO confirm), rounded to 12 decimals, and given a leading
    batch axis before being fed to INPUT_TENSOR with PHASE_TRAIN set to False.

    Parameters
    ----------
    image : numpy.ndarray
        Pixel data for a single face, already cropped and resized by the caller.

    Returns
    -------
    numpy.ndarray
        The value of OUTPUT_TENSOR for the one-image batch; the first axis
        therefore has length 1.
    """
    image_data = np.around(image / 255.0, decimals=12)
    image_data = np.expand_dims(image_data, axis=0)

    # Creating a tf.Session per image is expensive and this function is called
    # once per face, so keep a single session and reuse it. It is rebuilt if
    # facenet_graph has been replaced (e.g. the model cell was re-run).
    sess = getattr(run_facenet, "_session", None)
    if sess is None or sess.graph is not facenet_graph:
        if sess is not None:
            sess.close()
        sess = tf.Session(graph=facenet_graph)
        run_facenet._session = sess

    # Use the tensor-name constants defined next to MODEL_FILE rather than
    # repeating the string literals here.
    return sess.run(OUTPUT_TENSOR,
                    feed_dict={INPUT_TENSOR: image_data, PHASE_TRAIN: False})

In [8]:
def _crop_face(image, bounding_box):
    """Return `image` cropped to `bounding_box`, padded by MARGIN and clamped to the image."""
    height, width = image.shape[0:2]
    half_margin = MARGIN / 2

    # Assigning into an int32 array truncates the detector's float coordinates.
    box = np.zeros(4, dtype=np.int32)
    box[0] = np.maximum(bounding_box[0] - half_margin, 0)
    box[1] = np.maximum(bounding_box[1] - half_margin, 0)
    box[2] = np.minimum(bounding_box[2] + half_margin, width)
    box[3] = np.minimum(bounding_box[3] + half_margin, height)

    return image[box[1]:box[3], box[0]:box[2], :]


def _append_embedding(csv_path, label, embedding):
    """Append a single `id, <embedding values...>` row to the CSV at `csv_path`."""
    row = pd.concat(
        [pd.DataFrame([label], columns=["id"]), pd.DataFrame(embedding)],
        axis=1,
    )

    # Only a new (or empty) file gets the header line. Appending avoids both
    # rewriting the whole file for every image and re-reading earlier rows,
    # whose column labels come back from read_csv as strings ("0", "1", ...)
    # and so do not line up with the integer labels of freshly built rows.
    needs_header = (not os.path.exists(csv_path)) or os.path.getsize(csv_path) == 0
    row.to_csv(csv_path, mode="a", header=needs_header, index=False)


def import_dataset(input_dir, output_dir, csv_path="faces.csv"):
    """Detect, crop and embed every face image of a data set.

    For each image yielded by ``facenet.get_dataset(input_dir)`` in which the
    detector finds exactly one face, the face is cropped, resized to
    INPUT_IMAGE_SIZE x INPUT_IMAGE_SIZE, embedded with ``run_facenet``, and:

    * a row ``id, <embedding values...>`` is appended to ``csv_path``;
    * the resized crop is saved as ``<output_dir>/<class name>/<image name>.png``.

    The function can be interrupted and re-run: an image whose output PNG
    already exists is skipped, and new rows are appended to the existing CSV.
    Images that cannot be read, have fewer than three dimensions, or do not
    contain exactly one detected face produce no output (and are therefore
    retried on the next run).

    Parameters
    ----------
    input_dir : str
        Directory passed to ``facenet.get_dataset``.
    output_dir : str
        Directory that receives the cropped faces; created if missing.
    csv_path : str, optional
        CSV file the embeddings are appended to. Defaults to ``"faces.csv"``.

    Returns
    -------
    None
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataset = facenet.get_dataset(input_dir)

    for cls in dataset:
        output_class_dir = os.path.join(output_dir, cls.name)

        if not os.path.exists(output_class_dir):
            os.makedirs(output_class_dir)

        # Print what name we are on to give some idea of progress (once per
        # class rather than once per image).
        print(cls.name)

        for image_path in cls.image_paths:
            filename = os.path.splitext(os.path.split(image_path)[1])[0]
            output_filename = os.path.join(output_class_dir, filename + '.png')

            # Already handled by an earlier (interrupted) run.
            if os.path.exists(output_filename):
                continue

            # NOTE(review): scipy.misc.imread/imresize/imsave are deprecated
            # since SciPy 1.0 (see the warnings in the recorded output). They
            # are kept here so the resized pixels, and hence the embeddings,
            # stay the same as before.
            try:
                image = misc.imread(image_path)
            except (IOError, ValueError, IndexError) as e:
                print('{}: {}'.format(image_path, e))
                continue

            if image.ndim < 3:
                print('Unable to align "%s"' % image_path)
                continue

            # remove alpha
            image = image[:, :, 0:3]

            bb, _ = align.detect_face.detect_face(
                image, MIN_FACE_SIZE, pnet, rnet, onet,
                DETECT_THRESHOLDS, SCALE_FACTOR)

            # Only unambiguous images (exactly one detected face) are used.
            if bb.shape[0] != 1:
                continue

            cropped = _crop_face(image, bb[0])
            scaled = misc.imresize(
                cropped, (INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE), interp='bilinear')
            embedding = run_facenet(scaled)

            # The CSV row is written before the PNG, as in the original code:
            # the PNG is what marks an image as done, so an interruption
            # between the two steps makes the image be retried instead of
            # silently losing its embedding.
            _append_embedding(csv_path, cls.name, embedding)
            misc.imsave(output_filename, scaled)

In [9]:
# Process the LFW images under ./data/lfw; the cropped faces are written
# beneath output_dir, one sub-directory per class name.
input_dir = "./data/lfw"
output_dir = "./data/lfw-test-output"
import_dataset(input_dir, output_dir)

AJ_Cook
AJ_Lamas
Aaron_Eckhart
Aaron_Guiel


`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


Aaron_Patterson
Aaron_Peirsol
Aaron_Peirsol


KeyboardInterrupt: 