In [1]:
import ast
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import asarray
from numpy import expand_dims
from numpy import load
from numpy import savez_compressed
from PIL import Image
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tqdm import tqdm
from tqdm import tqdm_notebook

%matplotlib inline

warnings.filterwarnings('ignore')

In [2]:
# Load the FaceNet Keras model from its .h5 file, then load the separate weights file into it
facenet_model = load_model('FaceNet_Model/facenet_keras.h5')
facenet_model.load_weights('FaceNet_Model/facenet_keras_weights.h5')
# Show the model's input and output tensors, one list per line
print(facenet_model.inputs, facenet_model.outputs, sep='\n')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
[<tf.Tensor 'input_1:0' shape=(?, 160, 160, 3) dtype=float32>]
[<tf.Tensor 'Bottleneck_BatchNorm/batchnorm/add_1:0' shape=(?, 128) dtype=float32>]


In [3]:
# Load the converted metadata files for the IMDB and Wikipedia images.
# Every later cell reads image locations from the `file_path` column of this frame.
photo_df = pd.read_csv('photo_info_df/All_photo_data.csv')

In [4]:
# Preview the first rows of the metadata to confirm the columns loaded as expected
photo_df.head()

Unnamed: 0,name,dob,gender,photo_taken,age_when_taken,file_path,face_location,face_score
0,Fred Astaire,1899-05-10,1.0,1968,69,image_data/imdb_data/01/nm0000001_rm124825600_...,[1072.926 161.838 1214.784 303.696],1.459693
1,Fred Astaire,1899-05-10,1.0,1970,71,image_data/imdb_data/01/nm0000001_rm3343756032...,[477.184 100.352 622.592 245.76 ],2.543198
2,Fred Astaire,1899-05-10,1.0,1968,69,image_data/imdb_data/01/nm0000001_rm577153792_...,[114.96964309 114.96964309 451.68657236 451.68...,3.455579
3,Fred Astaire,1899-05-10,1.0,1968,69,image_data/imdb_data/01/nm0000001_rm946909184_...,[622.88550564 424.21750384 844.33900767 645.67...,1.872117
4,Fred Astaire,1899-05-10,1.0,1968,69,image_data/imdb_data/01/nm0000001_rm980463616_...,[1013.85900236 233.88204221 1201.5861278 42...,1.158766


# Helper Functions For Obtaining The Embeddings Of Each Picture

In [5]:
#This function opens the file and obtains an array representation of the picture
#It converts to RGB if the image is black and white
def obtain_image_pixels(filename):
    """Open an image file and return its pixels as an RGB numpy array.

    Parameters
    ----------
    filename : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        Array form of the image after conversion to RGB mode.
    """
    # Image.open is lazy and holds on to the underlying file; the context
    # manager guarantees the handle is released even if the conversion fails,
    # which matters when this is called once per row for hundreds of
    # thousands of images.
    with Image.open(filename) as image:
        return asarray(image.convert('RGB'))

In [6]:
#Resize the picture for model expectations
def resize_picture(image_array, dimensions = (160,160)):
    """Return `image_array` resized to `dimensions` as a new numpy array.

    The array is wrapped in a PIL image, resized with PIL's default
    resampling, and converted back to an array.
    """
    resized_image = Image.fromarray(image_array).resize(dimensions)
    return asarray(resized_image)

In [7]:
#The following function is from 'Deep Learning for Computer Vision' by Jason Brownlee, Page (508)

def get_embedding(filename):
    """Return the FaceNet embedding for the image stored at `filename`.

    The image is loaded as RGB with `obtain_image_pixels`, resized with
    `resize_picture` (default 160x160), standardized with its own global
    mean and standard deviation, and passed through the notebook-level
    `facenet_model`.

    Parameters
    ----------
    filename : str
        Path of the image file.

    Returns
    -------
    numpy.ndarray
        The first (and only) row returned by `facenet_model.predict`.
    """
    #obtain the face pixels
    face_pixels = resize_picture(obtain_image_pixels(filename)).astype('float32')

    # standardize pixel values across channels
    mean, std = face_pixels.mean(), face_pixels.std()
    # A single-colour image has std == 0, which used to turn the whole
    # embedding into NaN. Floor the divisor at 1/sqrt(number of values);
    # any real photo has a far larger std, so its result is unchanged.
    # A plain Python float keeps the array in float32.
    std_adj = max(float(std), 1.0 / max(face_pixels.size, 1) ** 0.5)
    face_pixels = (face_pixels - mean) / std_adj

    # transform face into one sample (adds the leading batch axis)
    observation = expand_dims(face_pixels, axis=0)
    # make prediction to get embedding
    yhat = facenet_model.predict(observation)
    return yhat[0]
    

# Obtain the Embeddings Array for Each Picture

I perform this step in batches of 50,000 rows and save each batch to its own CSV file as soon as it finishes, so that an interruption costs at most one batch of work.

In [8]:
# Split the metadata into eight consecutive batches (seven of 50,000 rows plus
# the remainder) so each batch can be embedded and saved on its own.
# .copy() gives every batch its own data: the later cells add an
# 'embeddings_fn' column to each batch, and doing that on a bare slice of
# photo_df triggers pandas' SettingWithCopyWarning (currently hidden by
# warnings.filterwarnings('ignore')).
photo_df_a = photo_df[0:50000].copy()
photo_df_b = photo_df[50000:100000].copy()
photo_df_c = photo_df[100000:150000].copy()
photo_df_d = photo_df[150000:200000].copy()
photo_df_e = photo_df[200000:250000].copy()
photo_df_f = photo_df[250000:300000].copy()
photo_df_g = photo_df[300000:350000].copy()
photo_df_h = photo_df[350000:].copy()



## Photo Dataframe With Embeddings - Part A

In [9]:
# Enable `progress_apply` on pandas objects, then embed every image path in batch A
tqdm.pandas()
photo_df_a['embeddings_fn'] = photo_df_a['file_path'].progress_apply(get_embedding)

100%|██████████| 50000/50000 [1:19:12<00:00, 10.52it/s]     


In [10]:
# Save batch A, including its new 'embeddings_fn' column, to CSV without the index column
photo_df_a.to_csv('photo_info_df/photo_dfs_with_embeddings_fn/photo_dataframe_fn_embeddings_a.csv', index = False)

## Photo Dataframe With Embeddings - Part B

In [20]:
# Enable `progress_apply` on pandas objects, then embed every image path in batch B
tqdm.pandas()
photo_df_b['embeddings_fn'] = photo_df_b['file_path'].progress_apply(get_embedding)

100%|██████████| 50000/50000 [1:07:18<00:00, 12.38it/s]


In [21]:
# Save batch B, including its new 'embeddings_fn' column, to CSV without the index column
photo_df_b.to_csv('photo_info_df/photo_dfs_with_embeddings_fn/photo_dataframe_fn_embeddings_b.csv', index = False)

## Photo Dataframe With Embeddings - Part C

In [22]:
# Enable `progress_apply` on pandas objects, then embed every image path in batch C
tqdm.pandas()
photo_df_c['embeddings_fn'] = photo_df_c['file_path'].progress_apply(get_embedding)

100%|██████████| 50000/50000 [1:09:04<00:00, 11.91it/s]


In [23]:
# Save batch C, including its new 'embeddings_fn' column, to CSV without the index column
photo_df_c.to_csv('photo_info_df/photo_dfs_with_embeddings_fn/photo_dataframe_fn_embeddings_c.csv', index = False)

## Photo Dataframe With Embeddings - Part D

In [24]:
# Enable `progress_apply` on pandas objects, then embed every image path in batch D
tqdm.pandas()
photo_df_d['embeddings_fn'] = photo_df_d['file_path'].progress_apply(get_embedding)

100%|██████████| 50000/50000 [1:15:48<00:00, 11.59it/s]


In [25]:
# Save batch D, including its new 'embeddings_fn' column, to CSV without the index column
photo_df_d.to_csv('photo_info_df/photo_dfs_with_embeddings_fn/photo_dataframe_fn_embeddings_d.csv', index = False)

## Photo Dataframe With Embeddings - Part E

In [9]:
# Enable `progress_apply` on pandas objects, then embed every image path in batch E
tqdm.pandas()
photo_df_e['embeddings_fn'] = photo_df_e['file_path'].progress_apply(get_embedding)

100%|██████████| 50000/50000 [1:11:04<00:00, 11.72it/s]


In [10]:
# Save batch E, including its new 'embeddings_fn' column, to CSV without the index column
photo_df_e.to_csv('photo_info_df/photo_dfs_with_embeddings_fn/photo_dataframe_fn_embeddings_e.csv', index = False)

## Photo Dataframe With Embeddings - Part F

In [11]:
# Enable `progress_apply` on pandas objects, then embed every image path in batch F
tqdm.pandas()
photo_df_f['embeddings_fn'] = photo_df_f['file_path'].progress_apply(get_embedding)

100%|██████████| 50000/50000 [1:06:42<00:00, 10.66it/s]


In [12]:
# Save batch F, including its new 'embeddings_fn' column, to CSV without the index column
photo_df_f.to_csv('photo_info_df/photo_dfs_with_embeddings_fn/photo_dataframe_fn_embeddings_f.csv', index = False)

## Photo Dataframe With Embeddings - Part G

In [13]:
# Enable `progress_apply` on pandas objects, then embed every image path in batch G
tqdm.pandas()
photo_df_g['embeddings_fn'] = photo_df_g['file_path'].progress_apply(get_embedding)

100%|██████████| 50000/50000 [1:16:59<00:00, 12.67it/s]  


In [14]:
# Save batch G, including its new 'embeddings_fn' column, to CSV without the index column
photo_df_g.to_csv('photo_info_df/photo_dfs_with_embeddings_fn/photo_dataframe_fn_embeddings_g.csv', index = False)

## Photo Dataframe With Embeddings - Part H

In [15]:
# Enable `progress_apply` on pandas objects, then embed every image path in batch H
tqdm.pandas()
photo_df_h['embeddings_fn'] = photo_df_h['file_path'].progress_apply(get_embedding)

100%|██████████| 22917/22917 [31:03<00:00, 12.30it/s] 


In [16]:
# Save batch H, including its new 'embeddings_fn' column, to CSV without the index column
photo_df_h.to_csv('photo_info_df/photo_dfs_with_embeddings_fn/photo_dataframe_fn_embeddings_h.csv', index = False)

# Testing The Loaded Dataframes

In [None]:
# Display the embedding stored under index label 0 of batch A (in-memory value, before any CSV round trip)
photo_df_a.embeddings_fn[0]

In [37]:
# Reload the saved batch D file to check how the embeddings survive the CSV round trip
photo_df_d_test = pd.read_csv('photo_info_df/photo_dfs_with_embeddings_fn/photo_dataframe_fn_embeddings_d.csv')

In [38]:
# Preview the reloaded rows; 'embeddings_fn' comes back from the CSV as text at this point
photo_df_d_test.head()

Unnamed: 0,name,dob,gender,photo_taken,age_when_taken,file_path,face_location,face_score,embeddings_fn
0,Dean Norris,1963-04-08,1.0,2013,50,image_data/imdb_data/87/nm0606487_rm683335168_...,[237.37972736 119.18986368 325.02212512 206.83...,1.589262,[ 0.5910256 0.72979903 -1.4436709 0.832029...
1,Dean Norris,1963-04-08,1.0,2013,50,image_data/imdb_data/87/nm0606487_rm711508736_...,[526.336 231.424 819.2 524.288],5.229899,[-0.066248 1.5444028 0.72457373 0.946269...
2,Dean Norris,1963-04-08,1.0,2013,50,image_data/imdb_data/87/nm0606487_rm745063168_...,[ 491.22600107 307.78425067 1039.50325227 85...,4.225081,[-0.42622527 0.3229443 -1.372813 0.836303...
3,Dean Norris,1963-04-08,1.0,2007,44,image_data/imdb_data/87/nm0606487_rm754944256_...,[ 57.812 57.812 228.864 228.864],4.070058,[-0.20527998 0.40051752 -0.746986 -0.527946...
4,Dean Norris,1963-04-08,1.0,2008,45,image_data/imdb_data/87/nm0606487_rm767731968_...,[1050.94364355 365.12726123 1411.97490478 72...,2.764822,[ 1.8527697e-01 7.5647783e-01 -4.2893928e-02 ...


In [28]:
#This function converts the saved string in the 'embeddings_fn' column to a numpy array

def convert_csv_to_embeddings(embedding_string):
    """Convert the text saved in the 'embeddings_fn' column back to an array.

    The CSV stores each embedding as the printed form of a numpy array:
    values separated by whitespace (possibly wrapped over several lines)
    inside square brackets, with no commas.

    Parameters
    ----------
    embedding_string : str
        Text such as ``"[ 0.59  0.72 -1.44\\n  0.83]"``.

    Returns
    -------
    numpy.ndarray
        The parsed values as a float32 array.
    """
    # Collapse every run of whitespace (spaces, tabs, newlines, of any length)
    # to a single space and drop leading/trailing whitespace. The previous
    # chain of fixed-width replace() calls only handled short runs of spaces.
    normalized = ' '.join(embedding_string.split())

    # Remove the padding just inside the brackets, then turn the remaining
    # separators into commas so the text is a valid Python list literal.
    normalized = normalized.replace('[ ', '[').replace(' ]', ']').replace(' ', ', ')

    #This returns the string as an array in the proper type
    return asarray(ast.literal_eval(normalized)).astype('float32')
    

In [39]:
# Parse the stringified embeddings back into float32 arrays.
# Only values that are still strings are converted, so re-running this cell
# after the column has already been parsed is a no-op instead of an error.
photo_df_d_test['embeddings_fn'] = photo_df_d_test['embeddings_fn'].apply(
    lambda value: convert_csv_to_embeddings(value) if isinstance(value, str) else value
)

In [40]:
# Preview the last rows of the reloaded batch D frame, including its 'embeddings_fn' column
photo_df_d_test.tail()

Unnamed: 0,name,dob,gender,photo_taken,age_when_taken,file_path,face_location,face_score,embeddings_fn
49995,Autumn Reeser,1980-09-21,0.0,2003,23,image_data/imdb_data/08/nm1032208_rm1830721536...,[299.3115723 72.32877735 406.17973833 179.19...,4.763995,"[-0.5000435, 0.00827631, -0.9710353, 0.1913891..."
49996,Autumn Reeser,1980-09-21,0.0,2003,23,image_data/imdb_data/08/nm1032208_rm1847498752...,[117.29449931 55.54152909 178.39746954 116.64...,2.534724,"[-0.35872468, -0.41892233, -0.34107652, 0.4779..."
49997,Autumn Reeser,1980-09-21,0.0,2004,24,image_data/imdb_data/08/nm1032208_rm1998948352...,[ 98.95796658 74.37447494 208.95967899 184.37...,4.504549,"[-0.5445585, -1.0659486, 0.98557085, -0.59999,..."
49998,Autumn Reeser,1980-09-21,0.0,2003,23,image_data/imdb_data/08/nm1032208_rm2082379776...,[216.96953005 124.36830288 354.97137079 262.37...,3.794728,"[-1.4261917, -1.3674953, 0.7790535, 0.32420912..."
49999,Autumn Reeser,1980-09-21,0.0,2003,23,image_data/imdb_data/08/nm1032208_rm2132711424...,[ 87.3489876 40.05863073 157.63452292 110.34...,4.297943,"[-1.6472375, -1.7480907, -0.33489174, -0.07722..."


In [36]:
# Confirm a parsed embedding is a numpy array again.
# Uses photo_df_d_test: it is the only reloaded test frame this notebook
# creates (photo_df_c_test is never defined here).
type(photo_df_d_test.embeddings_fn[499])

numpy.ndarray

In [None]:
def resize_picture(filename, dimensions = (160,160), margin = 0):
    """Load an image, crop it to the first detected face, and resize it.

    NOTE(review): this definition reuses the name of the array-based
    `resize_picture(image_array, dimensions)` defined earlier in this
    notebook and replaces it once this cell runs. `MTCNN` is also not
    imported anywhere in the visible notebook; while that is the case the
    detection step fails and the whole image is resized instead.

    Parameters
    ----------
    filename : str
        Path of the image file.
    dimensions : tuple
        Target size handed to `Image.resize`.
    margin : int
        Extra pixels to keep on every side of the detected face box.

    Returns
    -------
    numpy.ndarray
        The resized RGB pixels of the face crop, or of the whole image when
        no usable face box is available.
    """
    # load the image; the context manager releases the file handle
    with Image.open(filename) as image:
        image_array = asarray(image.convert('RGB'))

    # Run the detector once: its result does not depend on `margin`, so there
    # is no reason to rebuild it and re-detect on every retry.
    try:
        detections = MTCNN().detect_faces(image_array)
        #Obtain the first detected face in the picture (None if there is none)
        box = detections[0]['box'] if detections else None
    except Exception:
        # Best effort, as before: a detector failure means "use the whole image"
        box = None

    face_array = image_array
    if box is not None:
        # get coordinates
        x1, y1, width, height = box
        image_height, image_width = image_array.shape[:2]

        # Clamp the (margin-expanded) box to the image. A negative start
        # index would otherwise wrap around to the far edge and give an
        # empty or wrong crop; this replaces the old loop that shrank the
        # margin one pixel at a time until the crop happened to work.
        left = max(x1 - margin, 0)
        top = max(y1 - margin, 0)
        right = min(x1 + width + margin, image_width)
        bottom = min(y1 + height + margin, image_height)

        # Keep the whole image if the box is degenerate after clamping
        if right > left and bottom > top:
            face_array = image_array[top:bottom, left:right]

    return asarray(Image.fromarray(face_array).resize(dimensions))

In [None]:
#The following function is from 'Deep Learning for Computer Vision' by Jason Brownlee, Page (508)

# get the face embedding for one face
def get_embedding(model, face_pixels):
    """Return the embedding `model` predicts for one face image.

    NOTE(review): this reuses the name of the one-argument
    `get_embedding(filename)` defined earlier in this notebook and replaces
    it once this cell runs.

    Parameters
    ----------
    model : object
        Anything with a `predict(samples)` method that returns one row per
        sample.
    face_pixels : numpy.ndarray
        Pixel values of a single face.

    Returns
    -------
    The first row of `model.predict` for the single-sample batch.
    """
    # scale pixel values
    face_pixels = face_pixels.astype('float32')
    # standardize pixel values across channels (global)
    mean, std = face_pixels.mean(), face_pixels.std()
    # A single-colour image has std == 0, which used to turn every value
    # into NaN. Floor the divisor at 1/sqrt(number of values); a plain Python
    # float keeps the array in float32.
    std_adj = max(float(std), 1.0 / max(face_pixels.size, 1) ** 0.5)
    face_pixels = (face_pixels - mean) / std_adj
    # transform face into one sample
    samples = expand_dims(face_pixels, axis=0)
    # make prediction to get embedding
    yhat = model.predict(samples)
    return yhat[0]

In [None]:
# load the FaceNet model
# NOTE(review): this repeats the model load from the start of the notebook, but
# here the separate weights file is NOT loaded (the load_weights call below is
# commented out) and the input/output tensors are not printed.
facenet_model = load_model('FaceNet_Model/facenet_keras.h5')
#facenet_model.load_weights('FaceNet_Model/facenet_keras_weights.h5')
#    print(facenet_model.inputs)
#    print(facenet_model.outputs)

In [None]:
# Call the two-argument get_embedding(model, face_pixels) on a single picture.
# NOTE(review): `picture1` is not defined anywhere in the visible notebook - TODO define it before running this cell.
get_embedding(facenet_model, picture1)

In [None]:
#A check to see if the file path exists
# NOTE(review): the original loop read `photo_info.file_path` and called
# `path.exists`, but neither `photo_info` nor `path` is defined in this
# notebook. `photo_df` is the metadata frame that holds `file_path` here -
# confirm that it is the intended source.
file_exists = sum(os.path.exists(file_path) for file_path in photo_df.file_path)
file_not_exist = len(photo_df) - file_exists

print(file_exists)
print(file_not_exist)