# 1. Set Up

In [2]:


#mount Goolge Drive
from google.colab import drive
drive.mount('/content/gdrive')

#set file path
path = "/content/gdrive/My Drive/mask_detect/"

Mounted at /content/gdrive


In [4]:
! pip install mtcnn

Collecting mtcnn
[?25l  Downloading https://files.pythonhosted.org/packages/67/43/abee91792797c609c1bf30f1112117f7a87a713ebaa6ec5201d5555a73ef/mtcnn-0.1.0-py3-none-any.whl (2.3MB)
[K     |████████████████████████████████| 2.3MB 8.8MB/s 
Installing collected packages: mtcnn
Successfully installed mtcnn-0.1.0


In [5]:
#import library
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Lambda, Flatten, Dense
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras import layers
import cv2
import os
import numpy as np
from numpy import genfromtxt
import pandas as pd
import h5py
import matplotlib.pyplot as plt

import mtcnn
from mtcnn.mtcnn import MTCNN
from matplotlib import pyplot as plt
from tensorflow.keras.models import load_model
from scipy.spatial.distance import cosine
from PIL import Image
from tensorflow.keras import backend as K

if K.backend()=='tensorflow':
    keras.backend.set_image_data_format("channels_last")

%matplotlib inline
%load_ext autoreload
%autoreload 2

# 2. Data Processing

In [41]:
# create the detector, using default weights
detector = MTCNN()
def extract_face(filename, required_size=(160, 160)):
    image = Image.open(filename)
    # convert to RGB, if needed
    image = image.convert('RGB')
    # convert to array
    pixels = np.asarray(image)
    # detect faces in the image
    results = detector.detect_faces(pixels)
    # extract the bounding box from the first face
    if len(results)>0:
      x1, y1, width, height = results[0]['box']
      # deal with negative pixel index
      x1, y1 = abs(x1), abs(y1)
      x2, y2 = x1 + width, y1 + height
      # extract the face
      face = pixels[y1:y2, x1:x2]
      # resize pixels to the model size
      image = Image.fromarray(face)
      image = image.resize(required_size)
      face_array = np.asarray(image)
      return face_array
    return []



# 3. Load Model

In [9]:
#Define Triplet_loss
def triplet_loss(y_true, y_pred, alpha = 0.2):
    
    anchor, positive, negative = y_pred[0], y_pred[1], y_pred[2]
  
    # Step 1: Compute the (encoding) distance between the anchor and the positive
    pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), axis=-1)
    # Step 2: Compute the (encoding) distance between the anchor and the negative
    neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), axis=-1)
    # Step 3: subtract the two previous distances and add alpha.
    basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)
    # Step 4: Take the maximum of basic_loss and 0.0. Sum over the training examples.
    loss = tf.reduce_sum(tf.maximum(basic_loss, 0.0))
    
    return loss

#load pre-trained FaceNet Model
facenet_model = load_model(path+'facenet/facenet_keras.h5', custom_objects={ 'loss': triplet_loss })



# 4. Get image encoding

In [38]:
from sklearn.preprocessing import Normalizer
in_encoder = Normalizer('l2')

def get_embedding(model, face):
    # scale pixel values
    face = face.astype('float32')
    # standardization
    mean, std = face.mean(), face.std()
    face = (face - mean) / std
    print(face.shape)
    face = cv2.resize(face,(160,160))
    face = np.expand_dims(face, axis=0)
    print(face.shape)
    encode = model.predict(face)[0]
    print(encode)
    return encode

def who_is_it(image_path, database, model):
    
    #Compute the target "encoding" for the image
    face = extract_face(image_path)
    if len(face)>0:
      encoding = get_embedding(facenet_model,face)
      encoding = in_encoder.transform(np.expand_dims(encoding, axis=0))[0]
      
      # Initialize "min_dist" to a large value
      min_dist = float("inf")
      
      identity = ""
      # Loop over the database dictionary's names and encodings.
      for (name, db_enc) in database.items():
          
          # Compute L2 distance between the target "encoding" and the current db_enc
          dist = cosine(db_enc,encoding)

          # If this distance is less than the min_dist, then set min_dist to dist, and identity to name
          if dist < 0.5 and dist < min_dist:
              min_dist = dist
              identity = name

      
      if min_dist > 0.5:
          print("Not in the database.")
      else:
          print ("it's " + str(identity) + ", the distance is " + str(min_dist))
          
      return min_dist, identity
    return "no face detected"

# 5. Build image encoding library

In [43]:
database = {}
norman = extract_face(path+"facenet/data/IMG_4574.JPG")
database["norman"] = get_embedding(facenet_model,norman)
#print(get_embedding(facenet_model,norman).shape)
print(database)

#test
who_is_it(path+"facenet/data/IMG_4577.JPG",database,facenet_model) # norma pic with glasses - OK
who_is_it(path+"facenet/data/IMG_4576.JPG",database,facenet_model)  # covered mouth -- no face
who_is_it(path+"facenet/data/IMG_4575.JPG",database,facenet_model)  # side face  -- not identified

(160, 160, 3)
(1, 160, 160, 3)
[-0.7013086  -0.7869352   0.36629578 -1.0533936  -0.11811689  0.71785367
 -0.7721825  -0.21287586  0.6297333  -0.9028784   0.92580867 -0.08018784
  0.09103934  0.0908369  -0.9809562   0.4551217   1.1686488  -0.90603405
 -0.35406125  0.193023    0.8138758  -0.77828807 -0.48039156 -0.3942199
  0.6070982   0.35871667 -0.20042796  0.6532146   0.32706928  0.01547497
  0.3482965   0.84028375 -0.1748165   0.48380804 -0.14907266  0.37632012
  0.6760591   0.3183003   0.10559088 -0.07784468  0.96952796  0.21898931
 -0.3580508  -0.10020782 -0.882256   -1.0529208   0.1545275   0.11531702
 -0.4560889   0.780159   -0.55949104 -0.5095497  -0.01773892  0.24359408
 -0.3966319   0.36635318  1.224584   -0.5429244  -0.5761664  -0.27741158
 -0.7366908  -0.04805714  0.28772098  1.3381075  -0.6891946   0.31329948
  0.1843094  -0.40614292  0.954824   -0.5978664   0.06033286 -0.11532794
 -0.6174114   0.4522825  -0.01007149  0.06965208  0.05831638 -0.6743223
  0.26588488 -0.366090

(inf, '')

# 6. Output Model and Encoding DB

In [24]:
import csv

with open(path+'facenet/dict.csv', 'w') as csv_file:  
    writer = csv.writer(csv_file)
    for key, value in database.items():
       value = list(value)
       writer.writerow([key, value])
        # TODO: need to append, not overwrite

    #for key, value in database.items():
       #value = list(value)
       #value = np.fromstring(value.tostring())
       #re.sub("\s+", ",", value.strip())
       #value = "[" + value +"]"
       #print(value)
       #writer.writerow([key, value])