# FaceNET
### Docker
docker run --gpus all -it -p 8888:8888 -v /media/v1pi/DATA/DOCUMENTS/Workspaces:/tf/Workspaces -w /tf/Workspaces  tensorflow/tensorflow:latest-gpu-py3-jupyter
### Dependencies
apt-get update <br>
apt-get install cmake libsm6 libxext6 libxrender-dev <br>
pip3 install opencv-python dlib requests keras tensorflow_hub scikit-learn tqdm pandas <br>
pip3 install torch==1.5.0+cu101 torchvision==0.6.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html <br>
pip3 install tensorflowjs --no-deps <br>

In [4]:
import glob
import os
import cv2
import numpy as np
import logging
import tensorflow as tf
from keras import backend as K
from tensorflow.keras.models import model_from_json
from preprocess import init_pre_proccess
import tensorflowjs as tfjs
from numpy import savez_compressed
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC
from tqdm.notebook import tqdm
from datetime import datetime

Using TensorFlow backend.


In [7]:
# Define triplet_loss function
def triplet_loss(y_true, y_pred, alpha = 0.3):
    """Triplet loss computed from an (anchor, positive, negative) group.

    Args:
        y_true: Unused; present only so the signature matches a Keras loss.
        y_pred: Indexable container whose items 0, 1 and 2 are the anchor,
            positive and negative embeddings respectively.
        alpha: Margin added to the distance difference before clipping.

    Returns:
        Scalar tensor: sum over all entries of
        max(||a - p||^2 - ||a - n||^2 + alpha, 0).
    """
    anchor = y_pred[0]
    positive = y_pred[1]
    negative = y_pred[2]

    # Squared Euclidean distances, reduced over the last axis.
    anchor_to_positive = tf.reduce_sum(
        tf.square(tf.subtract(anchor, positive)), axis=-1)
    anchor_to_negative = tf.reduce_sum(
        tf.square(tf.subtract(anchor, negative)), axis=-1)

    # Only triplets that violate the margin contribute to the loss.
    margin_violation = tf.add(
        tf.subtract(anchor_to_positive, anchor_to_negative), alpha)
    return tf.reduce_sum(tf.maximum(margin_violation, 0.0))

# Export model to tfjs
def save_to_tfjs(model):
    """Export `model` with the tensorflowjs Keras converter to <ROOT_PATH>/models/tfjs."""
    tfjs_dir = os.path.join(ROOT_PATH, 'models', 'tfjs')
    tfjs.converters.save_keras_model(model, tfjs_dir)

# Export to model format
def save_model(model, name='model1'):
    """Persist `model` under <ROOT_PATH>/models/faceNET.

    The architecture is written to `<name>.json` and the weights to
    `<name>.h5`, which is the pair `load_model_from_json` reads back.

    Args:
        model: Object exposing `to_json()` and `save_weights(path)`.
        name: Base file name (without extension) for both files.
    """
    target_dir = os.path.join(ROOT_PATH, 'models', 'faceNET')
    architecture_path = os.path.join(target_dir, '{0}.json'.format(name))
    weights_path = os.path.join(target_dir, '{0}.h5'.format(name))

    # Architecture as JSON text (serialized before the file is opened).
    architecture_json = model.to_json()
    with open(architecture_path, "w") as architecture_file:
        architecture_file.write(architecture_json)

    # Weights as HDF5.
    model.save_weights(weights_path)
    print("Saved model to disk")
    
# load json and create model
def load_model_from_json(name='model1'):
    """Rebuild a model from the JSON/HDF5 pair written by `save_model`.

    Args:
        name: Base file name (without extension) under
            <ROOT_PATH>/models/faceNET.

    Returns:
        The model created by `model_from_json` with its weights loaded.
        The model is not compiled here.

    Raises:
        OSError: If the JSON file cannot be opened (e.g. it does not exist).
    """
    pathModel = os.path.join(ROOT_PATH, 'models', 'faceNET', '{0}.json'.format(name))
    pathWeights = os.path.join(ROOT_PATH, 'models', 'faceNET', '{0}.h5'.format(name))
    # Context manager guarantees the handle is closed even if read() fails.
    with open(pathModel, 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights(pathWeights)
    print("Loaded model from disk")
    return loaded_model

def load_model(name='model1'):
    """Load a full Keras model and then its separate weights file.

    Reads `<name>.h5` (model) and `<name>_weights.h5` (weights) from
    <ROOT_PATH>/models/FaceNET-Keras.

    Args:
        name: Base file name shared by both files.

    Returns:
        The loaded Keras model with the weights file applied on top.
    """
    keras_dir = os.path.join(ROOT_PATH, 'models', 'FaceNET-Keras')
    model_file = os.path.join(keras_dir, '{0}.h5'.format(name))
    weights_file = os.path.join(keras_dir, '{0}_weights.h5'.format(name))

    restored = tf.keras.models.load_model(model_file)
    # Apply the standalone weights on top of the restored model.
    restored.load_weights(weights_file)
    print("Loaded model from disk")
    return restored

In [8]:
def who_is_it(image, database, model, threshold=0.55):
    """Return the database identity closest to the face in `image`.

    Args:
        image: Path of the image to identify (passed to `img_to_encoding`).
        database: Mapping of identity name -> stored encoding(s). The
            distance is the norm of `db_enc - encoding`, so the stored value
            must broadcast against a single encoding.
        model: Embedding model forwarded to `img_to_encoding`.
        threshold: Largest distance still accepted as a match. Defaults to
            0.55, the value that used to be hard-coded.

    Returns:
        The best-matching name, or None when the database is empty or the
        smallest distance exceeds `threshold`.
    """
    encoding = img_to_encoding(image, model)

    # Start from infinity so the nearest entry is always found, whatever
    # the threshold is.
    min_dist = float('inf')
    identity = None

    # Loop over the database dictionary's names and encodings.
    for (name, db_enc) in database.items():
        dist = np.linalg.norm(db_enc - encoding)
        print('distance for %s is %s' %(name, dist))
        if dist < min_dist:
            min_dist = dist
            identity = name

    if min_dist > threshold:
        return None
    return identity
def get_embedding(model, face_pixels):
    """Standardise one face image and return the model's embedding for it.

    Args:
        model: Object with a `predict(batch)` method.
        face_pixels: Array of pixel values for a single face.

    Returns:
        The first (only) row of `model.predict` for the one-sample batch.
    """
    # scale pixel values
    face_pixels = face_pixels.astype('float32')
    # standardize pixel values across channels (global)
    mean, std = face_pixels.mean(), face_pixels.std()
    if std == 0:
        # A constant image has zero variance; dividing by 0 would fill the
        # input with NaN/inf. Use 1 so the standardised image is all zeros.
        std = 1.0
    face_pixels = (face_pixels - mean) / std
    # transform face into one sample
    samples = np.expand_dims(face_pixels, axis=0)
    # make prediction to get embedding
    yhat = model.predict(samples)
    return yhat[0]

def img_to_encoding(image_path, model):
    """Read an image from disk and return its embedding.

    Args:
        image_path: Path of the image file.
        model: Embedding model forwarded to `get_embedding`.

    Returns:
        The embedding produced by `get_embedding` for the loaded image.

    Raises:
        ValueError: If OpenCV cannot read or decode the file.
    """
    # Flag 1 loads a 3-channel colour image.
    # NOTE(review): OpenCV returns channels in BGR order and no conversion is
    # applied here — confirm this matches what the embedding model expects.
    img1 = cv2.imread(image_path, 1)
    if img1 is None:
        # imread signals failure by returning None instead of raising.
        raise ValueError('Could not read image: {0}'.format(image_path))
    return get_embedding(model, img1)

In [9]:
# Allow logging
logging.basicConfig(level=logging.INFO)
# Get current path: the literal string '__file__' is resolved relative to the
# current working directory, so its dirname is the directory the kernel runs in.
ROOT_PATH = os.path.dirname(os.path.realpath('__file__'))
# Enable on-demand GPU memory allocation. Iterating handles CPU-only hosts
# (empty list, previously an IndexError) and applies the same setting to
# every visible GPU.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

In [17]:
# Preprocess the training images: reads from images/train, writes to output/train.
# The last argument (160) is presumably the target face size in pixels —
# TODO confirm against preprocess.init_pre_proccess.
# NOTE: slow step; the recorded run logged roughly 910 seconds.
init_pre_proccess(os.path.join(ROOT_PATH, 'images', 'train'), os.path.join(ROOT_PATH, 'output', 'train'), 160)







INFO:preprocess:Completed in 910.2716233730316 seconds


In [18]:
# Same preprocessing for the test images: images/test -> output/test.
init_pre_proccess(os.path.join(ROOT_PATH, 'images', 'test'), os.path.join(ROOT_PATH, 'output', 'test'), 160)

INFO:preprocess:Completed in 6.556511878967285 seconds


In [10]:
# Load the Keras FaceNet model; create_encoding_database reads this global
# to compute the face embeddings.
frModel = load_model('facenet_keras')
# Compilation is left disabled: the model is only used for inference below.
#frModel.compile(optimizer = 'adam', loss = triplet_loss, metrics = ['accuracy'])





Loaded model from disk


In [11]:
def train(actor, model, type_mode = 'train'):
    """Compute the embedding of every preprocessed image of one actor.

    Despite the name, nothing is trained: images found under
    <ROOT_PATH>/output/<type_mode>/<actor> are encoded with `model`.

    Args:
        actor: Sub-directory name of the actor.
        model: Embedding model forwarded to `img_to_encoding`.
        type_mode: Dataset split directory ('train' by default).

    Returns:
        numpy array with one embedding per matched image (empty when none).
    """
    actor_dir = os.path.join(ROOT_PATH, 'output', type_mode, actor)

    matched_files = []
    for pattern in ('*.jpeg', '*.jpg', '*.png'):
        matched_files += glob.glob(os.path.join(actor_dir, pattern))

    return np.array([img_to_encoding(path, model) for path in matched_files])

def test_data(actor, database, model):
    """Print the identity `who_is_it` assigns to each test image of `actor`.

    Args:
        actor: Sub-directory name under <ROOT_PATH>/output/test.
        database: Identity -> encoding mapping forwarded to `who_is_it`.
        model: Embedding model forwarded to `who_is_it`.
    """
    actor_dir = os.path.join(ROOT_PATH, 'output', 'test', actor)

    matched_files = []
    for pattern in ('*.jpeg', '*.jpg', '*.png'):
        matched_files += glob.glob(os.path.join(actor_dir, pattern))

    for image_path in matched_files:
        print(who_is_it(image_path, database, model))

def read_database_ids(filename):
    """Return the lines of `filename` with trailing whitespace stripped.

    Args:
        filename: Path of the text file to read.

    Returns:
        List of strings, one per line, in file order.
    """
    with open(filename) as ids_file:
        return [raw_line.rstrip() for raw_line in ids_file]

In [12]:
def create_encoding_database(database_name, type_mode= 'train'):
    """Build an actor-id -> embeddings mapping from an id list file.

    The file <ROOT_PATH>/database/<database_name> holds the actor count on
    its first line (used as the progress-bar total) and one actor id on
    every following line. Actors whose output directory is missing or
    empty are skipped. Embeddings are computed with the global `frModel`.

    Args:
        database_name: File name inside <ROOT_PATH>/database.
        type_mode: Dataset split directory under <ROOT_PATH>/output.

    Returns:
        Dict mapping actor id to the array returned by `train`.
    """
    id_lines = read_database_ids(os.path.join(ROOT_PATH, 'database', database_name))
    expected_total = int(id_lines[0])

    encodings_by_actor = {}
    with tqdm(total= expected_total) as progress:
        for actor_id in id_lines[1:]:
            folder = os.path.join(ROOT_PATH, 'output', type_mode, actor_id)
            is_populated = os.path.isdir(folder) and len(os.listdir(folder)) != 0
            if is_populated:
                encodings_by_actor[actor_id] = train(actor_id, frModel, type_mode)
            # Advance for every listed actor, including skipped ones.
            progress.update(1)
    return encodings_by_actor

In [13]:
def preprocess_database(database):
    """Flatten an identity -> embeddings mapping into parallel arrays.

    Args:
        database: Mapping of identity key to an iterable of embeddings, each
            reshapeable to 128 values.

    Returns:
        Tuple `(values, labels, classes)`: `values` stacks every embedding
        reshaped to length 128, `labels` holds the matching key wrapped in a
        1-element array per row, and `classes` is the list of keys.
    """
    rows = [
        (np.reshape(embedding, [128]), np.array([identity]))
        for identity, embeddings in database.items()
        for embedding in embeddings
    ]
    stacked_values = np.array([vector for vector, _ in rows])
    stacked_labels = np.array([label for _, label in rows])
    return stacked_values, stacked_labels, list(database.keys())

In [14]:
def create_processed_data(database_ids_name, processed_file_name, type_mode='train'):
    """Encode a whole split and save it as a compressed .npz archive.

    The archive is written to
    <ROOT_PATH>/processed/<processed_file_name>_<dd-mm-YYYY>_embeddings.npz
    with today's date, and the arrays are stored positionally (values,
    labels, classes), so they are read back as arr_0, arr_1 and arr_2.

    Args:
        database_ids_name: Id list file name forwarded to
            `create_encoding_database`.
        processed_file_name: Prefix of the archive file name.
        type_mode: Dataset split directory ('train' by default).
    """
    values, labels, classes = preprocess_database(
        create_encoding_database(database_ids_name, type_mode))

    date_tag = datetime.today().strftime('%d-%m-%Y')
    archive_name = '{0}_{1}_embeddings.npz'.format(processed_file_name, date_tag)
    archive_path = os.path.join(ROOT_PATH, 'processed', archive_name)
    savez_compressed(archive_path, values, labels, classes)

In [48]:
# Build and save the training embedding database.
create_processed_data('database_13-06-2020', 'trained')

HBox(children=(FloatProgress(value=0.0, max=9501.0), HTML(value='')))




In [47]:
# Build and save the test embedding database.
create_processed_data('test_database_13-06-2020', 'test', 'test')

HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))




## Carregando os dados do BD

In [15]:
# Load the training archive written by create_processed_data. The arrays were
# saved positionally, hence arr_0 (values), arr_1 (labels), arr_2 (classes).
# NOTE(review): the file name embeds the date of the run that produced it, so
# it must be updated whenever the archive is regenerated on another day.
with np.load(os.path.join(ROOT_PATH, 'processed', 'trained_13-06-2020_embeddings.npz'), allow_pickle=True) as f:
    values, labels, classes = f['arr_0'], f['arr_1'], f['arr_2']

In [16]:
# Load the test archive; only the values (arr_0) and labels (arr_1) are needed,
# the class list comes from the training archive.
with np.load(os.path.join(ROOT_PATH, 'processed', 'test_13-06-2020_embeddings.npz'), allow_pickle=True) as f:
    values_test, labels_test = f['arr_0'], f['arr_1']

## Classificando com SVM

In [5]:
# L2-normalise each training embedding before feeding it to the SVM.
in_encoder = Normalizer(norm='l2')
x_train = in_encoder.transform(values)

In [6]:
# Map identity strings to integer class ids for the SVM.
out_encoder = LabelEncoder()
# `labels` holds one 1-element row per sample; LabelEncoder expects a 1-D
# array and otherwise emits a conversion warning, so flatten it first.
out_encoder.fit(np.ravel(labels))
y_train = out_encoder.transform(np.ravel(labels))

  return f(**kwargs)


In [7]:
# Linear-kernel SVM; probability=True is required for predict_proba later on.
model = SVC(kernel='linear', probability=True)

In [8]:
# Fit the SVM on the normalised training embeddings and integer class ids.
model.fit(x_train, y_train)

SVC(kernel='linear', probability=True)

In [9]:
# Apply the same normalisation and label encoding to the test split.
x_test = in_encoder.transform(values_test)
# Flatten the per-sample 1-element label rows: LabelEncoder expects 1-D input
# and otherwise emits a conversion warning.
y_test = out_encoder.transform(np.ravel(labels_test))
# Mean accuracy of the SVM on the test split.
model.score(x_test, y_test)

  return f(**kwargs)


0.8571428571428571

In [41]:
# Class-probability vector predicted for the first training sample.
prob = model.predict_proba([x_train[0]])

## Classificando com Fully Connected

In [17]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Input, concatenate
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import model_from_json
import pandas as pd

In [18]:
def create_classification_model(n_classes, embedding_dim=128):
    """Build a small dense classifier on top of face embeddings.

    Architecture: Input(embedding_dim) -> Dense(256, relu) ->
    Dense(512, relu) -> Dense(n_classes, softmax).

    Args:
        n_classes: Number of output classes (size of the softmax layer).
        embedding_dim: Length of each input embedding vector. Defaults to
            128, the value that used to be hard-coded.

    Returns:
        An uncompiled Keras `Model`.
    """
    # `shape` must be a tuple; the previous `(128)` was just the int 128.
    embedding_input = Input(shape=(embedding_dim,))
    hidden = Dense(256, activation='relu')(embedding_input)
    hidden = Dense(512, activation='relu')(hidden)
    class_probabilities = Dense(n_classes, activation='softmax')(hidden)

    return Model(embedding_input, class_probabilities)

In [19]:
# Number of identities stored in the training archive.
n_classes = len(classes)

In [20]:
# Force the class array into a flat 1-D shape of length n_classes.
classes = np.reshape(classes, (n_classes,))

In [21]:
# The dummy frame is built only for its column labels: classes_ids fixes the
# class-index <-> identity mapping used for the one-hot targets below.
classes_dummies = pd.get_dummies(classes)
classes_ids = classes_dummies.columns.to_list()

In [22]:
# Integer class index of every training sample. A dict lookup replaces the
# per-sample list.index() scan (quadratic in samples x classes), and the
# labels are flattened so plain strings are compared instead of 1-element
# arrays.
class_index_by_id = {class_id: position for position, class_id in enumerate(classes_ids)}
y_train_real = np.array([class_index_by_id[label] for label in np.ravel(labels)])

In [23]:
# One-hot encode the integer class indices for the softmax classifier.
# NOTE: this rebinds y_train, which earlier held the SVM's integer labels.
y_train = to_categorical(y_train_real, num_classes=n_classes)

In [24]:
# Integer class index of every test sample, then one-hot targets. A dict
# lookup replaces the per-sample list.index() scan, and the labels are
# flattened so plain strings are compared instead of 1-element arrays.
test_class_index_by_id = {class_id: position for position, class_id in enumerate(classes_ids)}
y_test_real = np.array([test_class_index_by_id[label] for label in np.ravel(labels_test)])
y_test = to_categorical(y_test_real, num_classes=n_classes)

In [25]:
def optimizer():
    """Return a fresh SGD optimizer with a learning rate of 0.01.

    A new instance is created on every call so each `compile` gets its own
    optimizer state.
    """
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    return SGD(learning_rate=1e-2)

In [26]:
# L2-normalise the train and test embeddings for the dense classifier.
in_encoder = Normalizer(norm='l2')
x_train = in_encoder.transform(values)
x_test = in_encoder.transform(values_test)

In [27]:
# Base file name under which the classifier is saved and reloaded
# (see save_model / load_model_from_json).
model_name = 'classification9'

In [35]:
batch_size = 2
try:
    # Resume from the previously saved classifier when one exists.
    model_tf = load_model_from_json(model_name)
except OSError:
    # Only a missing/unreadable model file triggers a fresh network. Any other
    # failure now surfaces instead of being swallowed and followed by an
    # overwrite of the saved model at the end of this cell.
    print("No saved model found, creating a new one")
    model_tf = create_classification_model(n_classes)
model_tf.compile(optimizer=optimizer(),
          loss='categorical_crossentropy',
          metrics=['accuracy']
          )
# NOTE(review): the test split doubles as validation data here, so the test
# accuracy reported later is not an independent estimate.
model_tf.fit(x_train, y_train, batch_size=batch_size, epochs=10, validation_data=(x_test,y_test),verbose=1)
model_tf.summary()
save_model(model_tf, model_name)

Loaded model from disk
Train on 17870 samples, validate on 28 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 128)]             0         
_________________________________________________________________
dense (Dense)                (None, 256)               33024     
_________________________________________________________________
dense_1 (Dense)              (None, 512)               131584    
_________________________________________________________________
dense_2 (Dense)              (None, 7720)              3960360   
Total params: 4,124,968
Trainable params: 4,124,968
Non-trainable params: 0
_________________________________________________________________
Saved model to disk


In [28]:
# Reload the saved classifier from disk and evaluate it on the test split.
# NOTE: this rebinds `model`, which earlier referred to the SVC.
model = load_model_from_json(model_name)
# A model rebuilt from JSON is not compiled, so compile before evaluate().
model.compile(optimizer=optimizer(),
          loss='categorical_crossentropy',
          metrics=['accuracy']
          )
# scores[0] is the loss, scores[1] the accuracy metric.
scores = model.evaluate(x_test, y_test, verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100), "| Loss: %.5f" % (scores[0]))
# Per-class probabilities for every test sample, consumed by show_predicts.
result = model.predict(x_test)

Loaded model from disk
Accuracy: 96.43% | Loss: 0.28114


In [27]:
def show_predicts(data, answer, classes_ids):
    """Print the expected and predicted class id for every sample.

    Args:
        data: Sequence of per-class score vectors (one per sample).
        answer: Sequence of target vectors aligned with `data`; the position
            of the maximum is taken as the expected class.
        classes_ids: Sequence mapping a class index to its identifier.
    """
    for row_index, scores in enumerate(data):
        predicted_label = classes_ids[np.argmax(scores)]
        expected_label = classes_ids[np.argmax(answer[row_index])]
        print('expected {0} received {1}'.format(expected_label, predicted_label))

In [28]:
# Print expected vs. predicted identity for every test sample.
show_predicts(result, y_test, classes_ids)

expected 131 received 131
expected 131 received 131
expected 131 received 131
expected 131 received 131
expected 1245 received 1245
expected 1245 received 1245
expected 1245 received 1245
expected 1245 received 1245
expected 1339 received 1339
expected 1339 received 1339
expected 1339 received 1339
expected 1339 received 1339
expected 1276 received 1276
expected 1276 received 1276
expected 1276 received 1276
expected 1276 received 1276
expected 1331 received 1331
expected 1331 received 1331
expected 1331 received 1331
expected 1331 received 1331
expected 1909 received 1909
expected 1909 received 1909
expected 1909 received 1909
expected 1909 received 14886
expected 1665 received 1665
expected 1665 received 1665
expected 1665 received 1665
expected 1665 received 1665


In [31]:
# Export the class-index -> identity mapping, one id per line, to
# <ROOT_PATH>/database/classes_ids.
pathModel = os.path.join(ROOT_PATH, 'database', 'classes_ids')

# Build the text in one pass. The generator variable stays local, so the
# notebook-level `classes` array is no longer overwritten by a loop variable.
teste = ''.join(class_id + '\n' for class_id in classes_ids)

with open(pathModel, "w") as classes_file:
    classes_file.write(teste)

In [28]:
# Display the installed tensorflowjs version used for the export.
tfjs.__version__

'1.7.4r1'