In [1]:
import tensorflow as tf
# List the GPUs visible to TensorFlow (confirms whether TensorFlow can use a GPU).
physical_devices = tf.config.list_physical_devices('GPU')

# Enable on-demand memory growth for every detected GPU. Iterating instead of
# indexing [0] keeps this cell from raising IndexError on a CPU-only machine,
# and TensorFlow requires the memory-growth setting to match across all GPUs.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

In [2]:
import numpy as np
import cv2
from mtcnn.mtcnn import MTCNN
import os
import glob
from fr_utils import *
from inception_blocks_v2 import *
from utils import *
from keras import backend as K
# from keras.models import load_model

# encoder_model = 'facenet_keras.h5'
from sklearn.preprocessing import Normalizer
# Row-wise L2 normalizer; the cells below apply it to face embeddings
# before they are stored or compared.
l2_normalizer = Normalizer('l2')

# Single MTCNN face detector instance shared by the cells below.
face_detector = MTCNN()
# face_encoder = load_model(encoder_model)

In [3]:
# NOTE(review): PADDING and ready_to_detect_identity are not read by any of the
# visible cells — presumably leftovers from an earlier version; confirm before removing.
PADDING = 50
ready_to_detect_identity = True

# Switch Keras to channels-first layout *before* building the model, so it
# matches the (3, 96, 96) input shape requested below.
K.set_image_data_format('channels_first')
# NOTE(review): faceRecoModel presumably comes from the inception_blocks_v2 star import — confirm.
FRmodel = faceRecoModel(input_shape=(3, 96, 96))

In [4]:
def triplet_loss(y_true, y_pred, alpha = 0.3):
    """Triplet loss computed from anchor, positive and negative embeddings.

    Args:
        y_true: Not used; present only so the function has the
            ``(y_true, y_pred)`` signature of a Keras loss.
        y_pred: Indexable object whose items 0, 1 and 2 are the anchor,
            positive and negative embeddings respectively.
        alpha: Margin added to ``pos_dist - neg_dist`` before the hinge.

    Returns:
        Scalar tensor: the sum of ``max(pos_dist - neg_dist + alpha, 0)``.
    """
    def _squared_distance(first, second):
        # Squared Euclidean distance, reduced over the last axis.
        difference = tf.subtract(first, second)
        return tf.reduce_sum(tf.square(difference), axis=-1)

    anchor = y_pred[0]
    positive = y_pred[1]
    negative = y_pred[2]

    margin_gap = tf.add(
        tf.subtract(_squared_distance(anchor, positive),
                    _squared_distance(anchor, negative)),
        alpha,
    )
    # Hinge at zero: triplets that already satisfy the margin contribute nothing.
    return tf.reduce_sum(tf.maximum(margin_gap, 0.0))

In [5]:
# Compile the model with the Adam optimizer and the triplet_loss function,
# then load weights into it.
# NOTE(review): load_weights_from_FaceNet presumably comes from the fr_utils
# star import and loads pretrained FaceNet weights — confirm.
FRmodel.compile(optimizer = 'adam', loss = triplet_loss, metrics = ['accuracy'])
load_weights_from_FaceNet(FRmodel)

In [6]:
from keras.models import load_model
# from utils import get_face, get_encode, l2_normalizer, normalize

# Build the reference database: one L2-normalised embedding per person, computed
# from every image under people_dir/<person_name>/, then pickle it to disk.
import pickle  # explicit import so the cell does not depend on a star import exposing it

# hyper-parameters
people_dir = 'dataset/friends'
encodings_path = 'encodings/encodings.pkl'
required_size = (96, 96)

encoding_dict = dict()

# sorted() makes the processing order (and the printed names) deterministic.
for person_name in sorted(os.listdir(people_dir)):
    person_dir = os.path.join(people_dir, person_name)
    if not os.path.isdir(person_dir):
        # Stray files next to the person folders would make os.listdir fail.
        continue

    encodes = []
    for img_name in sorted(os.listdir(person_dir)):
        img_path = os.path.join(person_dir, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for unreadable or non-image files.
            print(f'skipping unreadable image: {img_path}')
            continue

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = face_detector.detect_faces(img_rgb)
        if not results:
            continue

        # Keep only the largest detection (box is x, y, width, height).
        res = max(results, key=lambda b: b['box'][2] * b['box'][3])
        face, _, _ = get_face(img_rgb, res['box'])
        if face.size == 0:
            # A zero-area crop cannot be resized; skip it.
            continue

        face = normalize(face)
        face = cv2.resize(face, required_size)

        # Reverse the last axis, move it to the front (channels-first, as the
        # model was built with), scale by 1/255 and add a batch axis.
        face_chw = np.around(np.transpose(face[..., ::-1], (2, 0, 1)) / 255.0, decimals=12)
        x_train = np.array([face_chw])
        encodes.append(FRmodel.predict_on_batch(x_train))

    if encodes:
        # Sum the per-image embeddings and rescale the result to unit length.
        encode = np.sum(encodes, axis=0)
        encode = l2_normalizer.transform(encode.reshape(1, -1))[0]
        encoding_dict[person_name] = encode


for key in encoding_dict.keys():
    print(key)

# Create the output folder if needed so open() does not fail on a fresh checkout.
os.makedirs(os.path.dirname(encodings_path) or '.', exist_ok=True)
with open(encodings_path, 'wb') as file:
    pickle.dump(encoding_dict, file)

courteney
kudrow
leblanc
matthew
schwimmer


In [7]:
def get_encode(model, face, size):
    """Compute the embedding of one face crop.

    Args:
        model: Object exposing ``predict_on_batch``.
        face: Face image array; it is indexed and transposed as a 3-axis
            array (presumably height x width x channels — confirm with caller).
        size: Target size handed to ``cv2.resize``.

    Returns:
        Whatever ``model.predict_on_batch`` returns for the one-image batch.
    """
    resized = cv2.resize(normalize(face), size)

    # Reverse the last axis, then move it to the front.
    channels_first = resized[..., ::-1].transpose(2, 0, 1)
    scaled = np.around(channels_first / 255.0, decimals=12)

    # Wrap the single image in a batch of one.
    batch = np.array([scaled])
    return model.predict_on_batch(batch)

In [8]:
# def get_face(img, box):
#     x1, y1, width, height = box
#     x1, y1 = abs(x1), abs(y1)
#     x2, y2 = x1 + width, y1 + height
#     face = img[y1:y2, x1:x2]
#     return face, (x1, y1), (x2, y2)

In [15]:
from scipy.spatial.distance import cosine

def recognize(img,
              detector,
              encoder,
              encoding_dict,
              recognition_t=0.5,
              confidence_t=0.95,
              required_size=(96, 96),):
    """Detect faces in a BGR frame, label each one and draw the result on it.

    Args:
        img: BGR image (as read by OpenCV). It is annotated in place.
        detector: Object exposing ``detect_faces(rgb_image)`` that returns
            dicts with ``'box'`` and ``'confidence'`` keys.
        encoder: Model passed to ``get_encode`` to embed each face crop.
        encoding_dict: Mapping of person name -> reference embedding.
        recognition_t: Maximum Euclidean distance for a match to be accepted.
        confidence_t: Detections with lower confidence are ignored.
        required_size: Size each face crop is resized to before encoding.

    Returns:
        The same ``img`` object, with rectangles and labels drawn on it.
    """
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Use the detector supplied by the caller rather than a module-level global.
    results = detector.detect_faces(img_rgb)
    for res in results:
        if res['confidence'] < confidence_t:
            continue
        face, pt_1, pt_2 = get_face(img_rgb, res['box'])
        if face.size == 0:
            # A zero-area crop cannot be resized/encoded; skip this detection.
            continue
        encode = get_encode(encoder, face, required_size)
        encode = l2_normalizer.transform(encode.reshape(1, -1))[0]

        # Nearest reference embedding that is also within recognition_t.
        name = 'unknown'
        distance = float("inf")
        for db_name, db_encode in encoding_dict.items():
            dist = np.linalg.norm(encode - db_encode)
            if dist < recognition_t and dist < distance:
                name = db_name
                distance = dist

        if name == 'unknown':
            cv2.rectangle(img, pt_1, pt_2, (0, 0, 255), 1)
            cv2.putText(img, name, pt_1, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
        else:
            cv2.rectangle(img, pt_1, pt_2, (0, 255, 0), 1)
            cv2.putText(img, name + f'-{distance:.3f}', (pt_1[0], pt_1[1] - 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (0, 200, 100), 1)
    return img

In [16]:
# Run recognition on every frame of the video and show the annotated frames
# in a preview window; press 'q' to stop early.
encodings_path = 'encodings/encodings.pkl'
video_path = 'dataset/friends.mp4'

# NOTE(review): load_pickle is presumably provided by the utils star import.
# Unpickling can execute arbitrary code — only load encodings files you created yourself.
encoding_dict = load_pickle(encodings_path)

vc = cv2.VideoCapture(video_path)
try:
    if not vc.isOpened():
        print(f"could not open video: {video_path}")

    while vc.isOpened():
        ret, frame = vc.read()
        if not ret:
            print("no frame:(")
            break
        frame = recognize(frame, face_detector, FRmodel, encoding_dict)
        cv2.imshow('camera', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture handle and close the preview window, even when
    # the loop is interrupted or recognize() raises.
    vc.release()
    cv2.destroyAllWindows()


In [None]:
# def img_to_encoding_dataset(image_path, model):
    
#     image = cv2.imread(image_path, 1)
#     face = detector.detect_faces(image)
    
#     x1, y1, x2, y2 = face[0]['box']
        
#     image = image[y1:y1 + y2, x1:x1 + x2]  
#     image = normalize(image)
#     image = cv2.resize(image, (96, 96)) 
    
#     cv2.imshow('image', image) 
#     cv2.waitKey(0)
#     cv2.destroyAllWindows() 
    
#     img = image[...,::-1]
#     img = np.around(np.transpose(img, (2,0,1))/255.0, decimals=12)
#     x_train = np.array([img])
#     embedding = model.predict_on_batch(x_train)
#     return embedding

In [None]:

# people_dir = 'dataset/friends'
# database = {}

# for person_name in os.listdir(people_dir):
#     person_dir = os.path.join(people_dir, person_name)
#     for img_name in os.listdir(person_dir):
#         img_path = os.path.join(person_dir, img_name)
#         database[person_name] = img_to_encoding_dataset(img_path, FRmodel)

# # for file in glob.glob("dataset/friends/*"):
    
# #     identity = os.path.splitext(os.path.basename(file))[0]
# #     database[identity] = img_to_encoding_dataset(file, FRmodel)

In [None]:
# def img_to_encoding(image, model):

#     image = normalize(image)
#     image = cv2.resize(image, (96, 96)) 
#     img = image[...,::-1]
#     img = np.around(np.transpose(img, (2,0,1))/255.0, decimals=12)
#     x_train = np.array([img])
#     embedding = model.predict_on_batch(x_train)
#     return embedding

In [None]:
# def find_identity(frame, x1, y1, x2, y2):

#     frame = frame[y1:y2, x1:x2]

#     return who_is_it(frame, database, FRmodel)

In [None]:
# def who_is_it(image_path, database, model):
    
#     encoding = img_to_encoding(image_path, model)
#     min_dist = 100
    
#     for (name, db_enc) in database.items():
        
#         dist = np.linalg.norm(encoding-database[name])
#         if dist < min_dist:
#             min_dist = dist
#             identity = name
    
# #     if min_dist > 0.7:
# #         print("Not in the database.")
# #     else:
# #         print ("it's " + str(identity) + ", the distance is " + str(min_dist))
        
#     return min_dist, identity

In [None]:
# puth = 'dataset/friends.mp4'

# video_capture = cv2.VideoCapture(puth)

# while video_capture.isOpened():

#     _, frame = video_capture.read()
#     image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#     faces = detector.detect_faces(image)
    
#     for face in faces:
        
#         x1, y1, x2, y2 = face['box']
#         x2 = x1+x2
#         y2 = y1+y2
#         min_dist, identity = find_identity(frame, x1, y1, x2, y2)
#         cv2.rectangle(frame, (x1, y1), (x2, y2), (0,0,255), 1)
        
#         cv2.putText(frame, str(float('{:.4f}'.format(min_dist))) + " - " + str(identity), (x1, y1 - 20), 
#                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) #lineType=cv2.LINE_AA)        

#     cv2.imshow('Video', frame)
#     cv2.waitKey(40)

#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# video_capture.release()
# cv2.destroyAllWindows()