In [1]:
import os
import cv2
import dlib
import pickle
import random
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from datetime import datetime
from imutils import face_utils
from tensorflow.keras.layers import Input, Convolution2D, ZeroPadding2D, MaxPooling2D, Flatten, Dropout, Activation
from tensorflow.keras.models import Model

In [2]:
# Directory that the checkpoint location below is resolved against.
base_dir = "."

# Path handed to the checkpoint restore step later in the notebook.
checkpoint_path = os.path.join(
    base_dir,
    'logs/model/siamese-1',
)

In [3]:
# Short alias for the Keras backend module; preprocess_input() below queries it
# for the default image data format when the caller does not pass one.
K = tf.keras.backend

def preprocess_input(x, data_format=None, version=1):
    """Reverse the channel axis of an image batch and subtract per-channel offsets.

    The input is never modified; all work happens on a copy. Boolean and
    integer inputs (for example raw ``uint8`` images) are promoted to
    ``float64`` first, because the fractional offsets cannot be subtracted
    in place from an integer array. Floating-point inputs keep their dtype.

    Args:
        x: Array-like image batch. With ``'channels_first'`` it must be 4-D
            with channels on axis 1; with ``'channels_last'`` channels are on
            the last axis. At least three channels are required.
        data_format: ``'channels_last'``, ``'channels_first'`` or ``None``.
            ``None`` falls back to ``K.image_data_format()``.
        version: Selects which set of offsets is subtracted (1 or 2).
            Presumably these are the channel means of the datasets two
            different pretrained weight sets were trained on -- TODO confirm.

    Returns:
        numpy.ndarray: the channel-reversed, offset-subtracted copy of ``x``.

    Raises:
        AssertionError: if ``data_format`` is not one of the two known values.
        NotImplementedError: if ``version`` is neither 1 nor 2.
    """
    # Offsets are listed in the order they are applied AFTER the channel flip.
    channel_offsets = {
        1: (93.5940, 104.7624, 129.1863),
        2: (91.4953, 103.8827, 131.0912),
    }

    x_temp = np.copy(x)
    if x_temp.dtype.kind in 'biu':
        # In-place "-=" with a float on an integer array raises a casting
        # error, so promote to float before doing any arithmetic.
        x_temp = x_temp.astype(np.float64)

    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in {'channels_last', 'channels_first'}

    if version not in channel_offsets:
        raise NotImplementedError

    if data_format == 'channels_first':
        x_temp = x_temp[:, ::-1, ...]
        for channel, offset in enumerate(channel_offsets[version]):
            x_temp[:, channel, :, :] -= offset
    else:
        x_temp = x_temp[..., ::-1]
        for channel, offset in enumerate(channel_offsets[version]):
            x_temp[..., channel] -= offset

    return x_temp

In [4]:
# Convolutional trunk described as (filters, number of 3x3 conv layers) per
# stage. Every conv is preceded by a 1-pixel zero pad and every stage ends with
# a 2x2 / stride-2 max pool.
conv_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

vggface = tf.keras.models.Sequential()
# The very first padding layer also declares the expected input size.
vggface.add(ZeroPadding2D((1, 1), input_shape=(224, 224, 3)))
needs_padding = False
for stage_filters, stage_depth in conv_stages:
    for conv_index in range(stage_depth):
        if needs_padding:
            vggface.add(ZeroPadding2D((1, 1)))
        needs_padding = True
        vggface.add(Convolution2D(stage_filters, (3, 3), activation='relu'))
    vggface.add(MaxPooling2D((2, 2), strides=(2, 2)))

# Head: two 4096-filter convolutions (7x7, then 1x1), each followed by dropout,
# then a 2622-filter 1x1 convolution that is flattened and soft-maxed.
for head_kernel in ((7, 7), (1, 1)):
    vggface.add(Convolution2D(4096, head_kernel, activation='relu'))
    vggface.add(Dropout(0.5))
vggface.add(Convolution2D(2622, (1, 1)))
vggface.add(Flatten())
vggface.add(Activation('softmax'))

# Swap the softmax for a bias-free 128-unit projection.
vggface.pop()
vggface.add(tf.keras.layers.Dense(128, use_bias=False))

# Freeze everything except the last two layers (Flatten and the new Dense).
for layer in vggface.layers[:-2]:
    layer.trainable = False

In [5]:
class SiameseNetwork(tf.keras.Model):
    """Runs one shared embedding network over three image batches.

    The same ``vgg_face`` model is applied to each of the three inputs and
    every output is L2-normalised along its last axis. The branches are traced
    under the name scopes "Anchor", "Positive" and "Negative" (presumably the
    three roles of a triplet-loss setup -- the loss itself is not defined here).
    """

    def __init__(self, vgg_face):
        """Keep a reference to the shared embedding model ``vgg_face``."""
        super().__init__()
        self.vgg_face = vgg_face

    @tf.function
    def call(self, inputs):
        """Embed a 3-element sequence of image batches.

        Returns a list of three L2-normalised feature tensors, in the same
        order as ``inputs``.
        """
        image_1, image_2, image_3 = inputs
        branches = (
            ("Anchor", image_1),
            ("Positive", image_2),
            ("Negative", image_3),
        )
        embeddings = []
        for scope_name, batch in branches:
            with tf.name_scope(scope_name):
                raw_feature = self.vgg_face(batch)
                embeddings.append(tf.math.l2_normalize(raw_feature, axis=-1))
        return embeddings

    @tf.function
    def get_features(self, inputs):
        """Return the L2-normalised embedding of a single image batch."""
        raw_feature = self.vgg_face(inputs)
        return tf.math.l2_normalize(raw_feature, axis=-1)

In [6]:
model = SiameseNetwork(vggface)

# Call both entry points once on all-zero batches before restoring the
# checkpoint; the outputs themselves are discarded.
warmup_shape = (32, 224, 224, 3)
_ = model([tf.zeros(warmup_shape) for branch in range(3)])
_ = model.get_features(tf.zeros(warmup_shape))

# Restore the saved weights into the model under the "model" checkpoint key.
checkpoint = tf.train.Checkpoint(model=model)
checkpoint.restore(checkpoint_path)

# Folder expected to hold one sub-folder of images per person.
data_dir = 'data/'

In [7]:
# Capture enrollment images for one person from the default webcam.
# Keys while the preview window has focus:  's' saves the current frame,
# 'q' quits.
name = input("Enter the name of the person : ").strip()
if not name:
    # An empty name would resolve to data_dir itself and scatter images there.
    raise ValueError("A non-empty person name is required.")

# makedirs + exist_ok: works when data_dir is missing and when the cell is
# re-run for a person who already has a folder.
person_dir = os.path.join(data_dir, name)
os.makedirs(person_dir, exist_ok=True)

cap = cv2.VideoCapture(0)
count = 0
try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the default camera (index 0).")
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            # A failed grab yields no image; imshow would raise on it.
            print("Could not read a frame from the camera; stopping capture.")
            break
        cv2.imshow('Image', frame)
        # Mask to the low byte so the comparison with ord() is reliable on
        # platforms where waitKey sets higher bits.
        k = cv2.waitKey(1) & 0xFF
        if k == ord('s'):
            # Skip indices that already exist so earlier shots are never
            # overwritten when adding more images for the same person.
            image_path = os.path.join(person_dir, str(count) + '.png')
            while os.path.exists(image_path):
                count += 1
                image_path = os.path.join(person_dir, str(count) + '.png')
            cv2.imwrite(image_path, frame)
            count += 1
        elif k == ord('q'):
            break
finally:
    # Always free the camera and close the preview, even after an error or a
    # kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

In [8]:
# One label per sub-directory of data_dir. Plain files (e.g. hidden OS
# metadata files) are ignored because the enrollment loop lists each entry as
# a directory.
people = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
face_detector = dlib.get_frontal_face_detector()

# Filled by the enrollment loop: one mean embedding per person, as a list
# (order follows `people`) and as a name -> embedding mapping.
features = []
dumpable_features = {}

In [9]:
# Build one reference embedding per person: detect the single face in each
# stored image, crop and resize it to 224x224, embed the whole stack and
# average the embeddings.

# Start from empty containers so that re-running this cell does not append a
# second copy of every embedding (which would break the index-based lookup of
# names in `people`).
features = []
dumpable_features = {}
enrolled_people = []

for person in people:
    person_path = os.path.join(data_dir, person)
    if not os.path.isdir(person_path):
        # Stray file inside data_dir, not a person folder.
        continue
    print(person_path)
    images = []
    # Sorted so the result does not depend on the file system's listing order.
    for image in sorted(os.listdir(person_path)):
        image_path = os.path.join(person_path, image)
        img = cv2.imread(image_path)
        if img is None:
            # Not a readable image file.
            continue
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_detector(gray, 0)
        if len(faces) != 1:
            # Only unambiguous images (exactly one detected face) are used.
            continue
        x, y, w, h = face_utils.rect_to_bb(faces[0])
        if x < 0 or y < 0 or w <= 0 or h <= 0:
            # Box starts outside the image or is empty; the crop would be
            # wrong or cv2.resize would fail.
            continue
        frame = cv2.resize(img[y:y + h, x:x + w], (224, 224))
        images.append(np.asarray(frame, dtype=np.float64))

    if not images:
        # Embedding an empty stack fails inside preprocess_input, so skip.
        print('  no usable face image found, skipping ' + person)
        continue

    images = preprocess_input(np.asarray(images))
    images = tf.convert_to_tensor(images)
    feature = model.get_features(images)
    feature = tf.reduce_mean(feature, axis=0)
    mean_embedding = feature.numpy()
    features.append(mean_embedding)
    dumpable_features[person] = mean_embedding
    enrolled_people.append(person)

# Keep `people` index-aligned with `features`: only successfully enrolled
# persons remain.
people = enrolled_people

data/Aditya


In [10]:
# Turn the collected per-person embeddings into one NumPy array so a query
# embedding can be compared against all of them in a single subtraction.
features = np.asarray(features)
# Show the known labels; the recognition cell looks names up in `people` by the
# index of the closest entry in `features`.
print(people)

['Aditya']


In [11]:
# Live recognition: detect faces in each webcam frame, embed every face and
# label it with the closest enrolled person if the distance is small enough.
# Press 'q' while the preview window has focus to stop.

# Maximum Euclidean distance between a face embedding and a stored embedding
# for the face to be labelled with that person's name.
match_threshold = 0.8
font_face = cv2.FONT_HERSHEY_SIMPLEX

cap = cv2.VideoCapture(0)
count = 0  # not used in this cell; kept so the notebook's globals are unchanged
name = 'not identified'
try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the default camera (index 0).")
    while True:
        ret, img = cap.read()
        if not ret or img is None:
            # A failed grab yields no image; cvtColor would raise on it.
            print("Could not read a frame from the camera; stopping.")
            break
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        faces = face_detector(gray, 0)
        for face in faces:
            x, y, w, h = face_utils.rect_to_bb(face)
            if x < 0 or y < 0 or w <= 0 or h <= 0:
                # Box starts outside the frame or is empty.
                continue

            # Crop and resize BEFORE drawing anything: the slice is a view of
            # `img`, so a rectangle drawn first would put its outline into the
            # pixels fed to the network (enrollment crops have no outline).
            frame = cv2.resize(img[y:y + h, x:x + w], (224, 224))
            frame = np.asarray(frame, dtype=np.float64)
            frame = np.expand_dims(frame, axis=0)
            frame = preprocess_input(frame)
            feature = model.get_features(frame)

            name = 'not identified'
            if len(features) > 0:
                # Distance from this face to every enrolled embedding.
                dist = tf.norm(features - feature, axis=1)
                loc = int(tf.argmin(dist))
                if float(dist[loc]) < match_threshold:
                    name = people[loc]

            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(img, name, (x, y-5), font_face, 0.8, (0,0,255), 3)

        cv2.imshow('Image', img)
        # Mask to the low byte so the comparison with ord() is reliable on
        # platforms where waitKey sets higher bits.
        k = cv2.waitKey(1) & 0xFF
        if k == ord('q'):
            break
finally:
    # Always free the camera and close the preview, even after an error or a
    # kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()