In [1]:
"""Importing neccessary modules"""
import numpy as np
import pandas as pd
import glob
import cv2
import math
import tensorflow as tf
import keras.backend as K
from numpy import random
from keras_facenet import FaceNet
import matplotlib.pyplot as plt
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score , roc_curve
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.models import Model
from keras.layers import Dense , Flatten , Input ,BatchNormalization , concatenate , Lambda

In [2]:
"""we will use pretrained model which will take 160*160*3 size images as input"""
from tensorflow.keras.models import load_model
img_shape = (160,160,3)

In [3]:
"""Read the image to ie convert them into np array"""
def read_image(img):
    image = cv2.imread(img)
    image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
    return image

In [4]:
"""Names of celebrity on whom images model will be trained"""
names = [
    'alexandro_dadario','aishwarya_rae','burak_deniz','chris_hemsworth','elon_musk','gayatri_bharadwaj',
    'hande_ercel','virat_kohli','ronaldo','sushant','samantha','hritik_roshan','priyanka_chopra',
    'nidhi_agarwal','kit_harington','kristen_stewart','nayantra','rasmika_mandhana','sundar_pichai',
    'zayn_malik','vicky_kaushal','ana_de_armas','amber_heard','chris_evans','DeepikaPadukone','mahesh_babu',
    'modiji','priya','sanjana_sanghi','scarlet_johnson'
]

In [5]:
len(names)

30

In [6]:
"""Mapping each celebrity number"""
names_to_label = {}
labels_to_name = {}
faces = {}
imgshape = (160,160,3)
cnt = len(glob.glob('./New_data/**/*.jpg'))

In [7]:
"""Mappped"""
for i,name in enumerate(names):
    names_to_label[name] = i
    labels_to_name[i] = name

In [8]:
"""Read + preprocessing image"""
def readimage(url):
    img = cv2.imread(url)
    img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
    img = np.asarray(img).astype('float32')
    img = cv2.resize(img,(160,160))
    img = img/255.0
    return img
    

In [9]:
cnt = len(glob.glob('./New_data/**/*.jpg'))

In [10]:
"""X and y to store the images , dataset + label"""
X = np.zeros((cnt,)+img_shape,dtype='float32')
y = np.zeros(cnt,dtype='float32')

In [11]:
X.shape

(3524, 160, 160, 3)

In [12]:
for i,img in enumerate(glob.glob('./New_data/**/*.jpg')):
    X[i] = readimage(img)
    name = img.split('/')[2]
    y[i] = names_to_label[name]

In [13]:
"""Split the dataset into 3 parts 
1. training
2.validation
3. test
stratified random sampling performed before spliting
"""
X_train ,  X_test , y_train, y_test = \
        train_test_split(X,y,shuffle=True,stratify=y,random_state=42,test_size=.1)

In [14]:
X_train ,  X_val , y_train, y_val = \
        train_test_split(X_train,y_train,shuffle=True,stratify=y_train,random_state=42,test_size=.1)

In [15]:
# """Function to create image pair(they could be of same or different person) , with the label of 0 if they are same ,
# else 0"""
# def make_pair(X,y):
#     num_class = np.unique(y).shape[0] # number of classes 
#     indices_pos = [np.squeeze(np.where(y == i)) for i in range(num_class)] # index of same celebrity images as anchor
#     indices_neg = [np.squeeze(np.where(y != i)) for i in range(num_class)] # index of different celebrity from anchor
    
    
#     pairs , labels = [] ,[]
    
#     for idx in range(X.shape[0]):
#         anchor = X[idx] # anchor image
#         label = int(y[idx]) # anchor image label
        
#         positive = X[np.random.choice(indices_pos[label])] # positive image
#         negative = X[np.random.choice(indices_neg[label])] # negative image
        
#         pairs += [[anchor, positive]] # creating a pair of anchor + positive
#         labels += [1] # corresponding label of 1 as both image is of same person
        
#         pairs += [[anchor, negative]] # creating a pair of anchor + negative
#         labels += [0] # corresponding label of 0 as both image is of different person
        
#     return np.array(pairs), np.array(labels).astype('float32')

def make_pair(X,y):
    num_class =np.unique(y).shape[0]
    indices_pos = [np.squeeze(np.where(y == i)) for i in range(num_class)]
    indices_neg = [np.squeeze(np.where(y != i)) for i in range(num_class)]
    
    
    pairs , labels = [] ,[]
    
    for idx in range(X.shape[0]):
        x1 = X[idx]
        label = int(y[idx])
        
        x2 = X[np.random.choice(indices_pos[label])]
        x3 = X[np.random.choice(indices_neg[label])]
        
        pairs += [[x1, x2]]
        labels += [1]
        
        pairs += [[x1, x3]]
        labels += [0]
        
    return np.array(pairs), np.array(labels).astype('float32')

In [16]:
"""Trainng pairs"""
pair_train , labels_train= make_pair(X_train,y_train)

In [17]:
"""Test pair"""
pair_test , labels_test= make_pair(X_test,y_test)

In [18]:
"""Validation pair"""
pair_val , labels_val= make_pair(X_val,y_val)

In [19]:
x_train_1 = pair_train[:, 0]  
x_train_2 = pair_train[:, 1]

x_val_1 = pair_val[:, 0]  
x_val_2 = pair_val[:, 1]

x_test_1 = pair_test[:, 0]  
x_test_2 = pair_test[:, 1]

In [20]:
"""Model will output 128 dimenstion vector from the image, we will calculate the euclidean distance"""
def euclidean_distance(vects):
    x, y = vects # vector is a pair 
#   Formula of euclidean distance sqrt(x1-x2)*(x1-x2) + (y1-y2)*(y1-y2))
    sum_square = tf.math.reduce_sum(tf.math.square(x - y), axis=1, keepdims=True)
    return tf.math.sqrt(tf.math.maximum(sum_square, tf.keras.backend.epsilon()))

In [21]:
"""Importing pretrained model"""
from keras.applications.inception_resnet_v2 import InceptionResNetV2 , preprocess_input

In [22]:
'''Creating an instance of it'''
resnet = InceptionResNetV2(include_top=False,weights='imagenet',input_shape=img_shape)

Metal device set to: Apple M1


2022-08-04 09:46:09.833143: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-08-04 09:46:09.833759: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [23]:
'''Creating function Cnn model , we will take the output of InceptionResnetv2 model'''
x = resnet.layers[-1].output
x = Flatten()(x) # flatten the output of resnet model output
x = Dense(512,activation='relu')(x) # applying ANN to reduce it to 512 vector size
out = Dense(128,activation='relu')(x) # further to  128
model = Model(resnet.input,out)
# Above model will take an image as input and will return a 128 sized vector , which is the embedding of each iamge

In [24]:
input_1 =Input(img_shape)
input_2 = Input(img_shape)

tower_1 = model(input_1)
tower_2 = model(input_2)

In [None]:
"""Merge the image pairs embeddings, normalise them , further applying Ann to reduce final output to 1 sized vector"""
merge_layer = Lambda(euclidean_distance)([tower_1, tower_2])
normal_layer = BatchNormalization()(merge_layer)
output_layer = Dense(1, activation="sigmoid")(normal_layer)
siamese = Model(inputs=[input_1, input_2], outputs=output_layer)
# Above siamese model take a pair as input , an return a number , which if close to 1 means they belongs to same person 
# if close to 0 , belongs to different persons

In [None]:
def loss(margin=1):
    # Contrastive loss = mean( (1-true_value) * square(prediction) +
    #                         true_value * square( max(margin-prediction, 0) ))
    def contrastive_loss(y_true, y_pred):
        """Calculates the constrastive loss.

        Arguments:
            y_true: List of labels, each label is of type float32.
            y_pred: List of predictions of same length as of y_true,
                    each label is of type float32.

        Returns:
            A tensor containing constrastive loss as floating point value.
        """

        square_pred = tf.math.square(y_pred)
        margin_square = tf.math.square(tf.math.maximum(margin - (y_pred), 0))
        return tf.math.reduce_mean(
            (1 - y_true) * square_pred + (y_true) * margin_square
        )

    return contrastive_loss


In [None]:
epochs = 20
batch_size = 48
margin = 1.5  # Margin for constrastive loss.

In [None]:
siamese.compile(loss=loss(margin=margin), optimizer=tf.optimizers.Adam(lr=0.0006), metrics=["accuracy"])
siamese.summary()

In [33]:
history = siamese.fit(
    [x_train_1, x_train_2],
    labels_train,
    validation_data=([x_val_1, x_val_2], labels_val),
    batch_size=batch_size,
    epochs=epochs,
#     callbacks=EarlyStopping(monitor='val_loss' , patience=5,min_delta=0.1)x
)

2022-08-03 16:41:07.334846: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1/20


2022-08-03 16:41:19.243918: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2022-08-03 16:50:01.848807: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [25]:
pa = load_model('./NewDataSiameseModel.h5',compile=False)

In [26]:
from sklearn.metrics import classification_report, confusion_matrix , accuracy_score , recall_score , precision_score

In [27]:
p1 = pa.predict([x_test_1,x_test_2])

2022-08-04 09:47:27.021597: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-08-04 09:47:29.024737: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




In [28]:
yp1 = np.where(p1>=0.5,1,0)

In [29]:
labels_test.shape

(706,)

In [30]:
confusion_matrix(yp1,labels_test)

array([[302,   8],
       [ 51, 345]])

In [31]:
accuracy_score(yp1,labels_test) , precision_score(yp1,labels_test) , recall_score(yp1,labels_test) 

(0.9164305949008499, 0.9773371104815864, 0.8712121212121212)