## Face Verification using a Siamese Network

In [None]:
import os
import cv2
import time
import random
import numpy as np

import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import preprocess_input

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd ## for reading csv file and wroking with dataframe operations
from PIL import Image ## for image processing and output
import matplotlib.pyplot as plt
import random
from tensorflow.keras.layers import *
tf.__version__, np.__version__

## Loading the data

In [None]:
# Root directory of the dataset. read_image() joins it with a (folder, file)
# pair, and the split loop below lists its entries as one folder per person.
dataset_folder = '/kaggle/input/iiitb-faces/IIITB-FACES'

def read_image(info):
    """Load one dataset image, crop it to the first detected face and resize it.

    Args:
        info: A ``(folder_name, file_name)`` pair, resolved relative to the
            module-level ``dataset_folder``.

    Returns:
        A 350x350 RGB image array. When the cascade finds no face, the whole
        image is resized instead of a face crop.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image at the resolved path.
    """
    path = os.path.join(dataset_folder, info[0], info[1])
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {path}")

    # cv2.imread returns BGR channel order, so the grayscale copy has to be
    # derived with COLOR_BGR2GRAY before the colour copy is converted to RGB.
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Run detection on the histogram-equalized grayscale image; previously it
    # was computed and then ignored in favour of the colour image.
    gray_img = cv2.equalizeHist(gray_img)
    faces = face_cascade.detectMultiScale(
        gray_img, scaleFactor=1.1, minNeighbors=5, minSize=(50, 50)
    )

    face_image = img
    # len() is used because `faces` may be an array, whose truth value is ambiguous.
    if len(faces) > 0:
        x, y, w, h = faces[0]
        face_image = img[y:y+h, x:x+w]
    return cv2.resize(face_image, (350, 350))

# File names per person for each split, keyed by the person's folder name.
test_list = {}
train_list = {}
tot_train_images = 0
tot_test_images = 0

# Every entry of the dataset folder is treated as one person's folder.
for person_folder in os.listdir(dataset_folder):
    person_path = os.path.join(dataset_folder, person_folder)
    person_images = list(os.listdir(person_path))

    # Randomise the order so the 80/20 cut below picks a random subset.
    random.shuffle(person_images)

    split_index = int(0.8 * len(person_images))
    train_part = person_images[:split_index]
    test_part = person_images[split_index:]

    # A person only gets a key in a split when at least one image lands there.
    if train_part:
        train_list.setdefault(person_folder, []).extend(train_part)
        tot_train_images += len(train_part)
    if test_part:
        test_list.setdefault(person_folder, []).extend(test_part)
        tot_test_images += len(test_part)


print("Total images:", tot_train_images + tot_test_images)
print("Total train images:", tot_train_images)
print("Total test images:", tot_test_images)

#### 49 Persons(Classes), 19 Images per person
#### 832 Images (Some images missing)

In [None]:
# Aliases for the counters accumulated while splitting the dataset.
total_train_images, total_test_images = tot_train_images, tot_test_images
total_images = total_train_images + total_test_images
# Number of person classes; hard-coded rather than derived from the folders.
total_classes = 49

## Face detection and cropping

In [None]:
# Frontal-face Haar cascade bundled with OpenCV; read_image() uses it to
# locate the face region before cropping.
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)

## Operating on the training data

## Creating Triplets from the Training data for training the Siamese Network using Triplet Loss
Triplets of (anchor, positive, negative) are created from the training data. The positive is another image of the same person as the anchor, and the negative is an image of a different person.

In [None]:
def create_triplets(directory, folder_list, max_files=10, num_negatives=2):
    """Build a shuffled list of (anchor, positive, negative) triplets.

    Anchors and positives are drawn only from the files listed in
    ``folder_list`` (not from the directory on disk), so images that belong to
    another split can never leak into the triplets.

    Args:
        directory: Unused. Kept so existing ``create_triplets(dir, split)``
            calls keep working.
        folder_list: Mapping of folder name -> list of file names that belong
            to the split being processed.
        max_files: Maximum number of files per folder used to form
            anchor/positive pairs.
        num_negatives: Number of distinct negatives drawn for every
            anchor/positive pair. Fewer are used when the chosen negative
            folder holds fewer files.

    Returns:
        A shuffled list of ``(anchor, positive, negative)`` tuples, each
        element being a ``(folder, file)`` pair. Folders with no other
        non-empty folder to draw negatives from contribute no triplets.
    """
    triplets = []
    folders = list(folder_list.keys())

    for folder in folders:
        files = list(folder_list[folder])[:max_files]

        # Folders that can supply a negative; without any, the original
        # rejection loop would never terminate.
        negative_folders = [
            name for name in folders if name != folder and folder_list[name]
        ]
        if not negative_folders:
            continue

        for i in range(len(files) - 1):
            for j in range(i + 1, len(files)):
                anchor = (folder, files[i])
                positive = (folder, files[j])

                neg_folder = random.choice(negative_folders)
                neg_files = folder_list[neg_folder]
                # Cap the sample size so small folders cannot stall the loop.
                sample_size = min(num_negatives, len(neg_files))
                for neg_file in random.sample(neg_files, sample_size):
                    triplets.append((anchor, positive, (neg_folder, neg_file)))

    random.shuffle(triplets)
    return triplets

In [None]:
# Build the training triplets from the train split only.
train_triplets = create_triplets(dataset_folder, train_list)

print("Number of training triplets:", len(train_triplets))

print("\nExamples of triplets:")
# Slicing instead of indexing 0..4 avoids an IndexError when fewer than five
# triplets were generated.
for triplet in train_triplets[:5]:
    print(triplet)

## Creating the Batch Generator
Used to obtain chunks/batches of triplets of (anchor, positive, negative) samples. 

In [None]:
def get_batch(triplet_list, batch_size=256, preprocess=True):
    """Yield image batches for consecutive chunks of ``triplet_list``.

    Args:
        triplet_list: Sequence of ``(anchor, positive, negative)`` entries,
            each of which is passed to ``read_image``.
        batch_size: Maximum number of triplets per yielded batch. The last
            batch may be smaller.
        preprocess: When true, each array is passed through
            ``preprocess_input`` before being yielded.

    Yields:
        ``[anchor, positive, negative]``, three arrays holding the images of
        one batch. No empty batch is produced, neither for an empty list nor
        when the list length is an exact multiple of ``batch_size``.
    """
    # Stepping by batch_size (instead of looping batch_steps + 1 times) is what
    # prevents the trailing empty batch.
    for start in range(0, len(triplet_list), batch_size):
        anchor = []
        positive = []
        negative = []

        for a, p, n in triplet_list[start:start + batch_size]:
            anchor.append(read_image(a))
            positive.append(read_image(p))
            negative.append(read_image(n))

        anchor = np.array(anchor)
        positive = np.array(positive)
        negative = np.array(negative)

        if preprocess:
            anchor = preprocess_input(anchor)
            positive = preprocess_input(positive)
            negative = preprocess_input(negative)

        yield ([anchor, positive, negative])

## Plotting the data
Plotting the data generated from get_batch()

In [None]:
num_plots = 6

f, axes = plt.subplots(num_plots, 3, figsize=(15, 20))

# Only the first batch is displayed: one row per triplet, with columns in the
# order anchor, positive, negative. Raw (un-preprocessed) images are requested
# so they can be shown directly.
for a, p, n in get_batch(train_triplets, batch_size=num_plots, preprocess=False):
    for row in range(num_plots):
        for col, images in enumerate((a, p, n)):
            axes[row, col].imshow(images[row])
    break

## Creating the Siamese Network
In contrast to a typical Convolutional Neural Network (CNN), the Siamese Network doesn't categorize images into specific classes or labels. Instead, its primary task is to determine the dissimilarity or distance between any pair of provided images. When the two images share the same label, the network aims to adjust its parameters, including weights and biases, to minimize the distance between these two images. Conversely, if the images belong to distinct labels, the network should increase the distance between them.

![](https://miro.medium.com/max/2000/1*05hUCDHhnl4hdjqvdVTHtw.png)

In [None]:
from tensorflow.keras import backend, layers, metrics

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model, Sequential

from tensorflow.keras.utils import plot_model
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

### Creating the encoder
The encoder's role is to transform the provided images into their respective feature vectors. To achieve this, we are utilizing a pre-trained Xception model, which is built upon the Inception_V3 model.

In [None]:
def get_encoder(input_shape):
    """Build the image-embedding network used by every branch.

    An ImageNet-initialised Xception backbone (no classification head, global
    average pooling) feeds Dense(512) -> BatchNormalization -> Dense(256),
    and the result is L2-normalised along axis 1. Every backbone layer except
    the last 27 is frozen.

    Args:
        input_shape: Image shape passed to the Xception constructor.

    Returns:
        A ``Sequential`` model named ``"Encode_Model"``.
    """
    backbone = Xception(
        include_top=False,
        weights='imagenet',
        pooling='avg',
        input_shape=input_shape,
    )

    # Freeze everything except the final 27 backbone layers.
    for frozen_layer in backbone.layers[:-27]:
        frozen_layer.trainable = False

    embedding_head = [
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dense(256, activation="relu"),
        layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)),
    ]
    return Sequential([backbone, *embedding_head], name="Encode_Model")

We're creating a Siamese Network that takes 3 input images (anchor, positive, negative) and uses the encoder above to encode the images into their feature vectors. Those features are passed to a distance layer, which computes the distance between the (anchor, positive) and (anchor, negative) pairs.

In [None]:
class DistanceLayer(layers.Layer):
    """Layer returning the squared distances anchor-positive and anchor-negative."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, anchor, positive, negative):
        """Return ``(ap_distance, an_distance)`` summed over the last axis."""

        def squared_distance(other):
            # Sum of squared element-wise differences to the anchor.
            return tf.reduce_sum(tf.square(anchor - other), -1)

        return (squared_distance(positive), squared_distance(negative))
    

def get_siamese_network(input_shape = (350, 350, 3)):
    """Assemble the three-input network that outputs the triplet distances.

    Args:
        input_shape: Shape of each input image, forwarded to ``get_encoder``
            and to the three ``Input`` layers.

    Returns:
        A functional ``Model`` named ``"Siamese_Network"`` whose inputs are
        ``[anchor, positive, negative]`` and whose output is the
        ``(ap_distance, an_distance)`` pair produced by ``DistanceLayer``.
    """
    # A single encoder instance is reused for all three branches, so the
    # branches share their weights.
    encoder = get_encoder(input_shape)

    # Input layers for the images
    anchor_input   = layers.Input(input_shape, name="Anchor_Input")
    positive_input = layers.Input(input_shape, name="Positive_Input")
    negative_input = layers.Input(input_shape, name="Negative_Input")

    # Encode each image exactly once. The encodings are reused below instead
    # of invoking the encoder a second time per input.
    encoded_a = encoder(anchor_input)
    encoded_p = encoder(positive_input)
    encoded_n = encoder(negative_input)

    # A layer to compute ‖f(A) - f(P)‖² and ‖f(A) - f(N)‖²
    distances = DistanceLayer()(encoded_a, encoded_p, encoded_n)

    return Model(
        inputs=[anchor_input, positive_input, negative_input],
        outputs=distances,
        name="Siamese_Network",
    )

# Build the network with the default 350x350x3 input shape and print its layers.
siamese_network = get_siamese_network()
siamese_network.summary()

In [None]:
# Render the network graph, including tensor shapes and layer names.
plot_model(siamese_network, show_shapes=True, show_layer_names=True)

## Creating the final Siamese Model class
Our next step involves creating a model that utilizes a custom training loop and loss function. This model will allow us to calculate the triplet loss by utilizing the three embeddings generated by the Siamese network.

In [None]:
class SiameseModel(Model):
    """Wrap a distance-producing siamese network with a triplet-loss training step.

    The wrapped network must return ``(ap_distance, an_distance)``. The loss is
    ``max(ap_distance - an_distance + margin, 0)`` and is tracked with a
    running mean named ``"loss"``.
    """

    def __init__(self, siamese_network, margin=1.0):
        super().__init__()

        self.siamese_network = siamese_network
        self.margin = margin
        self.loss_tracker = metrics.Mean(name="loss")

    def call(self, inputs):
        """Forward the inputs to the wrapped network."""
        return self.siamese_network(inputs)

    def train_step(self, data):
        """Run one optimisation step and report the running mean loss."""
        # Record the forward pass so the loss can be differentiated.
        with tf.GradientTape() as tape:
            loss = self._compute_loss(data)

        # Only the wrapped network's trainable weights are updated.
        weights = self.siamese_network.trainable_weights
        gradients = tape.gradient(loss, weights)
        self.optimizer.apply_gradients(zip(gradients, weights))

        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def test_step(self, data):
        """Compute the loss without updating weights and report the running mean."""
        self.loss_tracker.update_state(self._compute_loss(data))
        return {"loss": self.loss_tracker.result()}

    def _compute_loss(self, data):
        """Return the hinge-style triplet loss for one batch."""
        ap_distance, an_distance = self.siamese_network(data)
        return tf.maximum(ap_distance - an_distance + self.margin, 0.0)

    @property
    def metrics(self):
        # Listing the tracker here lets Keras reset it automatically.
        return [self.loss_tracker]

In [None]:
# No loss is passed to compile(): SiameseModel computes the triplet loss in
# its own train_step/test_step.
optimizer = Adam(epsilon=1e-01, learning_rate=1e-3)

siamese_model = SiameseModel(siamese_network)
siamese_model.compile(optimizer=optimizer)

## Training the Siamese model

In [None]:
save_all = False
epochs = 30
batch_size = 128

# Mean batch loss of every epoch, in epoch order.
train_loss = []

for epoch in range(1, epochs+1):
    t = time.time()

    # One pass over all training triplets, collecting the loss of each batch.
    batch_losses = [
        siamese_model.train_on_batch(data)
        for data in get_batch(train_triplets, batch_size=batch_size)
    ]
    epoch_loss = sum(batch_losses)/len(batch_losses)
    train_loss.append(epoch_loss)

    print(f"\nEPOCH: {epoch} \t (Epoch done in {int(time.time()-t)} sec)")
    print(f"Loss on train    = {epoch_loss:.5f}")


# Weights are written once, after the final epoch.
siamese_model.save_weights("siamese_model")

## Using the Siamese model
Having completed the training of our model, our next step is to extract the encoder. This will enable us to encode images, using the generated feature vectors to calculate the distance between them.

### Loading the saved model

In [None]:
# siamese_model = SiameseModel(siamese_network)
# optimizer = Adam(learning_rate=1e-3, epsilon=1e-01)
# siamese_model.compile(optimizer=optimizer)
# siamese_model.load_weights("/kaggle/input/output/results/siamese_model")

In [None]:
def extract_encoder(model):
    """Return a freshly built encoder loaded with the trained encoder's weights.

    Args:
        model: Trained wrapper model. Assumes ``model.layers[0]`` is the
            siamese network and that its layer at index 3 is the encoder
            (after the three input layers). TODO confirm if the architecture
            changes.

    Returns:
        A new encoder from ``get_encoder((350, 350, 3))`` whose layers carry
        the copied weights.
    """
    encoder = get_encoder((350, 350, 3))
    trained_layers = model.layers[0].layers[3].layers
    # Copy weights layer by layer; both encoders are expected to have the
    # same layer layout.
    for position, trained_layer in enumerate(trained_layers):
        encoder.layers[position].set_weights(trained_layer.get_weights())
    return encoder


# Pull the trained encoder out of the siamese model, persist its weights
# separately and print its layers.
encoder = extract_encoder(siamese_model)
encoder.save_weights("encoder")
encoder.summary()

## Loading the training data for training the classifier

In [None]:
# One inner list of face images per person, following train_list's key order.
train_image_list = [
    [read_image((person_folder, image_file)) for image_file in train_list[person_folder]]
    for person_folder in train_list.keys()
]

In [None]:
# Group people by how many training images they have.
#   frequency[n]  -> number of people with exactly n images
#   final_freq[n] -> share of all training images owned by those people
# The running pair total used to be stored in a variable named `sum`, which
# replaced the builtin that the training cell calls. It has its own name now.
same_person_pairs = 0
lis = []
frequency = {}
for person_images in train_image_list:
    image_count = len(person_images)
    # n images of one person form n*(n-1)/2 same-person pairs.
    same_person_pairs += (image_count * (image_count - 1)) / 2
    lis.append(image_count)
    frequency[image_count] = frequency.get(image_count, 0) + 1

tot_val = 0
for key in frequency:
    tot_val += key * frequency[key]

final_freq = {key: key * frequency[key] / tot_val for key in frequency}


# printing the frequency
print(frequency)
print(final_freq)

## Creating pairs of images from the training dataset (to train the classifier)

In [None]:
# Build the labelled image pairs used to train the classifier.
# Label convention in newY: 0 = both images come from the same person,
# 1 = the images come from two different people.
# The fixed seed makes the sampled pairs reproducible; the exact order of the
# random calls below therefore determines the result.
random.seed(42)
newX1 = []
newX2 = []
newY = []
for i in range(len(train_image_list)):
    # Every unordered pair (k, j) with k < j of person i -> label 0.
    for j in range(0, len(train_image_list[i])):
        for k in range(0, j):
            newX1.append(train_image_list[i][k])
            newX2.append(train_image_list[i][j])
            newY.append(0)
    
    # For each image of person i, sample pairs with images of other people.
    for u in range(0, len(train_image_list[i])):
        # NOTE(review): this initial value is overwritten on the next line.
        step = 1
        # Share of a 4096-pair budget assigned to one person with this image
        # count, then spread over that person's images.
        # presumably 4096 is the targeted number of label-1 pairs; verify.
        step = round((final_freq[len(train_image_list[i])]*4096)/frequency[len(train_image_list[i])])
        no_of_iter = round(step/len(train_image_list[i]))
        for l in range(0, no_of_iter):
            # Candidate partners: every person index except i.
            # assumes train_image_list has exactly 49 entries — TODO confirm
            numbers = list(range(0, i)) + list(range(i+1, 49))
            r = random.choice(numbers)
            g = random.randint(0, len(train_image_list[r]) - 1)
            newX1.append(train_image_list[i][u])
            newX2.append(train_image_list[r][g])
            newY.append(1)
# 214 additional label-1 pairs, cycling through the person indices.
# NOTE(review): 214 looks like a top-up so the label-1 count reaches a fixed
# total after rounding above — confirm against the printed length.
for i in range(0, 214):
        
    numbers = list(range(0,i%49)) + list(range(i%49 + 1,49))
    r = random.choice(numbers)
    uu = random.randint(0,len(train_image_list[i%49])-1)
    g = random.randint(0,len(train_image_list[r])-1)
    newX1.append(train_image_list[i%49][uu])
    newX2.append(train_image_list[r][g])
    newY.append(1)
print(len(newY))

In [None]:
# Shuffle the three parallel lists in unison: zip them into rows, shuffle the
# rows, then unzip back into per-column tuples.
c = [*zip(newX1, newX2, newY)]
random.shuffle(c)
a, b, y = zip(*c)

## Extracting features using the encoder of the Siamese Model

In [None]:
# Encode both images of every pair, 256 pairs at a time.
# Stepping through the data by chunk start (instead of counting only whole
# chunks) also encodes a trailing partial chunk, so output1/output2 always
# have one entry per label in y.
feature_chunk = 256
output1 = []
output2 = []
for start in range(0, len(a), feature_chunk):
    a1 = a[start:start + feature_chunk]
    b1 = b[start:start + feature_chunk]
    # NOTE(review): the encoder was trained on batches passed through
    # preprocess_input() (see get_batch), while images are scaled by /255
    # here. Confirm whether this mismatch is intended. The scaling is left
    # unchanged so features stay consistent with the test-time extraction.
    output1.extend(encoder.predict(np.array(a1)/255))
    output2.extend(encoder.predict(np.array(b1)/255))
    

In [None]:
# Positions of the different-person (label 1) and same-person (label 0) pairs.
indexes1 = [position for position, label in enumerate(y) if label == 1]
indexes0 = [position for position, label in enumerate(y) if label == 0]

In [None]:
# Number of feature vectors extracted for the second image of each pair.
print(len(output2))

## Features Subtraction

- Getting absolute value of the difference between feature vectors of two images
- Plotting graphs for positive and negative image pairs

In [None]:
# Element-wise absolute difference of the two feature vectors of every pair
# (arr) and the sum of that difference (su).
arr = []
su = []
for features_first, features_second in zip(output1, output2):
    oo = np.abs(np.subtract(np.array(features_first), np.array(features_second)))
    arr.append(oo)
    su.append(oo.sum())

# Stored under its own name: binding this array to `a` would overwrite the
# tuple of first images that the feature-extraction cell reads.
su_array = np.array(su)
su1 = list(su_array[indexes1])
su0 = list(su_array[indexes0])

fig, axs = plt.subplots(1, 2)
fig.set_size_inches(18, 4)
fig.suptitle("Sum differences")
# X-axes follow the actual series lengths instead of fixed 4096/8192 ranges,
# so the plot also works when the pair counts differ.
axs[0].plot(list(range(len(su1))), su1, list(range(len(su0))), su0)
axs[0].legend(["different people", "same person"])
axs[1].plot(list(range(len(su))), su)
axs[1].legend(["overall variation"])

    

## Using a simple neural network to classify image pairs as the same person or different people

In [None]:
import tensorflow.keras.backend as K
import tensorflow


def distance(vecs):
    """Return the element-wise absolute difference of two L2-normalised vectors.

    Args:
        vecs: A pair ``(x, y)`` of feature tensors.
    """
    x, y = vecs
    x = K.l2_normalize(x, axis=-1)
    y = K.l2_normalize(y, axis=-1)

    return K.abs(x-y)


# The shape is passed as an explicit tuple keyword; a bare int as the first
# positional argument is not accepted by every Keras version.
featuresA = Input(shape=(256,))
featuresB = Input(shape=(256,))
# The tensor gets its own name so that `distance` keeps referring to the
# function above and this cell can be re-run.
pair_distance = Lambda(distance)([featuresA, featuresB])

# Small dense head: 96 (ReLU) -> dropout -> 64 (linear) -> 1 (sigmoid).
x = Dense(96, activation="relu")(pair_distance)
x = Dropout(0.3)(x)
x = Dense(64)(x)
outputs = Dense(1, activation="sigmoid")(x)
model = Model(inputs=[featuresA, featuresB], outputs=outputs)
model.compile(loss='binary_crossentropy', optimizer=tensorflow.keras.optimizers.Adam(learning_rate=0.01), metrics=['accuracy'])
model.summary()



### Fitting the classifier and checking the validation accuracy

In [None]:
# The first 6144 pairs train the classifier; everything after that is used
# as validation data.
pair_features1 = np.array(output1)
pair_features2 = np.array(output2)
pair_labels = np.array(y)
history = model.fit(
    [pair_features1[:6144], pair_features2[:6144]],
    pair_labels[:6144],
    validation_data=(
        [pair_features1[6144:], pair_features2[6144:]],
        pair_labels[6144:],
    ),
    epochs=10,
    batch_size=16,
)

In [None]:
# Training vs. validation curves of the classifier fit above.
# The epoch axis is derived from the recorded history rather than a fixed
# range(1, 11), so the plot stays valid if the number of epochs changes.
epoch_axis = list(range(1, len(history.history['loss']) + 1))

fig, axs = plt.subplots(1, 2)
fig.set_size_inches(18, 4)
fig.suptitle("Overfitting analysis")
axs[0].plot(epoch_axis, history.history['val_accuracy'], epoch_axis, history.history['accuracy'])

axs[0].title.set_text("Accuracy")
axs[0].legend(["validation accuracy", "training accuracy"])
axs[1].plot(epoch_axis, history.history['val_loss'], epoch_axis, history.history['loss'])
axs[1].title.set_text('Loss')
axs[1].legend(["validation loss", "trainig loss"])

## Testing

In [None]:
# One inner list of face images per person, following test_list's key order.
# Built with a comprehension so that no loop variable overwrites the
# module-level `b`, which holds the second images of the training pairs.
test_image_list = [
    [read_image((person_folder, image_file)) for image_file in test_list[person_folder]]
    for person_folder in test_list.keys()
]
        

In [None]:
# Group people by how many test images they have.
#   frequency[n]  -> number of people with exactly n images
#   final_freq[n] -> share of all test images owned by those people
# The running pair total used to be stored in a variable named `sum`, which
# replaced the builtin of the same name. It has its own name now.
same_person_pairs_test = 0
lis = []
frequency = {}
for person_images in test_image_list:
    image_count = len(person_images)
    # n images of one person form n*(n-1)/2 same-person pairs.
    same_person_pairs_test += (image_count * (image_count - 1)) / 2
    lis.append(image_count)
    frequency[image_count] = frequency.get(image_count, 0) + 1

tot_val = 0
for key in frequency:
    tot_val += key * frequency[key]

final_freq = {key: key * frequency[key] / tot_val for key in frequency}


# printing the frequency
print(final_freq)
print(frequency)

In [None]:
# Number of people that have at least one test image.
print(len(test_image_list))

### Creating pairs of images from the test dataset for evaluating the entire pipeline

In [None]:
# Build the labelled image pairs used to evaluate the pipeline.
# Label convention in newY_test: 0 = both images come from the same person,
# 1 = the images come from two different people.
# The fixed seed makes the sampled pairs reproducible; the exact order of the
# random calls below therefore determines the result.
random.seed(42)
newX1_test = []
newX2_test = []
newY_test = []
for i in range(len(test_image_list)):
    # Every unordered pair (k, j) with k < j of person i -> label 0.
    for j in range(0, len(test_image_list[i])):
        for k in range(0, j):
            newX1_test.append(test_image_list[i][k])
            newX2_test.append(test_image_list[i][j])
            newY_test.append(0)
    
    # For each image of person i, sample one pair with another person's image.
    for u in range(0, len(test_image_list[i])):
        # NOTE(review): this initial value is overwritten on the next line.
        step = 1
        step = round((final_freq[len(test_image_list[i])]*264)/frequency[len(test_image_list[i])])
        # NOTE(review): no_of_iter is computed but not used; the loop below
        # always runs exactly once per image.
        no_of_iter = round(step/len(test_image_list[i]))
        for l in range(0, 1):
            # Candidate partners: every person index except i.
            # assumes test_image_list has exactly 49 entries — TODO confirm
            numbers = list(range(0, i)) + list(range(i+1, 49))
            r = random.choice(numbers)
            g = random.randint(0, len(test_image_list[r]) - 1)
            newX1_test.append(test_image_list[i][u])
            newX2_test.append(test_image_list[r][g])
            newY_test.append(1)
# 79 additional label-1 pairs, cycling through the person indices.
# NOTE(review): 79 looks like a top-up towards a fixed label-1 total —
# confirm against the printed length.
for i in range(0, 79):
        
    numbers = list(range(0,i%49)) + list(range(i%49 + 1,49))
    r = random.choice(numbers)
    uu = random.randint(0,len(test_image_list[i%49])-1)
    g = random.randint(0,len(test_image_list[r])-1)
    newX1_test.append(test_image_list[i%49][uu])
    newX2_test.append(test_image_list[r][g])
    newY_test.append(1)
print(len(newY_test))

In [None]:
# Shuffle the three parallel test lists in unison: zip them into rows,
# shuffle the rows, then unzip back into per-column tuples.
c_test = [*zip(newX1_test, newX2_test, newY_test)]
random.shuffle(c_test)
a_test, b_test, y_test = zip(*c_test)

In [None]:
# Number of same-person (label 0) pairs in the shuffled test labels.
count = y_test.count(0)
print(count)

In [None]:
# Total number of test pairs.
print(len(a_test))

### Extracting features using the encoder of the Siamese Model

In [None]:
# Encode both images of every test pair, 264 pairs at a time.
# Stepping through the data by chunk start (instead of counting only whole
# chunks) also encodes a trailing partial chunk, so the outputs always have
# one entry per label in y_test.
test_feature_chunk = 264
output1_test = []
output2_test = []
for start in range(0, len(a_test), test_feature_chunk):
    a1_test = a_test[start:start + test_feature_chunk]
    b1_test = b_test[start:start + test_feature_chunk]
    # NOTE(review): images are scaled by /255 here, whereas the encoder was
    # trained on preprocess_input() batches (see get_batch). Confirm whether
    # this is intended. The scaling is left unchanged so features stay
    # consistent with the training-pair extraction.
    output1_test.extend(encoder.predict(np.array(a1_test)/255))
    output2_test.extend(encoder.predict(np.array(b1_test)/255))

In [None]:
# Positions of the different-person (label 1) and same-person (label 0) test pairs.
indexes1_test = [position for position, label in enumerate(y_test) if label == 1]
indexes0_test = [position for position, label in enumerate(y_test) if label == 0]

### Using the classifier for final training and evaluation

In [None]:
import tensorflow.keras.backend as K
import tensorflow


def distance(vecs):
    """Return the element-wise absolute difference of two L2-normalised vectors.

    Args:
        vecs: A pair ``(x, y)`` of feature tensors.
    """
    x, y = vecs
    x = K.l2_normalize(x, axis=-1)
    y = K.l2_normalize(y, axis=-1)

    return K.abs(x-y)


# A fresh, untrained classifier for the final fit on all training pairs.
# The shape is passed as an explicit tuple keyword; a bare int as the first
# positional argument is not accepted by every Keras version.
featuresA = Input(shape=(256,))
featuresB = Input(shape=(256,))
# The tensor gets its own name so that `distance` keeps referring to the
# function above and this cell can be re-run.
pair_distance = Lambda(distance)([featuresA, featuresB])

# Small dense head: 96 (ReLU) -> dropout -> 64 (linear) -> 1 (sigmoid).
x = Dense(96, activation="relu")(pair_distance)
x = Dropout(0.3)(x)
x = Dense(64)(x)
outputs = Dense(1, activation="sigmoid")(x)
model = Model(inputs=[featuresA, featuresB], outputs=outputs)
model.compile(loss='binary_crossentropy', optimizer=tensorflow.keras.optimizers.Adam(learning_rate=0.01), metrics=['accuracy'])
model.summary()

In [None]:
# Number of training-pair feature vectors available for the final fit.
print(len(output1))

### Fitting the classifier on the entire training data

In [None]:
# Final fit on (up to) the first 8192 training pairs, without a validation split.
final_features1 = np.array(output1)[:8192]
final_features2 = np.array(output2)[:8192]
final_labels = np.array(y)[:8192]
history = model.fit(
    [final_features1, final_features2],
    final_labels,
    epochs=15,
    batch_size=32,
)

### Obtaining final test results on the entire test data

In [None]:
# Evaluate the classifier on (up to) the first 528 test pairs.
eval_features1 = np.array(output1_test)[:528]
eval_features2 = np.array(output2_test)[:528]
eval_labels = np.array(y_test)[:528]
test_res = model.evaluate(
    [eval_features1, eval_features2],
    eval_labels,
    batch_size=16,
)