Importing Packages

In [9]:
import os
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import random
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Lambda, Dropout, BatchNormalization
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, TensorBoard
from PIL import Image
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.regularizers import l2

This is the folder path and .csv file path

In [None]:
#Reference : https://www.kaggle.com/code/suraj520/siamese-network-100-acc-know-train-infer
# Folder that the image file names listed in the CSV are resolved against.
train_dir = "PATH_TO_TRAINING DATA"
# Table of training pairs loaded from the CSV file.
train_dataset = pd.read_csv(filepath_or_buffer="PATH_TO_CSV_FILE")

Defining the data loader class for image conversion and batch creation.

In [None]:
#Reference : https://www.kaggle.com/code/suraj520/siamese-network-100-acc-know-train-infer

# Shared rescaling layer: multiplies every input value by 1/255.
normalization_layer = tf.keras.layers.Rescaling(scale=1.0 / 255)
class DataLoader:
    """Endless generator of image-pair batches built from a table of pairs.

    The table is read positionally: column 0 and column 1 hold the file names
    of the two images of a pair (relative to ``dir``) and column 2 holds the
    pair's label.
    """

    # (width, height) every image is resized to before batching.
    IMAGE_SIZE = (224, 224)

    def __init__(self, dataset, batch_size, dir):
        """Store the pair table, the batch size and the image root folder.

        Args:
            dataset: pandas DataFrame with the three columns described above.
            batch_size: maximum number of pairs per yielded batch.
            dir: folder prefix prepended (with "/") to every file name.
        """
        self.dataset = dataset
        self.batch_size = batch_size
        self.dir = dir

    def shuffle(self):
        """Return a copy of the table with its rows in random order."""
        return self.dataset.sample(frac=1)

    def _load_image(self, path):
        """Load one image as a rescaled float32 array of shape (224, 224, 1).

        The image is resized, converted to greyscale and passed once through
        the module-level ``normalization_layer``.
        """
        # The context manager releases the file handle; resize() returns a
        # new in-memory image, so it stays usable after the file is closed.
        with Image.open(path) as raw:
            grey = raw.resize(self.IMAGE_SIZE).convert('L')
        pixels = img_to_array(grey)
        return np.asarray(normalization_layer(pixels), dtype=np.float32)

    def datagen(self):
        """Yield ``([Image1, Image2], Label)`` batches forever.

        The table is reshuffled at the start of every pass. Each pass walks
        the whole table from row 0 in steps of ``batch_size``, so the last
        batch of a pass may be smaller than ``batch_size``.

        Yields:
            A two-element list of float32 image arrays (one per pair member)
            and a float32 label array.

        Raises:
            ValueError: if the dataset has no rows (the loop would otherwise
                spin forever without yielding).
        """
        num_samples = len(self.dataset)
        if num_samples == 0:
            raise ValueError("DataLoader received an empty dataset")
        while True:
            # shuffling the samples
            self.dataset = self.shuffle()
            # Start at 0: starting at 1 would drop one row from every pass.
            for start in range(0, num_samples, self.batch_size):
                rows = self.dataset.iloc[start:start + self.batch_size]
                image1_paths = self.dir + "/" + rows.iloc[:, 0]
                image2_paths = self.dir + "/" + rows.iloc[:, 1]
                # Both members of a pair go through the same preprocessing.
                Image1 = [self._load_image(path) for path in image1_paths]
                Image2 = [self._load_image(path) for path in image2_paths]
                # convert each list to numpy arrays to ensure that they get processed by fit function
                Image1 = np.asarray(Image1).astype(np.float32)
                Image2 = np.asarray(Image2).astype(np.float32)
                Label = np.asarray(rows.iloc[:, 2]).astype(np.float32)
                yield [Image1, Image2], Label


Pre-processing the images

In [None]:
#Reference : https://www.kaggle.com/code/suraj520/siamese-network-100-acc-know-train-infer
# Batch size shared by the training and validation loaders.
BATCH_SIZE = 128
# Fixed seed so the train/validation split is identical on every run.
SPLIT_SEED = 42

train_set, val_set = train_test_split(train_dataset, test_size=0.25, random_state=SPLIT_SEED)
train_gen = DataLoader(train_set, BATCH_SIZE, train_dir)
val_gen = DataLoader(val_set, BATCH_SIZE, train_dir)

# Pull a single batch as a sanity check of the generator's output shapes.
train_batch = next(train_gen.datagen())
print("Train batch images shape:", train_batch[0][0].shape, train_batch[0][1].shape)
print("Train batch labels shape:", train_batch[1].shape)

Define Euclidean Distance

In [10]:
# reference : https://keras.io/examples/vision/siamese_contrastive/
def euclidean_distance(vects):
    """Return the Euclidean distance between two batches of vectors.

    Args:
        vects: a pair ``(x, y)`` of tensors that can be subtracted from
            each other.

    Returns:
        Tensor of distances, reduced over axis 1 with that axis kept
        (``keepdims=True``).
    """
    first, second = vects
    squared_diff = tf.math.square(first - second)
    squared_norm = tf.math.reduce_sum(squared_diff, axis=1, keepdims=True)
    # Floor at epsilon so the argument of sqrt is always strictly positive.
    floored = tf.math.maximum(squared_norm, tf.keras.backend.epsilon())
    return tf.math.sqrt(floored)


Define Contrastive Loss

In [11]:
# reference : https://keras.io/examples/vision/siamese_contrastive/
def loss(margin=1):
    """Build a contrastive-loss function bound to ``margin``.

    Args:
        margin: value the prediction is pushed towards (or beyond) for
            pairs whose label is 1.

    Returns:
        A ``contrastive_loss(y_true, y_pred)`` callable returning the mean of
        ``(1 - y_true) * y_pred**2 + y_true * max(margin - y_pred, 0)**2``.
    """
    def contrastive_loss(y_true, y_pred):
        # Label-0 term: penalises the squared prediction itself.
        label0_term = (1 - y_true) * tf.math.square(y_pred)
        # Label-1 term: penalises only the shortfall below the margin.
        shortfall = tf.math.maximum(margin - y_pred, 0)
        label1_term = y_true * tf.math.square(shortfall)
        return tf.math.reduce_mean(label0_term + label1_term)

    return contrastive_loss

Defining Siamese Network

In [14]:
#Reference : https://www.kaggle.com/code/suraj520/siamese-network-100-acc-know-train-infer
def Siamese_Network(input_shape):
    """Build the two-input Siamese model.

    Both inputs pass through the same convolutional tower (shared weights).
    The Euclidean distance between the two 64-value embeddings is fed to a
    single sigmoid unit. The tower's summary is printed as a side effect.

    Args:
        input_shape: shape of one input image, e.g. ``(224, 224, 1)``.

    Returns:
        An uncompiled Keras ``Model`` taking ``[input1, input2]``.
    """
    input1 = Input(input_shape)
    input2 = Input(input_shape)

    # Shared embedding tower: two conv/max-pool stages, then two dense layers.
    tower = Sequential([
        Conv2D(64, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(2, 2),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(64, activation='relu'),
    ])

    # Calling the same tower on both inputs is what shares the weights.
    embedding1 = tower(input1)
    embedding2 = tower(input2)
    tower.summary()

    distance = layers.Lambda(euclidean_distance)([embedding1, embedding2])
    output = Dense(1, activation='sigmoid')(distance)
    return Model(inputs=[input1, input2], outputs=output)

Defining checkpoints for the model weights.

In [15]:
#Reference : https://www.kaggle.com/code/suraj520/siamese-network-100-acc-know-train-infer
best_weights_path = "/content/drive/MyDrive/MASTERS_PROJECT/Signature_Verification_Project/Models/Mdoel_weightsGPDS.h5"
# Checkpoint callback: writes to best_weights_path only when the monitored
# validation loss reaches a new minimum (mode='min', save_best_only=True).
model_checkpoint = ModelCheckpoint(
    filepath=best_weights_path,
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    verbose=1,
)

In [16]:
#Reference : https://www.kaggle.com/code/suraj520/siamese-network-100-acc-know-train-infer
# Callbacks handed to fit(): early stopping on validation loss, TensorBoard
# logging under ./logs, and the checkpoint callback defined earlier.
early_stopper = EarlyStopping(patience=3, monitor='val_loss')
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs")
custom_callback = [early_stopper, tensorboard_callback, model_checkpoint]

# Build the network for 224x224 single-channel inputs and load stored weights.
model_1 = Siamese_Network((224, 224, 1))
model_1.load_weights("Best_Weight_PATHS")
model_1.summary()

# Compile with the contrastive loss (margin 1) and Adam.
optimizer = Adam(learning_rate=0.001)
model_1.compile(
    optimizer=optimizer,
    loss=loss(margin=1),
    metrics=['accuracy'],
)

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 222, 222, 64)      640       
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 111, 111, 64)      0         
 g2D)                                                            
                                                                 
 conv2d_7 (Conv2D)           (None, 109, 109, 128)     73856     
                                                                 
 max_pooling2d_7 (MaxPoolin  (None, 54, 54, 128)       0         
 g2D)                                                            
                                                                 
 flatten_3 (Flatten)         (None, 373248)            0         
                                                                 
 dense_6 (Dense)             (None, 128)              

Training and validation of the model

In [None]:
#Reference : https://www.kaggle.com/code/suraj520/siamese-network-100-acc-know-train-infer
# Take the batch size from the loaders so the step counts cannot drift from
# what the generators actually yield.
batch_size = train_gen.batch_size
# Ceiling division: the generators also yield the final, smaller batch of a
# pass, and floor division would give 0 steps for a set smaller than one batch.
steps_per_epoch = -(-len(train_set) // train_gen.batch_size)
validation_steps = -(-len(val_set) // val_gen.batch_size)

print("Initializing Training !!")
history = model_1.fit(
    train_gen.datagen(),
    steps_per_epoch=steps_per_epoch,
    epochs=20,
    validation_data=val_gen.datagen(),
    validation_steps=validation_steps,
    callbacks=custom_callback
)

Plotting the Training and Validation curves for Accuracy and loss.

In [None]:
history_keys = history.history.keys()

plt.figure(figsize=(12, 4))

# One row per panel:
# (subplot index, training key, validation key, title, y-label, legend position)
curve_panels = [
    (1, 'accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy', 'lower right'),
    (2, 'loss', 'val_loss', 'Model Loss', 'Loss', 'upper right'),
]

for panel_index, train_key, val_key, panel_title, y_label, legend_loc in curve_panels:
    # A panel is drawn only when both of its curves were recorded.
    if not (train_key in history_keys and val_key in history_keys):
        continue
    plt.subplot(1, 2, panel_index)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend(['Train', 'Validation'], loc=legend_loc)

# Adjust the layout and display the plot
plt.tight_layout()
plt.show()


TESTING THE MODEL...
---



In [None]:
#Reference : https://www.kaggle.com/code/suraj520/siamese-network-100-acc-know-train-infer
# Paths to the test images and directories
test_image_dir = "PATH_TO_TEST_DATA"
# Despite its name, this holds the loaded test table (a DataFrame), not a path.
test_csv_path = pd.read_csv("PATH_TO_CSV_FILE_FOR_TEST")

# Use a batch size equal to the table length so a single next() call returns
# the test pairs in one batch.
pred_gen = DataLoader(test_csv_path, batch_size=len(test_csv_path), dir=test_image_dir)
images, true_labels = next(pred_gen.datagen())
# These are test batches; the messages previously said "Train".
print("Test batch images shape:", images[0].shape, images[1].shape)
print("Test batch labels shape:", true_labels.shape)

In [None]:
#Reference : https://www.kaggle.com/code/suraj520/siamese-network-100-acc-know-train-infer
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import auc, roc_curve
import seaborn as sns
from scipy.optimize import brentq
from scipy.interpolate import interp1d

# Perform predictions
predictions = model_1.predict(images)

# The contrastive loss used for training pushes pairs labelled 1 towards
# outputs at or above the margin and pairs labelled 0 towards 0, so a score
# of 0.5 or more maps to label 1 (the same direction the Keras 'accuracy'
# metric uses). The mapping was previously inverted.
binary_predictions = (np.asarray(predictions).ravel() >= 0.5).astype(int).tolist()

# Calculate metrics
accuracy = accuracy_score(true_labels, binary_predictions)
f1 = f1_score(true_labels, binary_predictions)
confusion_mat = confusion_matrix(true_labels, binary_predictions)

print("Confusion Matrix:")
# Print confusion matrix with labels and using seaborn
class_names = ['Class 0', 'Class 1']
plt.figure(figsize=(8, 6))
sns.heatmap(confusion_mat, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

EVALUATION METRICS

In [None]:
# Error rates from the hard 0/1 predictions. Label 1 is treated as the
# positive class, as the recall-based FRR already did.
# NOTE(review): which label means "genuine" is not visible here - confirm.
# FRR: share of label-1 pairs predicted as 0.
frr = 1 - recall_score(true_labels, binary_predictions)
# FAR: share of label-0 pairs predicted as 1, i.e. 1 - recall of class 0.
# (1 - precision, used before, is the false discovery rate instead.)
far = 1 - recall_score(true_labels, binary_predictions, pos_label=0)
aer = (frr + far) / 2

# ROC, AUC and EER need the continuous model scores: hard 0/1 predictions
# collapse the curve to a single operating point.
scores = np.asarray(predictions).ravel()
fpr, tpr, _ = roc_curve(true_labels, scores)
fnr = 1 - tpr
# EER is the point on the ROC curve where FPR equals FNR (= 1 - TPR).
tpr_at = interp1d(fpr, tpr)
eer = brentq(lambda x: 1. - x - tpr_at(x), 0., 1.)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

In [None]:
# EER, FRR, AER,FAR
# Caption/value pairs, printed in this order and rounded to two decimals.
summary_rows = (
    ("Equal Error Rate (EER):", eer),
    ("False Rejection Rate (FRR):", frr),
    ("False Acceptance Rate (FAR):", far),
    ("Average Error Rate (AER):", aer),
    ("Accuracy:", accuracy),
    ("F1 Score:", f1),
)
for caption, value in summary_rows:
    print(caption, round(value, 2))

# Best per-epoch accuracies recorded during training.
recorded = history.history
best_val_accuracy = round(max(recorded['val_accuracy']), 2)
best_train_accuracy = round(max(recorded['accuracy']), 2)
print("Best Validation Accuracy:", best_val_accuracy)
print("Best Training Accuracy:", best_train_accuracy)

Checking model performance on the image sets

In [None]:
#Reference : https://www.kaggle.com/code/suraj520/siamese-network-100-acc-know-train-infer
# Define the paths to the test images
test_image1_path = 'PATH_TO_FIRST_IMAGE'
test_image2_path = 'PATH_TO_SECOND_IMAGE'


def _prepare_signature(image_path):
    """Open one image and return ``(model_input, display_image)``.

    ``model_input`` is the image resized to 224x224, converted to greyscale,
    passed through ``normalization_layer`` and given a leading batch axis of
    size 1 (float32). ``display_image`` is the untouched image for plotting.
    """
    model_input = Image.open(image_path)
    display_image = Image.open(image_path)
    model_input = model_input.resize((224, 224)).convert('L')
    model_input = img_to_array(normalization_layer(model_input))
    model_input = np.expand_dims(model_input, axis=0).astype('float32')
    return model_input, display_image


# Load and preprocess the test images
test_image1, disply_image_1 = _prepare_signature(test_image1_path)
test_image2, display_image_2 = _prepare_signature(test_image2_path)

# Perform inference on the test images
prediction = model_1.predict([test_image1, test_image2])

# NOTE(review): the training loss pushes pairs labelled 0 towards low scores,
# so this mapping presumes label 0 means "forged" - confirm against the CSV.
Result = "FORGED" if prediction <= 0.5 else "REAL"

# Display both test images side by side with the verdict as the figure title
plt.figure(figsize=(10, 5))
for panel_index, picture, caption in (
    (1, disply_image_1, 'Test Image 1'),
    (2, display_image_2, 'Test Image 2'),
):
    plt.subplot(1, 2, panel_index)
    plt.imshow(picture)
    plt.title(caption)
    plt.axis('off')

plt.suptitle(f'Result: {Result}', fontsize=12)
plt.tight_layout()
plt.show()