In [14]:
import os
import random
from itertools import combinations
import numpy as np

# Define the path to your dataset folder.
# Built with os.path.join instead of a backslash literal: in 'archive\CEDAR\CEDAR'
# the '\C' is an invalid escape sequence (newer Python versions warn about it),
# and a hard-coded backslash only works as a path separator on Windows.
dataset_folder = os.path.join('archive', 'CEDAR', 'CEDAR')

# Define the percentage of data for the training set
train_percentage = 0.7

# Maximum number of original-original pairs kept per folder; the same number of
# original-forged pairs is kept so both classes stay balanced
max_pairs_per_folder = 50

# Dedicated, seeded random generator so the shuffles below (and therefore the
# train/test split) come out identical on every run
pair_rng = random.Random(42)

# Initialize lists to store image pairs and their labels
train_image_pairs = []
train_labels = []
test_image_pairs = []
test_labels = []

# Iterate through each folder in the dataset. The listing is sorted because
# os.listdir returns entries in arbitrary order, and a seeded shuffle is only
# reproducible when it starts from a fixed order.
for folder_name in sorted(os.listdir(dataset_folder)):
    folder_path = os.path.join(dataset_folder, folder_name)

    # Ignore stray files sitting next to the folders; listing them would raise
    if not os.path.isdir(folder_path):
        continue

    # Split the folder's files into original and forged images by name prefix
    file_names = sorted(os.listdir(folder_path))
    original_images = [name for name in file_names if name.startswith('original')]
    forged_images = [name for name in file_names if name.startswith('forgeries')]

    # Shuffle the original and forged images to ensure randomness
    pair_rng.shuffle(original_images)
    pair_rng.shuffle(forged_images)

    # Generate pairs for original-original and original-forged combinations
    original_original_pairs = list(combinations(original_images, 2))
    original_forged_pairs = [
        (original_image, forged_image)
        for original_image in original_images
        for forged_image in forged_images
    ]

    # Shuffle the pairs for randomness
    pair_rng.shuffle(original_original_pairs)
    pair_rng.shuffle(original_forged_pairs)

    # Keep an equal number of original-original and original-forged pairs
    original_original_pairs = original_original_pairs[:max_pairs_per_folder]
    original_forged_pairs = original_forged_pairs[:len(original_original_pairs)]

    # Number of pairs of each kind that go to the training set
    # NOTE(review): the split is made over pairs, not over images, so the same
    # image file can occur in both a training pair and a testing pair. Split
    # the image lists first if a leakage-free evaluation is required.
    train_number = int(train_percentage * len(original_original_pairs))

    train_original_original_pairs = original_original_pairs[:train_number]
    train_original_forged_pairs = original_forged_pairs[:train_number]

    # Assign labels (0 for original-original and 1 for original-forged)
    train_labels.extend([0] * len(train_original_original_pairs) + [1] * len(train_original_forged_pairs))

    # Extend the training sets with full paths, in the same order as the labels
    train_image_pairs.extend(
        (os.path.join(folder_path, pair[0]), os.path.join(folder_path, pair[1]))
        for pair in train_original_original_pairs + train_original_forged_pairs
    )

    # Select the remaining pairs for testing
    test_original_original_pairs = original_original_pairs[train_number:]
    test_original_forged_pairs = original_forged_pairs[train_number:]

    # Assign labels for testing
    test_labels.extend([0] * len(test_original_original_pairs) + [1] * len(test_original_forged_pairs))

    # Extend the testing sets with full paths, in the same order as the labels
    test_image_pairs.extend(
        (os.path.join(folder_path, pair[0]), os.path.join(folder_path, pair[1]))
        for pair in test_original_original_pairs + test_original_forged_pairs
    )

# Now you have the training and testing sets with image pairs and labels as per your requirements


In [15]:
# Preview the test split: the first pairs (at most 5) next to their labels
print("Sample test_image_pairs:")
for i, pair in enumerate(test_image_pairs[:5]):
    print(pair, "Label:", test_labels[i])

# Sizes of the test split; the two counts are expected to agree
print("Number of test_image_pairs:", len(test_image_pairs))
print("Number of test_labels:", len(test_labels))

# Same preview for the training split
print("\nSample train_image_pairs:")
for i, pair in enumerate(train_image_pairs[:5]):
    print(pair, "Label:", train_labels[i])

# Sizes of the training split
print("Number of train_image_pairs:", len(train_image_pairs))
print("Number of train_labels:", len(train_labels))


Sample test_image_pairs:
('archive\\CEDAR\\CEDAR\\1\\original_1_2.png', 'archive\\CEDAR\\CEDAR\\1\\original_1_7.png') Label: 0
('archive\\CEDAR\\CEDAR\\1\\original_1_13.png', 'archive\\CEDAR\\CEDAR\\1\\original_1_16.png') Label: 0
('archive\\CEDAR\\CEDAR\\1\\original_1_11.png', 'archive\\CEDAR\\CEDAR\\1\\original_1_22.png') Label: 0
('archive\\CEDAR\\CEDAR\\1\\original_1_14.png', 'archive\\CEDAR\\CEDAR\\1\\original_1_6.png') Label: 0
('archive\\CEDAR\\CEDAR\\1\\original_1_10.png', 'archive\\CEDAR\\CEDAR\\1\\original_1_22.png') Label: 0
Number of test_image_pairs: 1650
Number of test_labels: 1650

Sample train_image_pairs:
('archive\\CEDAR\\CEDAR\\1\\original_1_3.png', 'archive\\CEDAR\\CEDAR\\1\\original_1_8.png') Label: 0
('archive\\CEDAR\\CEDAR\\1\\original_1_12.png', 'archive\\CEDAR\\CEDAR\\1\\original_1_21.png') Label: 0
('archive\\CEDAR\\CEDAR\\1\\original_1_2.png', 'archive\\CEDAR\\CEDAR\\1\\original_1_24.png') Label: 0
('archive\\CEDAR\\CEDAR\\1\\original_1_24.png', 'archive\\CED

In [17]:
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.callbacks import EarlyStopping

# Define a function to preprocess the image pairs
def preprocess_image(image_path, target_size=(224, 224)):
    """Load one image from disk, resize it and scale its pixel values.

    Args:
        image_path: Path of the image file to read.
        target_size: Size the image is resized to while loading, passed
            straight through to ``load_img``. Defaults to (224, 224), which
            matches the ``input_shape`` the Siamese model in this notebook is
            built with.

    Returns:
        The image as an array with every pixel value divided by 255
        (i.e. scaled to the 0..1 range for 8-bit images).
    """
    image = keras.preprocessing.image.load_img(image_path, target_size=target_size)
    # Convert the loaded image to an array and normalize by the 8-bit maximum
    return keras.preprocessing.image.img_to_array(image) / 255.0

# Define the Siamese CNN model
def create_siamese_model(input_shape):
    """Build a Siamese CNN that scores a pair of images with one sigmoid unit.

    Both inputs are passed through the same convolutional sub-network, so the
    two branches share their weights. The element-wise absolute difference of
    the two 256-dimensional embeddings feeds a single sigmoid output.

    Args:
        input_shape: Shape of one input image, e.g. (224, 224, 3).

    Returns:
        An uncompiled Keras model that takes ``[left, right]`` image batches
        and produces one sigmoid output per pair.
    """
    # Shared embedding tower: two conv + max-pool stages, then a dense projection
    tower_input = keras.layers.Input(shape=input_shape)
    features = tower_input
    for filter_count in (64, 128):
        features = keras.layers.Conv2D(filter_count, (3, 3), activation='relu')(features)
        features = keras.layers.MaxPooling2D()(features)
    features = keras.layers.Flatten()(features)
    features = keras.layers.Dense(256, activation='relu')(features)
    shared_tower = keras.models.Model(inputs=tower_input, outputs=features)

    # One input per side of the pair; both sides go through the same tower
    left_image = keras.layers.Input(shape=input_shape)
    right_image = keras.layers.Input(shape=input_shape)
    left_embedding = shared_tower(left_image)
    right_embedding = shared_tower(right_image)

    # Element-wise L1 distance between the two embeddings
    embedding_distance = keras.layers.Lambda(
        lambda embeddings: tf.abs(embeddings[0] - embeddings[1])
    )([left_embedding, right_embedding])

    # Single sigmoid unit turns the distance vector into the pair score
    pair_score = keras.layers.Dense(1, activation='sigmoid')(embedding_distance)

    return keras.models.Model(inputs=[left_image, right_image], outputs=pair_score)

# Build the Siamese network for 224x224 three-channel inputs and set it up for
# training as a binary classifier (Adam optimizer, binary cross-entropy loss)
input_shape = (224, 224, 3)
siamese_model = create_siamese_model(input_shape=input_shape)
siamese_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

import numpy as np

# Convert your image pairs to NumPy arrays.
# The same image file occurs in many pairs, so each distinct file is decoded
# only once and reused, instead of being re-read from disk for every pair.
_decoded_images = {}


def _load_cached(image_path):
    """Return the preprocessed image for image_path, decoding it at most once."""
    if image_path not in _decoded_images:
        _decoded_images[image_path] = preprocess_image(image_path)
    return _decoded_images[image_path]


# Left and right members of every training pair, stacked along a new first axis
train_image_pairs_left = np.array([_load_cached(pair[0]) for pair in train_image_pairs])
train_image_pairs_right = np.array([_load_cached(pair[1]) for pair in train_image_pairs])

# Convert your labels to a NumPy array
train_labels = np.array(train_labels)

# Similarly, preprocess and convert test image pairs and labels
test_image_pairs_left = np.array([_load_cached(pair[0]) for pair in test_image_pairs])
test_image_pairs_right = np.array([_load_cached(pair[1]) for pair in test_image_pairs])
test_labels = np.array(test_labels)

# np.array copied the pixel data into the stacked arrays, so the per-file
# cache is no longer needed and its memory can be released
_decoded_images.clear()

from keras.callbacks import Callback

class CustomEarlyStopping(Callback):
    """Stop training as soon as a monitored metric reaches a target value.

    Args:
        monitor: Key looked up in the per-epoch ``logs`` dict
            (default ``'val_accuracy'``).
        target_accuracy: Threshold; training is stopped once the monitored
            value is greater than or equal to it (default 0.95).
    """

    def __init__(self, monitor='val_accuracy', target_accuracy=0.95):
        super().__init__()
        self.monitor = monitor
        self.target_accuracy = target_accuracy

    def on_epoch_end(self, epoch, logs=None):
        """Check the monitored metric after an epoch and request a stop if it is reached."""
        # logs defaults to None in the signature; fall back to an empty dict so
        # the lookup below cannot fail with an AttributeError
        logs = logs or {}
        current_accuracy = logs.get(self.monitor)
        # A missing metric (None) is ignored rather than treated as a failure
        if current_accuracy is not None and current_accuracy >= self.target_accuracy:
            print(f"Reached target accuracy of {self.target_accuracy}. Stopping training.")
            self.model.stop_training = True

# Stop early once the monitored validation accuracy reaches 95 %
custom_early_stopping = CustomEarlyStopping(target_accuracy=0.95)

# Train for at most 10 epochs in mini-batches of 16.
# NOTE(review): the test split doubles as validation data here, so the
# early-stopping decision is made on the same samples that the metrics further
# down are computed on. Confirm this is intended.
siamese_model.fit(
    x=[train_image_pairs_left, train_image_pairs_right],
    y=train_labels,
    validation_data=([test_image_pairs_left, test_image_pairs_right], test_labels),
    epochs=10,
    batch_size=16,
    callbacks=[custom_early_stopping],
)

# Persist the trained model under the path 'final'
siamese_model.save('final')



# You can then use the test predictions to calculate the percentage of similarity between signature pairs.


Epoch 1/10
Epoch 2/10




INFO:tensorflow:Assets written to: final\assets


INFO:tensorflow:Assets written to: final\assets


In [19]:
# Score every test pair with the trained model (one sigmoid output per pair)
test_predictions = siamese_model.predict(x=[test_image_pairs_left, test_image_pairs_right])



In [38]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Turn the model outputs into hard 0/1 predictions: scores strictly above the
# threshold count as class 1 (adjust the threshold for your specific problem)
threshold = 0.5
predicted_labels = (test_predictions > threshold).astype(int)

# Score the hard predictions against the true test labels
accuracy, precision, recall, f1 = (
    metric(test_labels, predicted_labels)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report the four metrics, one per line
for caption, score in zip(
    ("Accuracy:", "Precision:", "Recall:", "F1-Score:"),
    (accuracy, precision, recall, f1),
):
    print(caption, score)


Accuracy: 0.98
Precision: 0.9615384615384616
Recall: 1.0
F1-Score: 0.9803921568627451


In [25]:
# Spot-check the raw model output for a single test pair (index 70)
test_predictions[70]

array([0.00049728], dtype=float32)

In [33]:
# Load four signature images for manual spot checks: by file name, image1 and
# image2 are forgeries while image3 and image4 are originals.
def _as_batch_of_one(file_name):
    """Preprocess one image file and wrap it in an array with a leading axis of size 1."""
    return np.array([preprocess_image(file_name)])


image1 = _as_batch_of_one('forgeries_13_20.png')
image2 = _as_batch_of_one('forgeries_13_24.png')
image3 = _as_batch_of_one('original_13_7.png')
image4 = _as_batch_of_one('original_13_16.png')




In [34]:
# Forgery vs. forgery (image1, image2).
# NOTE(review): the training pairs only contain original-original (label 0)
# and original-forged (label 1) combinations, so this pair type is unseen.
test_predictions1 = siamese_model.predict(x=[image1, image2])
print(test_predictions1)

[[0.00138519]]


In [35]:
# Original vs. forgery (image3, image2); training label for this pair type is 1
test_predictions1 = siamese_model.predict(x=[image3, image2])
print(test_predictions1)

[[1.]]


In [36]:
# Original vs. original (image3, image4); training label for this pair type is 0
test_predictions1 = siamese_model.predict(x=[image3, image4])
print(test_predictions1)

[[0.06186659]]


In [37]:
# Forgery vs. original (image2, image4): the forged image is on the left here,
# whereas the training pairs always put the original first
test_predictions1 = siamese_model.predict(x=[image2, image4])
print(test_predictions1)

[[1.]]
