In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Image geometry and dataset location used by the loading and model-building code
IMG_HEIGHT = 128  # Height (pixels) every signature image is resized to
IMG_WIDTH = 128   # Width (pixels) every signature image is resized to
DATA_PATH = '/content/drive/MyDrive/Signature'  # Root folder holding the 'genuine' and 'forged' subfolders

def load_images():
    """
    Loads and preprocesses images from the specified dataset directory.

    - Reads every file under the 'genuine' and 'forged' subfolders of DATA_PATH,
      in sorted filename order so the returned arrays are the same on every run.
    - Labels each image from its filename, not from its folder: the second
      '-'-separated segment must start with eight digits; the image is
      genuine (1) when digits 0-2 equal digits 5-7, otherwise forged (0).
    - Reads in grayscale, resizes to IMG_WIDTH x IMG_HEIGHT, scales pixel
      values to [0, 1] and adds a trailing channel axis for the CNN.
    - Files whose name does not follow that pattern, or that OpenCV cannot
      decode, are skipped and reported with a printed message instead of
      crashing the whole load.

    Returns:
        images (numpy array): Preprocessed images, shape (N, IMG_HEIGHT, IMG_WIDTH, 1).
        labels (numpy array): Corresponding labels, shape (N,); 1 = genuine, 0 = forged.
    """
    images, labels = [], []
    for category in ['genuine', 'forged']:  # Loop through both categories
        path = os.path.join(DATA_PATH, category)  # Construct category path
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # downstream seeded train/test split repeatable across machines and runs.
        for filename in sorted(os.listdir(path)):
            file_path = os.path.join(path, filename)

            # The two 3-digit IDs live in the first eight characters of the second segment.
            segments = filename.split('-')
            id_block = segments[1][:8] if len(segments) > 1 else ''
            if len(id_block) < 8 or not id_block.isdigit():
                # Without this guard a short segment would compare '' == '' and be labelled genuine.
                print(f"Skipping {file_path}: filename does not match the expected ID pattern")
                continue

            # NOTE(review): which triplet is the signer and which is the owner cannot be
            # confirmed from this file; only their equality matters for the label.
            leading_id = id_block[:3]
            trailing_id = id_block[5:8]
            label = 1 if leading_id == trailing_id else 0

            # cv2.imread reports an unreadable file by returning None rather than raising.
            img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                print(f"Skipping {file_path}: could not be decoded as an image")
                continue

            # cv2.resize expects the target size as (width, height)
            img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
            # Normalize pixel values to range [0, 1]
            images.append(img / 255.0)
            labels.append(label)

    # Convert image and label lists to numpy arrays
    images = np.array(images).reshape(-1, IMG_HEIGHT, IMG_WIDTH, 1)  # Add channel dimension for CNN
    labels = np.array(labels)
    return images, labels

# Read the whole dataset, then one-hot encode the 0/1 labels into two columns
X, y = load_images()
y = to_categorical(y, num_classes=2)

# Stage 1: keep 70% of the samples for training and hold the remaining 30% back
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Stage 2: cut the held-back 30% in half -> 15% validation, 15% testing
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=42
)

# Report the resulting array shapes as a quick sanity check
print(f"Training set shape: {X_train.shape}, Testing set shape: {X_test.shape}")


Training set shape: (210, 128, 128, 1), Testing set shape: (45, 128, 128, 1)


In [2]:
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from tensorflow.keras.models import Sequential

def build_model():
    """
    Builds and compiles a Convolutional Neural Network (CNN) model for signature verification.

    Model Architecture:
    - Explicit Input layer of shape (IMG_HEIGHT, IMG_WIDTH, 1), i.e. one grayscale channel.
    - Three Conv2D (32, 64, 128 filters; 3x3 kernels; ReLU) + 2x2 MaxPooling blocks
      for feature extraction, each followed by Dropout(0.25).
    - Flatten, Dense(128, ReLU) and Dropout(0.5) for classification.
    - Dense(2) with softmax: one output per class, matching one-hot encoded labels.

    The model is compiled with the Adam optimizer, categorical cross-entropy
    loss and accuracy as the reported metric.

    Returns:
        model (Sequential): Compiled CNN model.
    """
    model = Sequential([
        # Declare the input shape with an Input layer instead of passing input_shape=
        # to the first Conv2D; recent Keras versions warn about the latter in Sequential models.
        Input(shape=(IMG_HEIGHT, IMG_WIDTH, 1)),

        # First convolutional block
        Conv2D(32, (3, 3), activation='relu'),  # 32 filters, 3x3 kernel
        MaxPooling2D((2, 2)),  # Reduce spatial dimensions by half
        Dropout(0.25),  # Prevent overfitting

        # Second convolutional block
        Conv2D(64, (3, 3), activation='relu'),  # 64 filters, 3x3 kernel
        MaxPooling2D((2, 2)),  # Reduce spatial dimensions by half
        Dropout(0.25),  # Prevent overfitting

        # Third convolutional block
        Conv2D(128, (3, 3), activation='relu'),  # 128 filters, 3x3 kernel
        MaxPooling2D((2, 2)),  # Reduce spatial dimensions by half
        Dropout(0.25),  # Prevent overfitting

        # Fully connected layers
        Flatten(),  # Flatten 3D feature maps to 1D feature vector
        Dense(128, activation='relu'),  # Dense layer with 128 neurons
        Dropout(0.5),  # Higher dropout for regularization
        Dense(2, activation='softmax')  # Output layer with 2 neurons, one per class
    ])

    # Compile the model with Adam optimizer and categorical crossentropy loss
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Instantiate the compiled CNN, then display its layer-by-layer summary
model = build_model()
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [3]:
# Training hyperparameters
EPOCHS = 45       # Full passes over the training set
BATCH_SIZE = 32   # Samples consumed per gradient update

# Fit the network; the validation split is passed in so it is monitored during training
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,  # Show per-epoch progress
    validation_data=(X_val, y_val),
)


Epoch 1/45
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1s/step - accuracy: 0.4614 - loss: 0.9033 - val_accuracy: 0.3556 - val_loss: 0.6933
Epoch 2/45
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 914ms/step - accuracy: 0.5077 - loss: 0.6945 - val_accuracy: 0.4222 - val_loss: 0.6945
Epoch 3/45
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1s/step - accuracy: 0.5364 - loss: 0.6914 - val_accuracy: 0.4222 - val_loss: 0.6940
Epoch 4/45
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2s/step - accuracy: 0.5000 - loss: 0.6922 - val_accuracy: 0.4222 - val_loss: 0.6928
Epoch 5/45
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 1s/step - accuracy: 0.4629 - loss: 0.6943 - val_accuracy: 0.4444 - val_loss: 0.6913
Epoch 6/45
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 897ms/step - accuracy: 0.5419 - loss: 0.6906 - val_accuracy: 0.4444 - val_loss: 0.6914
Epoch 7/45
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━

In [10]:
# Write the trained network to disk at the path below
model_path = '/content/drive/MyDrive/Dataset_CNN/Signature/Models/sign_45.h5'
model.save(model_path)




In [13]:
# Score the test split without progress output (verbose=0).
# The result unpacks into the loss followed by the accuracy metric the model was compiled with.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = test_metrics

# Report accuracy as a percentage (2 decimals) and loss (4 decimals)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")


Test Accuracy: 82.22%
Test Loss: 0.6662


In [14]:
import numpy as np

# Per-class probabilities for every test sample
y_pred = model.predict(X_test)

# Collapse each row to the index of its largest entry:
# predicted class for the model output, true class for the one-hot labels
y_pred_classes = y_pred.argmax(axis=1)
y_true_classes = y_test.argmax(axis=1)

# Show which class indices actually occur on each side
print("Unique values in y_true_classes:", np.unique(y_true_classes))
print("Unique values in y_pred_classes:", np.unique(y_pred_classes))


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 252ms/step
Unique values in y_true_classes: [0 1]
Unique values in y_pred_classes: [0 1]


In [15]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict on the test set
y_pred = model.predict(X_test)  # Get predicted probabilities
y_pred_classes = np.argmax(y_pred, axis=1)  # Convert probabilities to predicted class labels
y_true_classes = np.argmax(y_test, axis=1)  # Convert one-hot encoded labels to class indices

# Class index -> display name (labels are 0 = forged, 1 = genuine).
# Passing the label list explicitly keeps the report rows and the 2x2 matrix layout fixed
# even if one class happens to be absent from both the true and predicted labels;
# without it classification_report raises because target_names no longer matches.
class_labels = [0, 1]
class_names = ["Forged", "Genuine"]

# Classification report
print(classification_report(
    y_true_classes, y_pred_classes, labels=class_labels, target_names=class_names
))

# Confusion Matrix (rows = true class, columns = predicted class, in class_labels order)
cm = confusion_matrix(y_true_classes, y_pred_classes, labels=class_labels)
print("Confusion Matrix:\n", cm)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
              precision    recall  f1-score   support

      Forged       0.94      0.70      0.80        23
     Genuine       0.75      0.95      0.84        22

    accuracy                           0.82        45
   macro avg       0.85      0.83      0.82        45
weighted avg       0.85      0.82      0.82        45

Confusion Matrix:
 [[16  7]
 [ 1 21]]


In [8]:
import cv2
import numpy as np

def preprocess_image(file_path):
    """
    Preprocesses an input image for prediction by the trained CNN model.

    Steps:
    - Reads the image in grayscale mode.
    - Resizes the image to IMG_WIDTH x IMG_HEIGHT, the same constants used when
      loading the training data, so the two pipelines cannot drift apart.
    - Normalizes pixel values to the range [0, 1].
    - Reshapes the image to add batch and channel dimensions.

    Args:
        file_path (str): Path to the image file.

    Returns:
        numpy.ndarray: Preprocessed image of shape (1, IMG_HEIGHT, IMG_WIDTH, 1),
        ready for prediction.

    Raises:
        FileNotFoundError: If file_path does not point to an existing file.
        ValueError: If the file exists but OpenCV cannot decode it as an image.
    """
    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)  # Read as grayscale
    if img is None:
        # cv2.imread returns None instead of raising; fail here with a clear message
        # rather than letting cv2.resize crash on a None input.
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"No such image file: {file_path!r}")
        raise ValueError(f"File could not be decoded as an image: {file_path!r}")

    img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))  # cv2.resize expects (width, height)
    img = img / 255.0  # Normalize to [0, 1]
    return img.reshape(1, IMG_HEIGHT, IMG_WIDTH, 1)  # Add batch and channel dimensions


In [9]:
# Signature image to classify
file_path = '/content/drive/MyDrive/Signature/forged/NFI-08804004.png'

# Turn the file into a single-image batch for the model
processed_img = preprocess_image(file_path)

# Class probabilities for that image, then the index of the most likely class (0 or 1)
prediction = model.predict(processed_img)
predicted_class = np.argmax(prediction)

# Report the verdict: class 1 is genuine, anything else is forged
message = "The signature is Genuine." if predicted_class == 1 else "The signature is Forged."
print(message)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
The signature is Forged.
