<a href="https://colab.research.google.com/github/ayushraj16/-Signature-Forgery-Detection-using-Siamese-Network-CEDAR-Dataset-/blob/main/forgery_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gradio

In [None]:
import kagglehub

# Download the latest version of the dataset identified by the Kaggle handle
# below and keep the result in `path` for use further down in the notebook.
# NOTE(review): the result is presumably a local directory path (the message
# printed below treats it as one) — confirm against the kagglehub docs.
path = kagglehub.dataset_download("shreelakshmigp/cedardataset")

# Show the value so the on-disk layout can be inspected by hand.
print("Path to dataset files:", path)

In [None]:
# prompt: using the above dataset path , create a signature forgery detection system , siamese network with cedar dataset , also add an interface to insert the signature images .for interface use gradio

!pip install torch torchvision torchmetrics pytorch-lightning
!pip install transformers datasets

import os
import gradio as gr
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import pytorch_lightning as pl
from torchvision.models import resnet18
from torchmetrics.functional import accuracy

# Define the dataset class
class SignatureDataset(Dataset):
    """Signature image pairs labelled 1.0 (genuine pair) or 0.0 (forged pair).

    Expected layout under ``base_path``::

        base_path/
          original/<writer_id>/<image>
          forged/<writer_id>/<image>

    For every writer the alphabetically first genuine image is the reference.
    It is paired with each remaining genuine image of that writer (label 1.0)
    and with each forged image found for the same writer id (label 0.0).
    Directory listings are sorted so the pairs are reproducible across runs
    and file systems. This is a simplified pairing scheme; it does not try to
    balance the two classes.

    Args:
        base_path: Directory that contains the ``original`` and ``forged``
            sub-directories.
        transform: Optional callable applied to each loaded PIL image.

    Attributes set by the constructor:
        image_paths: List of ``(reference_path, other_path)`` tuples.
        labels: List of floats aligned with ``image_paths``.
        authentic_signatures: Dict mapping writer id to its sorted genuine
            image paths.
    """

    # File-name suffixes (compared case-insensitively) that count as images.
    _IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, base_path, transform=None):
        self.base_path = base_path
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.authentic_signatures = {}  # writer id -> genuine image paths

        original_path = os.path.join(base_path, 'original')
        forged_path = os.path.join(base_path, 'forged')

        print(f"Checking for original signatures in: {original_path}")
        print(f"Checking for forged signatures in: {forged_path}")

        self._collect_authentic(original_path)

        print(f"Found {len(self.authentic_signatures)} writers with authentic signatures.")
        if not self.authentic_signatures:
            # Deliberately best-effort: report the problem and end up empty
            # instead of raising, so callers can check len(dataset).
            print("No authentic signatures found. Cannot create pairs.")
            print(f"Expected original signatures in {original_path}")

        self._build_pairs(forged_path)

        print(f"Found {len(self.image_paths)} image pairs.")

        if not self.image_paths:
            print("Error: No image pairs were created. Please check the dataset path and structure.")
            print(f"Base path used: {base_path}")
            print(f"Original path checked: {original_path}, Exists: {os.path.exists(original_path)}")
            print(f"Forged path checked: {forged_path}, Exists: {os.path.exists(forged_path)}")
            # Only list real directories; os.listdir raises on a plain file.
            if os.path.isdir(original_path):
                print(f"Listing contents of original path: {os.listdir(original_path)[:5]}...")
            if os.path.isdir(forged_path):
                print(f"Listing contents of forged path: {os.listdir(forged_path)[:5]}...")

    @staticmethod
    def _list_images(directory):
        """Return the sorted full paths of the image files directly inside *directory*."""
        return sorted(
            os.path.join(directory, name)
            for name in os.listdir(directory)
            if name.lower().endswith(SignatureDataset._IMAGE_EXTENSIONS)
        )

    @staticmethod
    def _load_rgb(image_path):
        """Open *image_path*, return an in-memory RGB copy and close the file."""
        # convert() returns a new image, so the file handle can be released
        # immediately instead of lingering until garbage collection.
        with Image.open(image_path) as img:
            return img.convert('RGB')

    def _collect_authentic(self, original_path):
        """Fill ``authentic_signatures`` from the per-writer folders in *original_path*."""
        if not os.path.isdir(original_path):
            return
        for writer_id in sorted(os.listdir(original_path)):
            writer_dir = os.path.join(original_path, writer_id)
            if not os.path.isdir(writer_dir):
                continue
            authentic_imgs = self._list_images(writer_dir)
            if authentic_imgs:
                self.authentic_signatures[writer_id] = authentic_imgs

    def _build_pairs(self, forged_path):
        """Append genuine/genuine and genuine/forged pairs for every known writer."""
        for writer_id, auth_sigs in self.authentic_signatures.items():
            # auth_sigs is never empty: writers without images are not stored.
            reference = auth_sigs[0]

            for other in auth_sigs[1:]:
                self.image_paths.append((reference, other))
                self.labels.append(1.0)  # 1 for authentic

            writer_forged_path = os.path.join(forged_path, writer_id)
            if not os.path.isdir(writer_forged_path):
                continue
            for forged_img in self._list_images(writer_forged_path):
                self.image_paths.append((reference, forged_img))
                self.labels.append(0.0)  # 0 for forged

    def __len__(self):
        """Return the number of image pairs."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(img1, img2, label)`` for pair *idx*.

        Both images are loaded as RGB and passed through ``transform`` when
        one was given; ``label`` is a float32 scalar tensor.
        """
        img1_path, img2_path = self.image_paths[idx]
        label = self.labels[idx]

        img1 = self._load_rgb(img1_path)
        img2 = self._load_rgb(img2_path)

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        return img1, img2, torch.tensor(label, dtype=torch.float32)

# Define the Siamese Network
class SiameseNetwork(pl.LightningModule):
    """Siamese encoder trained with a contrastive loss on image pairs.

    Both inputs go through the same ResNet-18 backbone (classification head
    removed); similarity is the Euclidean distance between the two embeddings.

    Args:
        margin: Contrastive-loss margin. Dissimilar pairs (label 0) are only
            penalised while their distance is below this value.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        backbone = resnet18(weights='ResNet18_Weights.DEFAULT')
        # Drop the last child (the classification layer) and keep the rest as
        # the feature extractor.
        self.encoder = torch.nn.Sequential(*(list(backbone.children())[:-1]))
        self.margin = margin

    def forward(self, x1, x2):
        """Return the embeddings of ``x1`` and ``x2``, each of shape (batch, features)."""
        # flatten(start_dim=1) removes the trailing spatial dimensions but,
        # unlike a bare squeeze(), keeps the batch dimension when batch == 1.
        encoded_x1 = torch.flatten(self.encoder(x1), start_dim=1)
        encoded_x2 = torch.flatten(self.encoder(x2), start_dim=1)
        return encoded_x1, encoded_x2

    def get_distance(self, x1, x2):
        """Return the L2 distance between the embeddings of ``x1`` and ``x2``, one value per pair."""
        encoded_x1, encoded_x2 = self(x1, x2)
        return torch.pairwise_distance(encoded_x1, encoded_x2, p=2)

    def training_step(self, batch, batch_idx):
        """Compute and log the mean contrastive loss for one batch.

        ``batch`` is ``(img1, img2, labels)`` with label 1 for similar pairs
        and 0 for dissimilar pairs.
        """
        img1, img2, labels = batch
        distance = self.get_distance(img1, img2)

        # Contrastive loss: L = y * D^2 + (1 - y) * max(0, margin - D)^2
        similar_term = labels * torch.pow(distance, 2)
        dissimilar_term = (1 - labels) * torch.pow(
            torch.clamp(self.margin - distance, min=0.0), 2
        )
        loss = torch.mean(similar_term + dissimilar_term)

        self.log('train_loss', loss)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (learning rate 1e-4)."""
        return optim.Adam(self.parameters(), lr=1e-4)

# Image preprocessing pipeline: fixed 224x224 size, tensor conversion, then
# per-channel normalization with the ImageNet mean/std.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# --- Training (optional: a previously saved checkpoint can be used instead) ---
# `path` is expected to have been set by the dataset-download cell and is used
# as the dataset root. Each failure case prints a message and skips training.
if 'path' not in locals() or path is None:
    print("Dataset path not found. Skipping training.")
    # Nothing is trained in this case; a pre-trained checkpoint would be
    # loaded instead, along the lines of:
    # model = SiameseNetwork()
    # model.load_state_dict(torch.load('path/to/your/pretrained_model.pth'))
    # model.eval()
else:
    # Both sub-directories must be present before any pairs are built.
    expected_original_path = os.path.join(path, 'original')
    expected_forged_path = os.path.join(path, 'forged')

    if not (os.path.exists(expected_original_path) and os.path.exists(expected_forged_path)):
        print(f"Expected subdirectories 'original' and 'forged' not found in {path}.")
        print("Skipping training.")
    else:
        dataset = SignatureDataset(path, transform=transform)

        if len(dataset) == 0:
            # A DataLoader over zero pairs would give the trainer nothing to do.
            print("Dataset is empty. No image pairs found to train the model.")
            print("Please check the dataset structure and ensure image files are present in the expected directories.")
        else:
            # The whole dataset is used for training; no validation split is made.
            train_dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

            # Fit a fresh model for 10 epochs.
            model = SiameseNetwork()
            trainer = pl.Trainer(max_epochs=10)
            trainer.fit(model, train_dataloader)

            print("Training complete.")

            # Persist the weights so the inference section can reload them.
            torch.save(model.state_dict(), 'siamese_signature_model.pth')
            print("Model saved to siamese_signature_model.pth")

# --- Gradio Interface ---

# Build the inference model and restore trained weights when a checkpoint
# file is present (whether it was written just now or by an earlier run).
model = SiameseNetwork()
if os.path.exists('siamese_signature_model.pth'):
    # map_location='cpu' lets a checkpoint that holds CUDA tensors load on a
    # CPU-only runtime; the prediction function moves the model to the
    # available device afterwards.
    model.load_state_dict(torch.load('siamese_signature_model.pth', map_location='cpu'))
    print("Loaded trained model for inference.")
else:
    # Without a checkpoint the model keeps its freshly constructed weights,
    # so predictions will not reflect any signature-specific training.
    print("No trained model found. Please ensure training was successful or a model file exists.")


model.eval() # Set model to evaluation mode

# Function for prediction
def predict_forgery(authentic_signature_image, test_signature_image, threshold=0.5):
    """Compare two signature images and report whether they look alike.

    Args:
        authentic_signature_image: Reference signature as an image array
            accepted by ``Image.fromarray``, or ``None`` when nothing was
            uploaded.
        test_signature_image: Signature to verify, same format.
        threshold: Distance below which the pair is reported as similar.
            The default of 0.5 is a placeholder; a suitable value must be
            chosen from validation data.

    Returns:
        A human-readable verdict string that includes the embedding distance,
        or a prompt to upload both images when either one is missing.
    """
    if authentic_signature_image is None or test_signature_image is None:
        return "Please upload both authentic and test signatures."

    # Run on the GPU when one is available; inputs must sit on the same
    # device as the model.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    def _to_batch(image_array):
        """Apply the training-time transform and add a batch dimension."""
        rgb_image = Image.fromarray(image_array).convert('RGB')
        return transform(rgb_image).unsqueeze(0).to(device)

    with torch.no_grad():  # inference only, no gradients needed
        distance = model.get_distance(
            _to_batch(authentic_signature_image),
            _to_batch(test_signature_image),
        ).item()

    # Small distance -> similar signatures; large distance -> dissimilar.
    if distance < threshold:
        return f"Signatures are similar (likely Authentic). Distance: {distance:.4f}"
    return f"Signatures are dissimilar (likely Forged). Distance: {distance:.4f}"

# Assemble the web UI: two image uploads (delivered to the callback as numpy
# arrays) go in, and the verdict comes back as plain text.
iface = gr.Interface(
    title="Signature Forgery Detection",
    description="Upload an authentic signature and a test signature to check for forgery.",
    fn=predict_forgery,
    inputs=[
        gr.Image(label="Upload Authentic Signature", type="numpy"),
        gr.Image(label="Upload Test Signature", type="numpy"),
    ],
    outputs="text",
)

# Start serving the UI with debug mode enabled.
print("Launching Gradio interface...")
iface.launch(debug=True)