In [24]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import string
from torch_geometric.data import Data
from torch.utils.data import Dataset, random_split
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv
from sklearn.neighbors import NearestNeighbors

In [25]:
# Path to CAPTCHA dataset
dataset_path = r"D:\Downloads\CAPTCHA"

# Function to preprocess CAPTCHA images
def preprocess_captcha(image_path, width=128, height=64):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        print(f"⚠️ Skipping file (failed to load): {image_path}")
        return None
    image = cv2.resize(image, (width, height))
    image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY_INV, 11, 2)
    kernel = np.ones((2,2), np.uint8)
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
    return image

In [26]:
# Function to extract label from filename
def get_label_from_filename(filename):
    return os.path.splitext(filename)[0]

# Function to convert image into a graph
def image_to_graph(image_path, num_neighbors=5):
    image = preprocess_captcha(image_path)
    if image is None:
        return None
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(image, None)
    if len(keypoints) == 0:
        print(f"⚠️ Skipping {image_path}: No keypoints found")
        return None
    nodes = np.array([kp.pt for kp in keypoints], dtype=np.float32)
    nbrs = NearestNeighbors(n_neighbors=num_neighbors, algorithm='ball_tree').fit(nodes)
    _, indices = nbrs.kneighbors(nodes)
    edge_index = torch.tensor([(i, neighbor) for i, neighbors in enumerate(indices) for neighbor in neighbors if i != neighbor], dtype=torch.long).t().contiguous()
    x = torch.tensor(nodes, dtype=torch.float)
    return Data(x=x, edge_index=edge_index)

In [27]:
# Load dataset
graph_data_list = []
labels = []
image_files = [f for f in os.listdir(dataset_path) if f.endswith(('.png', '.jpg', '.jpeg'))]

for file in image_files:
    graph_data = image_to_graph(os.path.join(dataset_path, file))
    if graph_data is not None:
        graph_data_list.append(graph_data)
        labels.append(get_label_from_filename(file))

print(f"✅ Total valid graphs: {len(graph_data_list)}")

✅ Total valid graphs: 113062


In [29]:
# Encode labels
char_set = string.ascii_uppercase + string.digits
char_to_index = {char: i for i, char in enumerate(char_set)}
def encode_label(label):
    return [char_to_index[c.upper()] for c in label]
encoded_labels = [encode_label(label) for label in labels]

In [30]:
# Custom dataset class
class CaptchaGraphDataset(Dataset):
    def __init__(self, graph_data_list, encoded_labels):
        self.graph_data_list = graph_data_list
        self.encoded_labels = encoded_labels
    def __len__(self):
        return len(self.graph_data_list)
    def __getitem__(self, idx):
        return self.graph_data_list[idx], torch.tensor(self.encoded_labels[idx], dtype=torch.long)

# Split dataset into training and testing sets
train_size = int(0.8 * len(graph_data_list))
test_size = len(graph_data_list) - train_size
train_dataset, test_dataset = random_split(CaptchaGraphDataset(graph_data_list, encoded_labels), [train_size, test_size])

# Create DataLoader
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
print(f"Total Batches: {len(train_loader)}")

Total Batches: 5654


In [31]:
# Define GNN Model
class CaptchaGNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(CaptchaGNN, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)
    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        x = self.fc(x)
        x = x.unsqueeze(1)  # Shape for CTC loss
        return x

In [None]:
# Model setup
input_dim = 2  # (x, y) coordinates
hidden_dim = 64
output_dim = len(char_set)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CaptchaGNN(input_dim, hidden_dim, output_dim).to(device)
criterion = nn.CTCLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train model
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for data, labels in train_loader:
        data = data.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        output = model(data).log_softmax(2)
        labels = labels.view(-1)
        input_lengths = torch.tensor([output.size(0)], dtype=torch.long, device=device)
        target_lengths = torch.tensor([labels.numel()], dtype=torch.long, device=device)
        loss = criterion(output, labels, input_lengths, target_lengths)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss:.4f}")

# Save the trained model
torch.save(model.state_dict(), "captcha_gnn.pth")
print("✅ Model saved as captcha_gnn.pth")

Epoch [1/10], Loss: 88360.2914
Epoch [2/10], Loss: 81529.0842
Epoch [3/10], Loss: 77146.0000
Epoch [4/10], Loss: 78372.0182
Epoch [5/10], Loss: 78151.0572


In [None]:
# Evaluate model
model.eval()
total_correct = 0
total_samples = 0
with torch.no_grad():
    for data, labels in test_loader:
        data = data.to(device)
        labels = labels.to(device)
        output = model(data).argmax(dim=2)
        total_correct += (output == labels).sum().item()
        total_samples += labels.numel()
accuracy = total_correct / total_samples * 100
print(f"Test Accuracy: {accuracy:.2f}%")

In [None]:
# Inference on new CAPTCHA image
def predict_captcha(image_path):
    graph_data = image_to_graph(image_path)
    if graph_data is None:
        return None
    model.eval()
    with torch.no_grad():
        graph_data = graph_data.to(device)
        output = model(graph_data).argmax(dim=2).cpu().numpy()
    predicted_text = ''.join([char_set[i] for i in output.flatten()])
    return predicted_text

# Example usage
sample_image = R"D:\Downloads\Large_Captcha_Dataset\001up.png"
predicted_text = predict_captcha(sample_image)
print(f"Predicted CAPTCHA Text: {predicted_text}")
