In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from helper_functions.encode import (
    prepare_training_data, CHARACTERS, 
    segment_captcha_with_projection, preprocess_image,
    PROJECTION_THRESHOLD, IMG_HEIGHT, IMG_WIDTH
)
import cv2
import os
from tqdm import tqdm
from collections import defaultdict

# Custom Dataset Class
class CharDataset(Dataset):
    """In-memory dataset pairing character images with their labels.

    Args:
        X_img: Array-like batch of images in (N, H, W, C) order. It is copied
            into a float32 tensor and re-ordered to (N, C, H, W).
        y: Indexable collection of labels; stored as given.
    """

    def __init__(self, X_img, y):
        images_nhwc = torch.tensor(X_img, dtype=torch.float32)
        # Channels-last -> channels-first, the layout nn.Conv2d consumes.
        self.X = images_nhwc.permute(0, 3, 1, 2)
        self.y = y

    def __len__(self):
        """Number of samples, i.e. the size of the image tensor's first axis."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        image = self.X[idx]
        label = self.y[idx]
        return image, label

# CNN Model Architecture
class CNN(nn.Module):
    """Two-block convolutional classifier for single-channel character crops.

    Each block is a 3x3 convolution (padding 1) followed by ReLU and a 2x2
    max-pool, so every spatial dimension is floor-halved twice before the two
    fully connected layers.

    Args:
        num_classes: Number of output logits.
        img_height: Height of the input images. Defaults to 40.
        img_width: Width of the input images. Defaults to 30.

    With the default size the flattened feature vector has 64 * 10 * 7
    entries, matching the layer shapes of the original fixed-size model.
    """

    def __init__(self, num_classes, img_height=40, img_width=30):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 pools floor-halve each spatial dim twice (40x30 -> 10x7).
        flat_features = 64 * (img_height // 2 // 2) * (img_width // 2 // 2)
        self.fc1 = nn.Linear(flat_features, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map a (N, 1, H, W) batch to (N, num_classes) logits.

        Raises:
            RuntimeError: If H x W does not match the size the model was
                built for (the flattened width no longer fits ``fc1``).
        """
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        # Flatten per sample. A `view(-1, K)` here could silently regroup a
        # wrong-sized batch into a different number of rows instead of failing.
        x = torch.flatten(x, 1)
        x = nn.functional.relu(self.fc1(x))
        return self.fc2(x)

# Character-Level Evaluation Function
def evaluate_character_level(model, dataloader, device):
    """Score ``model`` on individual character samples.

    The model is switched to eval mode and every batch yielded by
    ``dataloader`` is run with gradient tracking disabled. The predicted
    class of each sample is the index of its largest output.

    Args:
        model: Network called on each input batch.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        device: Device the batches are moved to before the forward pass.

    Returns:
        dict with keys 'accuracy', 'precision', 'recall' and 'f1'. The last
        three are macro-averaged and computed with ``zero_division=0``.
    """
    model.eval()
    predicted, expected = [], []

    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_inputs)
            # Index of the max logit along the class dimension.
            batch_preds = torch.max(logits, 1)[1]
            predicted.extend(batch_preds.cpu().numpy())
            expected.extend(batch_labels.cpu().numpy())

    macro = {'average': 'macro', 'zero_division': 0}
    return {
        'accuracy': accuracy_score(expected, predicted),
        'precision': precision_score(expected, predicted, **macro),
        'recall': recall_score(expected, predicted, **macro),
        'f1': f1_score(expected, predicted, **macro),
    }

# Captcha-Level Evaluation Function
def _predict_segment_chars(model, thresh, boundaries, device):
    """Classify every character segment of one CAPTCHA in a single forward pass."""
    if len(boundaries) == 0:
        return []

    # Cut each column span out of the preprocessed image and bring it to the
    # (IMG_HEIGHT, IMG_WIDTH, 1) layout used for the per-character inputs.
    crops = [
        cv2.resize(thresh[:, start:end], (IMG_WIDTH, IMG_HEIGHT)).reshape(IMG_HEIGHT, IMG_WIDTH, 1)
        for start, end in boundaries
    ]
    batch = torch.tensor(np.stack(crops) / 255.0, dtype=torch.float32)
    batch = batch.permute(0, 3, 1, 2).to(device)  # NHWC -> NCHW

    with torch.no_grad():
        _, preds = torch.max(model(batch), 1)
    return [CHARACTERS[index] for index in preds.tolist()]


def evaluate_captcha_level(model, folder_path, device):
    """Evaluate ``model`` on whole CAPTCHA images found in ``folder_path``.

    Every ``.png``/``.jpg``/``.jpeg`` file is preprocessed, segmented and
    classified character by character. The expected text is the part of the
    file name (without extension) that precedes the first ``-``.

    A CAPTCHA is skipped, and therefore counted in NEITHER the accuracy
    denominator nor the character metrics, when the image cannot be read or
    when the number of segments differs from the label length. The reported
    accuracy is thus conditional on successful segmentation.

    Args:
        model: Character classifier; it is put into eval mode.
        folder_path: Directory containing the labelled CAPTCHA images.
        device: Device on which inference runs.

    Returns:
        dict with keys:
            'accuracy': fraction of evaluated CAPTCHAs whose full predicted
                string equals the label (CAPTCHA-level).
            'precision', 'recall', 'f1': macro-averaged character-level
                scores over ``CHARACTERS`` with ``zero_division=0``.
        All four values are 0.0 when no CAPTCHA could be evaluated.
    """
    import warnings  # local import keeps this block self-contained

    model.eval()
    all_images = [f for f in os.listdir(folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

    correct = 0
    total = 0
    y_true = []
    y_pred = []

    for filename in tqdm(all_images, desc="Evaluating CAPTCHAs"):
        image_path = os.path.join(folder_path, filename)
        correct_label = os.path.splitext(filename)[0].split('-')[0]

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(image_path)
        if image is None:
            warnings.warn(f"Skipping unreadable image: {image_path}")
            continue

        thresh = preprocess_image(image)
        boundaries, _, _ = segment_captcha_with_projection(thresh, PROJECTION_THRESHOLD)

        # Skip if segmentation failed (segment count != number of characters).
        if len(boundaries) != len(correct_label):
            continue

        predicted_chars = _predict_segment_chars(model, thresh, boundaries, device)

        # Collect per-character ground truth and predictions.
        y_true.extend(correct_label)
        y_pred.extend(predicted_chars)

        # A CAPTCHA counts as correct only if every character matches.
        if ''.join(predicted_chars) == correct_label:
            correct += 1
        total += 1

    if total == 0:
        return {
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }

    # CAPTCHA-level accuracy over the CAPTCHAs that were actually evaluated.
    accuracy = correct / total

    # Character-level metrics, macro-averaged over the full character set.
    class_labels = list(CHARACTERS)
    precision = precision_score(y_true, y_pred, labels=class_labels, average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, labels=class_labels, average='macro', zero_division=0)
    f1 = f1_score(y_true, y_pred, labels=class_labels, average='macro', zero_division=0)

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

# Pipeline

In [2]:
# Prepare data
train_folder = "../data/train/combine"
test_folder = "../data/test/combine"

# Tunable settings gathered in one place instead of inline magic numbers.
SEED = 42
BATCH_SIZE = 32
NUM_EPOCHS = 15
LEARNING_RATE = 0.001

# Seed PyTorch's global RNG so weight initialisation and the shuffle order of
# the training loader are repeatable on a fresh kernel (best effort: some GPU
# kernels may still be non-deterministic).
torch.manual_seed(SEED)

print("Preparing training data...")
X_train, _, y_train_onehot, _, _ = prepare_training_data(train_folder)
# One-hot rows -> integer class indices, as expected by CrossEntropyLoss.
y_train = torch.tensor(np.argmax(y_train_onehot, axis=1), dtype=torch.long)

print("Preparing test data...")
X_test, _, y_test_onehot, _, _ = prepare_training_data(test_folder)
y_test = torch.tensor(np.argmax(y_test_onehot, axis=1), dtype=torch.long)

# Create dataloaders (only the training set is shuffled)
train_loader = DataLoader(CharDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(CharDataset(X_test, y_test), batch_size=BATCH_SIZE, shuffle=False)

# Initialize model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN(num_classes=len(CHARACTERS)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training loop
print("Training model...")
for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # loss is the batch mean; weight it by batch size so the epoch figure
        # is a true per-sample average even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)

    print(f"Epoch {epoch+1} Loss: {running_loss/len(train_loader.dataset):.4f}")

Preparing training data...


Preparing Data: 100%|██████████| 7437/7437 [00:48<00:00, 154.54it/s]


Preparing test data...


Preparing Data: 100%|██████████| 1894/1894 [00:12<00:00, 154.89it/s]


Training model...
Epoch 1 Loss: 1.5586
Epoch 2 Loss: 0.8260
Epoch 3 Loss: 0.6574
Epoch 4 Loss: 0.5540
Epoch 5 Loss: 0.4813
Epoch 6 Loss: 0.4177
Epoch 7 Loss: 0.3691
Epoch 8 Loss: 0.3277
Epoch 9 Loss: 0.2941
Epoch 10 Loss: 0.2578
Epoch 11 Loss: 0.2293
Epoch 12 Loss: 0.2052
Epoch 13 Loss: 0.1837
Epoch 14 Loss: 0.1669
Epoch 15 Loss: 0.1482


# Evaluation

In [5]:

# Evaluation: report character-level metrics on test_loader, then
# CAPTCHA-level metrics on the images in test_folder.
print("\nCharacter-Level Evaluation:")
char_metrics = evaluate_character_level(model, test_loader, device)
for name, score in char_metrics.items():
    print("{}: {:.4f}".format(name.capitalize(), score))

print("\nCaptcha-Level Evaluation:")
captcha_metrics = evaluate_captcha_level(model, test_folder, device)
for name, score in captcha_metrics.items():
    print("{}: {:.4f}".format(name.capitalize(), score))




Character-Level Evaluation:
Accuracy: 0.8188
Precision: 0.8243
Recall: 0.8163
F1: 0.8173

Captcha-Level Evaluation:


Evaluating CAPTCHAs: 100%|██████████| 1894/1894 [00:20<00:00, 94.52it/s] 


Accuracy: 0.4384
Precision: 0.8243
Recall: 0.8163
F1: 0.8173
