# Import Libraries

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

import cv2
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import joblib  # to save/load model

import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Main

In [None]:
# Collect the captcha images found in the input folder
image_folder = 'sampleCaptchas/input/'
# os.listdir returns entries in arbitrary order; sorting keeps the counter-based
# numbering of the character crops written further down reproducible across runs.
image_files = sorted(
    f for f in os.listdir(image_folder)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)
image_files

# Show Raw Images

In [None]:
text_file_path = 'sampleCaptchas/output/'

# Print the label file and display the raw image for every captcha
for img_name in image_files:
    print(img_name)

    # Build the label name from the image stem so that every accepted extension
    # (.png/.jpg/.jpeg) maps to "<stem>.txt", e.g. input00.jpg -> output00.txt
    stem = os.path.splitext(img_name)[0]
    text_file = stem.replace('input', 'output') + '.txt'
    try:
        with open(os.path.join(text_file_path, text_file), 'r') as f:
            for line in f:
                print(line.strip())
    except FileNotFoundError:
        # A missing label should not prevent the image from being shown
        print(f"No label file found for {img_name}")

    img_path = os.path.join(image_folder, img_name)
    img = cv2.imread(img_path)

    # cv2.imread reports an unreadable file by returning None
    if img is None:
        print(f"Failed to load {img_name}")
        continue

    # Convert from BGR to RGB for matplotlib
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    plt.imshow(img_rgb)
    plt.title(img_name)
    plt.axis('off')
    plt.show()

# Preprocess Images

In [None]:
def crop_image(img, top=0, bottom=0, left=0, right=0):
    """
    Remove a fixed-size margin from each side of an image.

    Args:
        img (np.ndarray): The input image; rows are axis 0, columns are axis 1.
        top (int): Rows to remove from the top.
        bottom (int): Rows to remove from the bottom.
        left (int): Columns to remove from the left.
        right (int): Columns to remove from the right.

    Returns:
        np.ndarray: The cropped region, obtained by slicing the input.
    """
    height, width = img.shape[0], img.shape[1]
    row_range = slice(top, height - bottom)
    col_range = slice(left, width - right)
    return img[row_range, col_range]

In [None]:
def slice_left_to_right(img, num_parts=5, part_width=9):
    """
    Slice an image left-to-right into consecutive vertical strips.

    Args:
        img (np.ndarray): The input image.
        num_parts (int): Number of left-to-right vertical slices.
        part_width (int | None): Width of each slice in pixels. The default
            of 9 is the value this function previously hard-coded
            (presumably tuned to the character pitch of the cropped
            captchas -- confirm). Pass None to split the image into
            equal-width slices of ``width // num_parts`` pixels.

    Returns:
        list of np.ndarray: List of sliced character images. A slice that
        would extend past the right edge is clipped, so trailing slices may
        be narrower than ``part_width`` (or empty).
    """
    width = img.shape[1]
    if part_width is None:
        part_width = width // num_parts

    # all rows, columns [i * part_width, (i + 1) * part_width)
    return [
        img[:, i * part_width:(i + 1) * part_width]
        for i in range(num_parts)
    ]


# Isolate images into individual characters

In [None]:
text_file_path = 'sampleCaptchas/output/'

# cv2.imwrite does not create missing folders, so make sure the crop folder exists
os.makedirs('sampleCaptchas/data', exist_ok=True)

counter = 1
data = []
for img_name in image_files:
    print(img_name)

    # Build the label name from the image stem so that every accepted extension
    # (.png/.jpg/.jpeg) maps to "<stem>.txt", e.g. input00.jpg -> output00.txt
    stem = os.path.splitext(img_name)[0]
    text_file = stem.replace('input', 'output') + '.txt'
    with open(os.path.join(text_file_path, text_file), 'r') as f:
        label_lines = [raw_line.strip() for raw_line in f]
    for label_line in label_lines:
        print(label_line)
    # Keep the label explicitly instead of relying on a loop variable that
    # outlives the file loop; the last non-empty line is used as the captcha text.
    label = next((text for text in reversed(label_lines) if text), '')

    img_path = os.path.join(image_folder, img_name)
    img = cv2.imread(img_path)
    # cv2.imread reports an unreadable file by returning None
    if img is None:
        print(f"Failed to load {img_name}")
        continue

    img = crop_image(img, top=11, bottom=9, left=5, right=11)
    _, img = cv2.threshold(img, 50, 255, cv2.THRESH_BINARY_INV)
    img = cv2.bitwise_not(img)

    parts = slice_left_to_right(img)
    if len(label) < len(parts):
        # Without one character per slice the crops cannot be labelled
        print(f"Skipping {img_name}: label {label!r} has fewer than {len(parts)} characters")
        continue

    # Display the vertical slices and store each one as a labelled sample
    for i, part in enumerate(parts):
        plt.subplot(1, len(parts), i + 1)
        plt.imshow(cv2.cvtColor(part, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.title(label[i])
        path_save = f"sampleCaptchas/data/{str(counter).rjust(4, '0')}.jpg"
        # imwrite returns False when the file could not be written
        if not cv2.imwrite(path_save, part):
            raise IOError(f"Could not write {path_save}")
        data.append([path_save, label[i]])
        counter += 1
    plt.tight_layout()
    plt.show()

# One row per character crop: image path and the character it shows
df = pd.DataFrame(data, columns=['path', 'value'])
df.to_csv('sampleCaptchas/data.csv', index=False)

In [None]:
# Inspect the collected [crop path, character label] records
data

# Create CNN model to classify characters

In [None]:
# Supported alphabet: the ten digits followed by the upper-case letters.
# A character's position in CHARS is its class index.
CHARS = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
idx_to_char = dict(enumerate(CHARS))
char_to_idx = {char: index for index, char in idx_to_char.items()}

In [None]:
# Dataset class
class CaptchaDataset(Dataset):
    """
    Dataset of single-character images listed in a CSV file.

    The first CSV column holds the image path (relative to ``root_dir``) and
    the second column holds the character shown in that image.

    Args:
        csv_file (str): Path to the CSV file.
        root_dir (str): Folder that the image paths are joined onto.
        transform (callable, optional): Applied to the loaded PIL image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        # Read every column as text: with dtype inference a label column that
        # happens to contain only digits is parsed as integers, and the
        # char_to_idx lookup (keyed by one-character strings) then fails.
        self.data = pd.read_csv(csv_file, dtype=str)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Load one sample.

        Returns:
            tuple: (image, label) where image is the grayscale image after
            ``transform`` (if any) and label is the integer class index.

        Raises:
            KeyError: If the label character is not in ``char_to_idx``.
        """
        img_name = os.path.join(self.root_dir, self.data.iloc[idx, 0])
        image = Image.open(img_name).convert('L')  # 'L' = single-channel grayscale
        label_char = self.data.iloc[idx, 1]
        label = char_to_idx[label_char]

        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
# Simple CNN model
class SimpleCNN(nn.Module):
    """
    Small convolutional classifier for single-channel character images.

    Two 3x3 convolutions, one 2x2 max-pool and two fully connected layers.
    The first fully connected layer expects 9216 = 64 * 12 * 12 features,
    which is what a 28x28 input yields (28 -> 26 -> 24 -> pooled to 12).

    Args:
        num_classes (int): Number of output classes (defaults to len(CHARS)).
    """

    def __init__(self, num_classes=len(CHARS)):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout(p=0.25)
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images."""
        features = F.relu(self.conv2(F.relu(self.conv1(x))))
        pooled = self.dropout1(F.max_pool2d(features, kernel_size=2))
        # Keep the batch dimension, flatten everything else
        hidden = F.relu(self.fc1(pooled.flatten(start_dim=1)))
        return self.fc2(self.dropout2(hidden))

In [None]:
# Hyper-parameters of the training run
BATCH_SIZE, EPOCHS, LEARNING_RATE = 64, 100, 0.001

# Preprocessing applied to every character image:
# resize to 28x28, convert to a tensor, then normalise with mean 0.5 / std 0.5
transform = transforms.Compose(
    [
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Character crops listed in the CSV, served in shuffled mini-batches
dataset = CaptchaDataset('sampleCaptchas/data.csv', '', transform)
dataloader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
# Run on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Network, classification loss and optimiser
model = SimpleCNN()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
# Train for EPOCHS full passes over the dataloader
for epoch in range(EPOCHS):
    model.train()  # training mode (dropout active)
    running_loss = 0
    for batch in dataloader:
        images, labels = (tensor.to(device) for tensor in batch)

        # One optimisation step: reset gradients, forward, backward, update
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Report the mean of the per-batch losses for this epoch
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {running_loss/len(dataloader):.4f}")

# Persist the learned parameters (state_dict) for later inference
torch.save(model.state_dict(), 'captcha_model.pth')

print("Training complete and model saved.")

# Perform Prediction

In [None]:
# Holds the most recently loaded model, keyed by (absolute path, mtime)
_PREDICT_MODEL_CACHE = {}


def _load_cached_model(model_path):
    """Return an eval-mode SimpleCNN for model_path, reloading only when the file changed."""
    # The modification time is part of the key so that a retrained and
    # re-saved checkpoint is picked up instead of a stale cached model.
    key = (os.path.abspath(model_path), os.path.getmtime(model_path))
    model = _PREDICT_MODEL_CACHE.get(key)
    if model is None:
        model = SimpleCNN()
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        state = torch.load(model_path, map_location=torch.device('cpu'))
        model.load_state_dict(state)
        model.eval()
        _PREDICT_MODEL_CACHE.clear()  # keep at most one model in memory
        _PREDICT_MODEL_CACHE[key] = model
    return model


def predict_character(image_path, model_path='captcha_model.pth'):
    """
    Predict the character shown in a single-character image file.

    The model is loaded on the CPU and cached between calls, because this
    function is invoked once per character crop and reloading the weights
    from disk every time is wasted work.

    Args:
        image_path (str): Path to the image of one character.
        model_path (str): Path to the saved state_dict of a SimpleCNN.

    Returns:
        str: The predicted character.

    Raises:
        FileNotFoundError: If the model file or the image file does not exist.
    """
    model = _load_cached_model(model_path)

    # Same preprocessing as used for training
    transform = transforms.Compose([
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    # Load as grayscale and add the batch dimension the model expects
    image = Image.open(image_path).convert('L')
    input_tensor = transform(image).unsqueeze(0)

    # Inference only: no gradients needed
    with torch.no_grad():
        output = model(input_tensor)
        pred_idx = output.argmax(dim=1).item()

    return idx_to_char[pred_idx]

In [None]:
# Smoke-test the classifier on a single character crop. The model was trained
# on the crops in sampleCaptchas/data/, so a whole five-character captcha from
# sampleCaptchas/input/ is not a meaningful input for it.
print(predict_character('sampleCaptchas/data/0001.jpg'))

In [None]:
text_file_path = 'sampleCaptchas/output/'

# cv2.imwrite does not create missing folders, so make sure the crop folder exists
os.makedirs('sampleCaptchas/data', exist_ok=True)

# NOTE(review): the crops evaluated here come from the same images the training
# CSV was built from, so the resulting accuracy is a training-set accuracy.
counter = 1
data = []
for img_name in image_files:
    print(img_name)

    # Build the label name from the image stem so that every accepted extension
    # (.png/.jpg/.jpeg) maps to "<stem>.txt", e.g. input00.jpg -> output00.txt
    stem = os.path.splitext(img_name)[0]
    text_file = stem.replace('input', 'output') + '.txt'
    with open(os.path.join(text_file_path, text_file), 'r') as f:
        label_lines = [raw_line.strip() for raw_line in f]
    for label_line in label_lines:
        print(label_line)
    # Keep the label explicitly instead of relying on a loop variable that
    # outlives the file loop; the last non-empty line is used as the captcha text.
    label = next((text for text in reversed(label_lines) if text), '')

    img_path = os.path.join(image_folder, img_name)
    img = cv2.imread(img_path)
    # cv2.imread reports an unreadable file by returning None
    if img is None:
        print(f"Failed to load {img_name}")
        continue

    img = crop_image(img, top=11, bottom=9, left=5, right=11)
    _, img = cv2.threshold(img, 50, 255, cv2.THRESH_BINARY_INV)
    img = cv2.bitwise_not(img)

    parts = slice_left_to_right(img)
    if len(label) < len(parts):
        # Without one character per slice the predictions cannot be scored
        print(f"Skipping {img_name}: label {label!r} has fewer than {len(parts)} characters")
        continue

    # Display the vertical slices with "true -> predicted" titles
    for i, part in enumerate(parts):
        plt.subplot(1, len(parts), i + 1)
        plt.imshow(cv2.cvtColor(part, cv2.COLOR_BGR2RGB))
        plt.axis('off')

        path_save = f"sampleCaptchas/data/{str(counter).rjust(4, '0')}.jpg"
        # imwrite returns False when the file could not be written
        if not cv2.imwrite(path_save, part):
            raise IOError(f"Could not write {path_save}")

        pred = predict_character(path_save)

        plt.title(label[i] + ' -> ' + pred)
        data.append([path_save, label[i], pred])
        counter += 1
    plt.tight_layout()
    plt.show()

# One row per character crop: image path, true character, predicted character
df = pd.DataFrame(data, columns=['path', 'value', 'pred'])
df.to_csv('sampleCaptchas/data_pred.csv', index=False)

In [None]:
# Pull the predicted and the ground-truth characters out of the results frame
pred, true = df['pred'], df['value']

# Evaluate performance

In [None]:
# data is a list of [path, true_label, pred_label] rows
total = len(data)
correct = sum(
    1 for _, true_label, pred_label in data if true_label == pred_label
)

# Guard against a ZeroDivisionError when no character was evaluated
accuracy = correct / total if total else 0.0
print(f'Accuracy: {accuracy:.4f}')

In [None]:
# Keep only the rows whose predicted character differs from the true one
wrong_predictions = [
    (path, true_label, pred_label)
    for path, true_label, pred_label in data
    if true_label != pred_label
]

print(f"Total wrong predictions: {len(wrong_predictions)}")
for item in wrong_predictions:
    print(f"Image: {item[0]}, True: {item[1]}, Predicted: {item[2]}")


# Final deliverable for Captcha Class

In [None]:
class Captcha(object):
    """
    End-to-end captcha reader: preprocess an image, split it into character
    slices and classify each slice with a trained SimpleCNN.

    Args:
        model_path (str): Path to the saved state_dict of a SimpleCNN.
    """

    def __init__(self, model_path='captcha_model.pth'):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = SimpleCNN()
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        self.model.load_state_dict(torch.load(model_path, map_location=self.device))
        self.model.to(self.device)
        self.model.eval()

        # Same preprocessing as used for training
        self.transform = transforms.Compose([
            transforms.Resize((28, 28)),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ])

    def predict_character(self, img_array):
        """
        Predict a single character from a numpy grayscale image.

        Args:
            img_array (np.ndarray): One character slice as produced in __call__.

        Returns:
            str: The predicted character.
        """
        pil_img = Image.fromarray(img_array)
        input_tensor = self.transform(pil_img).unsqueeze(0).to(self.device)
        with torch.no_grad():
            output = self.model(input_tensor)
            pred_idx = output.argmax(dim=1).item()
            return idx_to_char[pred_idx]

    def __call__(self, im_path, save_path):
        """
        Read the captcha in an image file and write the text to a file.

        Args:
            im_path (str): Path to the captcha image.
            save_path (str): Path of the text file the prediction is written to.

        Returns:
            str: The predicted captcha text.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        img = cv2.imread(im_path)
        # cv2.imread returns None instead of raising; fail with a clear message
        # rather than with an attribute error further down.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {im_path}")
        img = crop_image(img, top=11, bottom=9, left=5, right=11)

        # Convert to grayscale if needed
        if len(img.shape) == 3 and img.shape[2] == 3:
            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        else:
            img_gray = img

        # NOTE(review): the training crops were thresholded on the colour image
        # and stored as JPEG before being loaded, while here the threshold is
        # applied after grayscale conversion and the slices are used directly;
        # the inputs may differ slightly from the training data -- confirm.
        _, img_thresh = cv2.threshold(img_gray, 50, 255, cv2.THRESH_BINARY_INV)
        img_inv = cv2.bitwise_not(img_thresh)

        # Slice into 5 parts and classify each one
        parts = slice_left_to_right(img_inv, num_parts=5)
        predicted_text = ''.join(self.predict_character(part) for part in parts)

        # Create the output folder on demand so that saving cannot fail on a
        # missing directory
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(save_path, 'w') as f:
            f.write(predicted_text)

        return predicted_text

In [None]:
# Build the captcha reader once; the constructor loads the saved weights
captcha = Captcha('captcha_model.pth')

In [None]:
# Image to read and the file the predicted text is written to
input_img = 'sampleCaptchas/input/input01.jpg'
output_txt = 'sampleCaptchas/pred/output01.txt'
print(captcha(input_img, output_txt))

In [None]:
# Show the captcha that was just read
img_path = input_img
img = cv2.imread(img_path)
# cv2.imread returns None when the file cannot be read
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Convert from BGR to RGB for matplotlib
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

plt.imshow(img_rgb)
# Title with the file actually displayed, not with a name left over from an
# earlier loop
plt.title(os.path.basename(img_path))
plt.axis('off')
plt.show()