In [None]:
import os, pickle
import cv2
import numpy as np
from imutils import paths
import imutils
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from lab_2_helpers import *

# --- Paths -----------------------------------------------------------------
# Folder scanned for the raw CAPTCHA images.
CAPTCHA_IMAGE_FOLDER = "./captcha-images"
# Seed for the train+val / test split; also embedded in the character-image
# folder name so crops cached on disk are tied to one particular split.
TVT_SPLIT_SEED = 31528476
CHAR_IMAGE_FOLDER = f"./char-images-{TVT_SPLIT_SEED}"
# Where the trained model's state_dict and the fitted label encoder are stored.
MODEL_WEIGHTS_PATH = "./captcha-model.pt"
LABELS_PATH = "./labels.pkl"

# --- Training hyper-parameters ---------------------------------------------
BATCH_SIZE = 32
N_EPOCHS = 10

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Materialize the image paths once so every later step iterates the same list.
# NOTE(review): the listing order presumably follows the file system, in which
# case the seeded split is only reproducible on the same directory layout —
# confirm against imutils.paths.list_images.
captcha_image_paths = list(paths.list_images(CAPTCHA_IMAGE_FOLDER))

def extract_captcha_text(image_path):
    """Return the CAPTCHA text encoded in an image's file name.

    The text is the base name of ``image_path`` with its final extension
    removed, e.g. ``"./captcha-images/2A2X.png"`` -> ``"2A2X"``.
    """
    file_name = os.path.basename(image_path)
    stem, _extension = os.path.splitext(file_name)
    return stem

# Ground-truth text for every CAPTCHA, index-aligned with captcha_image_paths.
captcha_texts = list(map(extract_captcha_text, captcha_image_paths))

def load_transform_image(image_path):
    """Load a CAPTCHA image as grayscale and pad it with a white border.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A 2-D grayscale array with 8 extra pixels of value 255 on every side.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the problem only surfaces as an opaque cvtColor assertion that
    # does not mention which file was at fault.
    if image is None:
        raise OSError(f"cv2.imread could not read image: {image_path!r}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Constant border of 8 px (top, bottom, left, right) filled with white.
    image = cv2.copyMakeBorder(image, 8, 8, 8, 8, cv2.BORDER_CONSTANT, value=255)
    return image

# Load every CAPTCHA up front; the list is index-aligned with captcha_texts.
captcha_images = list(map(load_transform_image, captcha_image_paths))

# Hold out 20% of the CAPTCHAs as the test set; the remaining "tv" portion is
# the train+validation pool. The fixed seed keeps the split repeatable for a
# given input order.
(
    captcha_images_tv,
    captcha_images_test,
    captcha_texts_tv,
    captcha_texts_test,
) = train_test_split(
    captcha_images,
    captcha_texts,
    test_size=0.2,
    random_state=TVT_SPLIT_SEED,
)

def extract_chars(image):
    """Split a grayscale CAPTCHA image into four character crops, left to right.

    The image is binarized with an inverted Otsu threshold, external contours
    are located, and their bounding boxes become candidate character regions.
    A box whose width exceeds 1.25x its height is split into two equal halves.
    The four largest regions (by area) are kept and ordered by x coordinate.

    Args:
        image: 2-D (single-channel) array; it is cast to ``uint8`` before use.

    Returns:
        A list of four 2-D crops taken from the grayscale image (not from the
        thresholded one), each with up to 2 px of margin clipped to the image
        bounds, or ``None`` when fewer than four regions are found or a crop
        turns out empty.
    """
    image = image.astype("uint8")

    # THRESH_BINARY_INV turns pixels darker than the Otsu threshold into
    # foreground (255), which is what findContours traces.
    thresh = cv2.threshold(
        image, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    )[1]

    # OpenCV 2.4 and 4.x return (contours, hierarchy) while 3.x returns
    # (image, contours, hierarchy). Select the contour list by tuple length so
    # the function does not silently iterate over image rows on 3.x.
    found = cv2.findContours(
        thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    contours = found[0] if len(found) == 2 else found[1]

    regions = []
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        if w / h > 1.25:
            # Too wide for a single glyph: split into two equal-width halves.
            # With an odd width the last column is not part of either half,
            # but the 2 px crop margin applied below covers it again.
            half = w // 2
            regions.append((x, y, half, h))
            regions.append((x + half, y, half, h))
        else:
            regions.append((x, y, w, h))

    if len(regions) < 4:
        return None

    # Keep the four largest boxes (smaller ones are discarded), then restore
    # reading order by sorting on the left edge.
    regions = sorted(regions, key=lambda r: r[2] * r[3], reverse=True)[:4]
    regions = sorted(regions, key=lambda r: r[0])

    chars = []
    H, W = image.shape
    for x, y, w, h in regions:
        # Expand each box by 2 px per side, clipped to the image bounds.
        x1, y1 = max(x - 2, 0), max(y - 2, 0)
        x2, y2 = min(x + w + 2, W), min(y + h + 2, H)
        char = image[y1:y2, x1:x2]
        if char.size == 0:
            return None
        chars.append(char)

    return chars

# Next file number to use for each character; numbering starts at 1.
char_counts = {}

def save_chars(char_images, captcha_text):
    """Write each character crop to ``CHAR_IMAGE_FOLDER/<char>/<n>.png``.

    Crops and characters are paired positionally. ``n`` is a per-character
    running counter kept in the module-level ``char_counts`` dict, which is
    advanced after every write.
    """
    for ch, img in zip(captcha_text, char_images):
        index = char_counts.get(ch, 1)
        target_dir = os.path.join(CHAR_IMAGE_FOLDER, ch)
        os.makedirs(target_dir, exist_ok=True)
        target_path = os.path.join(target_dir, f"{index}.png")
        cv2.imwrite(target_path, img)
        char_counts[ch] = index + 1

# Build the per-character image folder only when it does not exist yet; an
# existing folder is reused as-is. Only the train+validation CAPTCHAs are
# segmented, so test CAPTCHAs never contribute training crops.
if not os.path.exists(CHAR_IMAGE_FOLDER):
    for img, txt in zip(captcha_images_tv, captcha_texts_tv):
        chars = extract_chars(img)
        if not chars:
            # Segmentation failed for this CAPTCHA; skip it.
            continue
        save_chars(chars, txt)

def make_feature(image):
    """Turn a character crop into a float32 array with a leading channel axis.

    The crop is passed through ``resize_to_fit(image, 20, 20)``, cast to
    float32, divided by 255 and given a new first axis.
    NOTE(review): ``resize_to_fit`` comes from ``lab_2_helpers`` — presumably
    it returns a 20x20 array, making the result (1, 20, 20); confirm there.
    """
    resized = resize_to_fit(image, 20, 20)
    scaled = resized.astype("float32") / 255.0
    return scaled[np.newaxis, :, :]

def make_feature_label(path):
    """Load one saved character image and return ``(feature, label)``.

    The label is the name of the directory that directly contains the file,
    i.e. the second-to-last component of ``path`` split on ``os.path.sep``.
    """
    label = path.split(os.path.sep)[-2]
    grayscale = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    feature = make_feature(grayscale)
    return feature, label

# Load every saved character crop as a (feature, label) pair, then transpose
# the pairs into two parallel tuples.
features, labels = zip(
    *map(make_feature_label, paths.list_images(CHAR_IMAGE_FOLDER))
)

features = np.asarray(features)

# Fit the label encoder and persist it so predicted class indices can be
# mapped back to characters later on.
lb = LabelBinarizer().fit(labels)
labels_oh = lb.transform(labels)

with open(LABELS_PATH, "wb") as labels_file:
    pickle.dump(lb, labels_file)

# CrossEntropyLoss takes integer class indices, not one-hot rows.
y = labels_oh.argmax(axis=1)

# 75% / 25% train / validation split of the character crops.
X_train, X_val, y_train, y_val = train_test_split(
    features,
    y,
    test_size=0.25,
    random_state=955996,
)

class CharDataset(Dataset):
    """In-memory dataset pairing character features with class indices."""

    def __init__(self, X, y):
        """Copy ``X`` into a float32 tensor and ``y`` into an int64 tensor."""
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        """Return the number of samples (size of the first axis of ``X``)."""
        return self.X.size(0)

    def __getitem__(self, idx):
        """Return the ``(feature, label)`` pair stored at ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# Training batches are reshuffled every epoch; validation batches keep a
# fixed order.
train_loader = DataLoader(
    dataset=CharDataset(X_train, y_train),
    batch_size=BATCH_SIZE,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=CharDataset(X_val, y_val),
    batch_size=BATCH_SIZE,
)

class CaptchaCNN(nn.Module):
    """Two-stage convolutional classifier for single-channel 20x20 crops.

    Each conv stage keeps the spatial size (5x5 kernel, padding 2) and the
    following 2x2 max-pool halves it, so a 20x20 input reaches the first
    linear layer as 50 feature maps of 5x5. The output is one raw score
    (logit) per class.
    """

    def __init__(self, n_classes):
        """Build the network with ``n_classes`` output logits."""
        super().__init__()
        # Layer order is kept flat inside a single Sequential so parameter
        # names (net.0, net.3, net.7, net.9) stay stable for saved weights.
        first_conv_stage = [
            nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        second_conv_stage = [
            nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        classifier_head = [
            nn.Flatten(),
            nn.Linear(in_features=50 * 5 * 5, out_features=500),
            nn.ReLU(),
            nn.Linear(in_features=500, out_features=n_classes),
        ]
        self.net = nn.Sequential(
            *first_conv_stage, *second_conv_stage, *classifier_head
        )

    def forward(self, x):
        """Return the class logits for a batch ``x``."""
        logits = self.net(x)
        return logits

# One output logit per character class known to the label encoder.
model = CaptchaCNN(n_classes=len(lb.classes_)).to(device)
optimizer = optim.Adam(params=model.parameters())
criterion = nn.CrossEntropyLoss()

for epoch in range(N_EPOCHS):
    model.train()
    for Xb, yb in train_loader:
        Xb = Xb.to(device)
        yb = yb.to(device)
        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(Xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()

# Persist only the parameters (state_dict), not the whole module object.
torch.save(model.state_dict(), MODEL_WEIGHTS_PATH)

# Reload the saved weights and label encoder from disk, then switch the model
# to inference mode.
model.load_state_dict(torch.load(MODEL_WEIGHTS_PATH, map_location=device))
model.eval()

with open(LABELS_PATH, "rb") as f:
    lb = pickle.load(f)

# One predicted string per test CAPTCHA; "-" marks a failed segmentation.
preds_test = []

with torch.no_grad():
    for img in captcha_images_test:
        chars = extract_chars(img)
        if chars is None:
            preds_test.append("-")
        else:
            out = []
            for c in chars:
                # Indexing with None adds the batch axis in front of the
                # feature's channel axis.
                feat = torch.tensor(make_feature(c))[None].to(device)
                logits = model(feat)
                idx = logits.argmax(dim=1).item()
                out.append(lb.classes_[idx])
            preds_test.append("".join(out))

# A CAPTCHA counts as correct only when the whole predicted string matches.
n_correct = sum(1 for p, a in zip(preds_test, captcha_texts_test) if p == a)

print("# of test CAPTCHAs:", len(captcha_texts_test))
print("# correctly recognized:", n_correct)
print("Accuracy:", n_correct / len(captcha_texts_test))

# of test CAPTCHAs: 228
# correctly recognized: 216
Accuracy: 0.9473684210526315
