In [3]:
conda install pytorch torchvision torchaudio cpuonly -c pytorch


Retrieving notices: ...working... done
Channels:
 - pytorch
 - defaults
 - conda-forge
Platform: win-64
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: c:\Users\conno\anaconda3\envs\ee467

  added / updated specs:
    - cpuonly
    - pytorch
    - torchaudio
    - torchvision


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    _openmp_mutex-4.5          |            2_gnu          48 KB  conda-forge
    brotlicffi-1.2.0.0         |  py311h885b0b7_0         348 KB
    certifi-2026.01.04         |  py311haa95532_0         148 KB
    cffi-2.0.0                 |  py311h02ab6af_1         298 KB
    charset-normalizer-3.4.4   |  py311haa95532_0         125 KB
    cpuonly-2.0                |                0           2 KB  pytorch
    filelock-3.20.3            |  py311haa95532_0          39 KB

In [4]:
import os
import random
import pickle

import cv2
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split


# ---------------------------
# Config
# ---------------------------
# Seed embedded in the character-image folder name below; main() also uses it
# to seed `random`, NumPy and torch before building the model.
TVT_SPLIT_SEED = 31528476
# Seed for the torch.Generator that drives the train/validation random_split.
TRAIN_VAL_SEED = 955996

# Root folder scanned for one sub-folder per character label.
CHAR_IMAGE_FOLDER = f"./char-images-{TVT_SPLIT_SEED}"
LABELS_PATH = "./labels.pkl"  # optional: load_classes() falls back to folder names if missing/unreadable
# Checkpoint file written after training and read back when training is skipped.
MODEL_WEIGHTS_PATH_PT = "./captcha-model-pytorch.pt"

# Batch size shared by the training and validation DataLoaders.
BATCH_SIZE = 32
# Number of full passes over the training split.
N_EPOCHS = 10
# When True, main() always retrains and overwrites the checkpoint even if one exists.
FORCE_TRAINING = True

# Device string used for the model and for every batch moved in run_epoch/eval_accuracy.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


# ---------------------------
# Utils
# ---------------------------
def resize_to_fit(image, width=20, height=20):
    """Resize an image to fit inside ``width`` x ``height``, then zero-pad to that size.

    The aspect ratio is preserved: the image is scaled so that the axis which
    limits the fit reaches its target exactly, and the remaining space on the
    other axis is filled with a centred, constant zero-valued border.

    Args:
        image: Image array as accepted by ``cv2.resize``; only ``shape[:2]``
            (rows, columns) is inspected here.
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        The resized and padded image, with ``shape[:2] == (height, width)``.

    Raises:
        ValueError: If ``image`` is ``None`` or has a zero-length side.
    """
    if image is None:
        raise ValueError("resize_to_fit got None image")

    h, w = image.shape[:2]
    if h == 0 or w == 0:
        raise ValueError(f"Bad image shape: {image.shape}")

    # Decide which axis limits the fit by comparing aspect ratios
    # (w / h > width / height, written as an integer cross-multiplication).
    # For square targets this is exactly `w > h`. For non-square targets a
    # plain `w > h` test can pick the wrong axis, letting the scaled image
    # overflow the other one and producing negative padding below.
    if w * height > h * width:
        new_w = width
        new_h = min(height, max(1, int(h * (width / w))))
    else:
        new_h = height
        new_w = min(width, max(1, int(w * (height / h))))

    image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Centre the resized image; when the leftover is odd the extra pixel goes
    # to the bottom/right so the result is always exactly (height, width).
    pad_left = (width - new_w) // 2
    pad_top = (height - new_h) // 2
    image = cv2.copyMakeBorder(
        image,
        top=pad_top,
        bottom=height - new_h - pad_top,
        left=pad_left,
        right=width - new_w - pad_left,
        borderType=cv2.BORDER_CONSTANT,
        value=0
    )
    return image


def load_classes(char_folder, labels_path):
    """Resolve the ordered list of class names and the two lookup tables.

    The pickled object at ``labels_path`` is preferred (its ``classes_``
    attribute is used). If that file is absent, or loading it fails for any
    reason, the sorted sub-folder names of ``char_folder`` are used instead.

    Args:
        char_folder: Directory holding one sub-folder per class.
        labels_path: Path of an optional pickle exposing ``classes_``.

    Returns:
        ``(classes, class_to_idx, idx_to_class)`` where ``classes`` is a list,
        ``class_to_idx`` maps name -> position and ``idx_to_class`` is its inverse.
    """
    classes = None

    if os.path.exists(labels_path):
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point labels_path at files you created yourself.
        try:
            with open(labels_path, "rb") as handle:
                label_encoder = pickle.load(handle)
            classes = list(label_encoder.classes_)
            print(f"[info] Loaded {len(classes)} classes from {labels_path}")
        except Exception as err:
            # Best effort: report the problem and fall through to folder names.
            print(f"[warn] Could not load {labels_path}; falling back to folder names: {repr(err)}")

    if classes is None:
        subfolders = []
        for entry in os.listdir(char_folder):
            if os.path.isdir(os.path.join(char_folder, entry)):
                subfolders.append(entry)
        subfolders.sort()
        classes = subfolders
        print(f"[info] Inferred {len(classes)} classes from {char_folder}")

    class_to_idx = {}
    for position, name in enumerate(classes):
        class_to_idx[name] = position
    idx_to_class = {position: name for name, position in class_to_idx.items()}
    return classes, class_to_idx, idx_to_class


# ---------------------------
# Dataset
# ---------------------------
class CharImagesDataset(Dataset):
    """Map-style dataset of single-character PNG images stored one folder per label.

    Layout read from disk: ``root_dir/<label>/<name>.png``. Only sub-folders
    whose name is a key of ``class_to_idx`` are indexed. Each item is a
    ``(image, label)`` pair: a float32 tensor of shape ``(1, target_h, target_w)``
    scaled to ``[0, 1]`` and an int64 scalar tensor holding the class index.
    """

    def __init__(self, root_dir, class_to_idx, target_size=(20, 20), validate=True):
        """Index every PNG under ``root_dir`` in a deterministic order.

        Args:
            root_dir: Directory containing one sub-folder per label.
            class_to_idx: Mapping from folder name to integer class index.
            target_size: ``(width, height)`` every image is resized/padded to.
            validate: If true, try to read each file once up front and drop
                the ones OpenCV cannot decode.

        Raises:
            RuntimeError: If no PNG file is found, or if ``validate`` is true
                and none of the files found is readable.
        """
        self.root_dir = root_dir
        self.class_to_idx = class_to_idx
        self.target_w, self.target_h = target_size

        samples = []
        for label in sorted(os.listdir(root_dir)):
            label_dir = os.path.join(root_dir, label)
            if not os.path.isdir(label_dir):
                continue
            if label not in class_to_idx:
                continue

            # os.listdir returns names in arbitrary, filesystem-dependent
            # order. Sorting keeps the sample order stable so a seeded split
            # over this dataset selects the same files on every machine.
            for fname in sorted(os.listdir(label_dir)):
                if fname.lower().endswith(".png"):
                    samples.append((os.path.join(label_dir, fname), class_to_idx[label]))

        if len(samples) == 0:
            raise RuntimeError(f"No PNG samples found under {root_dir}")

        if validate:
            good = []
            bad = 0
            for path, y in samples:
                img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                if img is None or img.size == 0:
                    bad += 1
                    continue
                good.append((path, y))
            if bad:
                print(f"[warn] Skipped {bad} unreadable PNGs during indexing.")
            # Fail here rather than handing an empty dataset to the training
            # loop, where it would only surface as a division by zero.
            if len(good) == 0:
                raise RuntimeError(f"No readable PNG samples found under {root_dir}")
            samples = good

        # List of (file path, class index) pairs, in indexing order.
        self.samples = samples

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load, resize and normalise the sample at position ``idx``.

        Returns:
            ``(x, y)``: ``x`` is a float32 tensor of shape
            ``(1, target_h, target_w)`` with values in ``[0, 1]``; ``y`` is an
            int64 scalar tensor with the class index.

        Raises:
            ValueError: If the file can no longer be read as an image.
        """
        path, y = self.samples[idx]

        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None or img.size == 0:
            raise ValueError(f"Could not read image: {path}")

        img = resize_to_fit(img, self.target_w, self.target_h)
        # Add the channel axis and map 8-bit grey levels to [0, 1].
        x = torch.tensor(img, dtype=torch.float32).unsqueeze(0) / 255.0  # (1, H, W)
        y = torch.tensor(y, dtype=torch.long)
        return x, y


# ---------------------------
# Model (same as TF architecture)
# ---------------------------
class CaptchaCharCNN(nn.Module):
    """Small CNN classifying one single-channel character image.

    Architecture: two ``Conv2d(5x5, padding=2) -> ReLU -> MaxPool2d(2)`` stages
    (20 then 50 channels) followed by ``Flatten -> Linear(500) -> ReLU ->
    Linear(n_classes)``. The output is raw logits (no softmax).
    """

    def __init__(self, n_classes, input_size=(20, 20)):
        """Build the layers.

        Args:
            n_classes: Number of output logits.
            input_size: ``(width, height)`` of the input images. Defaults to
                20x20, which gives the 5 * 5 * 50 flattened features that were
                previously hard-coded.

        Raises:
            ValueError: If ``input_size`` is too small to survive the two
                pooling stages (either side below 4 pixels).
        """
        super().__init__()

        in_w, in_h = input_size
        # The convolutions keep the spatial size (kernel 5, padding 2) and
        # each MaxPool2d(2) floor-halves it, so two stages give size // 2 // 2.
        pooled_w = (in_w // 2) // 2
        pooled_h = (in_h // 2) // 2
        if pooled_w < 1 or pooled_h < 1:
            raise ValueError(f"input_size {input_size} is too small for two 2x2 poolings")

        # Layer order and positions are kept as-is so existing checkpoints
        # (state-dict keys such as "features.3.weight") still load.
        self.features = nn.Sequential(
            nn.Conv2d(1, 20, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(20, 50, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(pooled_h * pooled_w * 50, 500),
            nn.ReLU(),
            nn.Linear(500, n_classes),  # logits
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, n_classes)`` for a ``(batch, 1, H, W)`` input."""
        return self.classifier(self.features(x))


# ---------------------------
# Train/Eval
# ---------------------------
def run_epoch(model, loader, optimizer, criterion, train=True):
    """Run one pass over ``loader`` and return the mean loss and accuracy.

    Args:
        model: Module mapping a batch of inputs to class logits.
        loader: Iterable of ``(inputs, targets)`` batches.
        optimizer: Optimizer stepped after each batch; only used when ``train``.
        criterion: Callable ``(logits, targets) -> scalar loss tensor``.
        train: If true, put the model in training mode and update its
            weights; otherwise put it in eval mode and only measure.

    Returns:
        ``(mean_loss, accuracy)``, both averaged per sample. ``(0.0, 0.0)``
        when the loader yields no samples, matching ``eval_accuracy``.
    """
    model.train(train)
    total_loss, correct, total = 0.0, 0, 0

    for x, y in loader:
        x, y = x.to(DEVICE), y.to(DEVICE)

        if train:
            optimizer.zero_grad()

        # Only record the autograd graph when it will be used for backward();
        # a validation pass then needs no gradient memory.
        with torch.set_grad_enabled(train):
            logits = model(x)
            loss = criterion(logits, y)

        if train:
            loss.backward()
            optimizer.step()

        batch_size = x.size(0)
        # criterion returns a per-batch mean, so weight it by the batch size
        # to get a correct per-sample average when the last batch is smaller.
        total_loss += loss.item() * batch_size
        preds = torch.argmax(logits, dim=1)
        correct += (preds == y).sum().item()
        total += batch_size

    if total == 0:
        # Empty loader: report zeros instead of raising ZeroDivisionError.
        return 0.0, 0.0
    return total_loss / total, correct / total


def eval_accuracy(model, loader):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The model is switched to eval mode and the whole pass runs with gradient
    tracking disabled. Returns ``0.0`` when the loader yields no samples.
    """
    model.eval()
    n_hits = 0
    n_seen = 0

    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(DEVICE)
            targets = targets.to(DEVICE)
            # The predicted class is the index of the largest logit.
            predicted = model(inputs).argmax(dim=1)
            n_hits += (predicted == targets).sum().item()
            n_seen += inputs.size(0)

    if not n_seen:
        return 0.0
    return n_hits / n_seen


def main():
    """Train (or reload) the character classifier and print its validation accuracy.

    Steps, in order: check that the image folder exists, seed the RNGs, resolve
    the class list, index the dataset, hold out 25% of it for validation with a
    separately seeded split, then either train for ``N_EPOCHS`` and save a
    checkpoint, or load an existing checkpoint when ``FORCE_TRAINING`` is off.

    Raises:
        FileNotFoundError: If ``CHAR_IMAGE_FOLDER`` is not a directory.
    """
    # The extracted character images must already be on disk.
    if not os.path.isdir(CHAR_IMAGE_FOLDER):
        raise FileNotFoundError(
            f"Missing {CHAR_IMAGE_FOLDER}.\n"
            "Run your TensorFlow notebook extraction first so this folder exists."
        )

    print("[info] DEVICE:", DEVICE)

    # Seed Python, NumPy and torch (in that order) with the same value.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(TVT_SPLIT_SEED)

    # Class names and lookup tables.
    classes, class_to_idx, idx_to_class = load_classes(CHAR_IMAGE_FOLDER, LABELS_PATH)
    n_classes = len(classes)
    print("[info] n_classes:", n_classes)

    # Index all character images.
    full_ds = CharImagesDataset(CHAR_IMAGE_FOLDER, class_to_idx, target_size=(20, 20), validate=True)
    n_total = len(full_ds)
    print("[info] Total samples:", n_total)

    # 75/25 train/validation split, driven by its own seeded generator.
    n_val = int(0.25 * n_total)
    split_rng = torch.Generator().manual_seed(TRAIN_VAL_SEED)
    train_ds, val_ds = random_split(full_ds, [n_total - n_val, n_val], generator=split_rng)

    loader_opts = {"batch_size": BATCH_SIZE, "num_workers": 0}
    train_loader = DataLoader(train_ds, shuffle=True, **loader_opts)
    val_loader = DataLoader(val_ds, shuffle=False, **loader_opts)

    # Model, loss and optimizer (Adam with its default settings).
    model = CaptchaCharCNN(n_classes).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())

    # Train unless reuse is allowed and a checkpoint is already on disk.
    must_train = FORCE_TRAINING or not os.path.exists(MODEL_WEIGHTS_PATH_PT)
    if must_train:
        for epoch in range(1, N_EPOCHS + 1):
            tr_loss, tr_acc = run_epoch(model, train_loader, optimizer, criterion, train=True)
            va_loss, va_acc = run_epoch(model, val_loader, optimizer, criterion, train=False)
            print(
                f"Epoch {epoch:02d}/{N_EPOCHS} | "
                f"train loss {tr_loss:.4f} acc {tr_acc:.4f} | "
                f"val loss {va_loss:.4f} acc {va_acc:.4f}"
            )

        # Store the label mapping and seeds next to the weights.
        checkpoint = {
            "model_state_dict": model.state_dict(),
            "classes": classes,
            "class_to_idx": class_to_idx,
            "idx_to_class": idx_to_class,
            "n_classes": n_classes,
            "tvt_split_seed": TVT_SPLIT_SEED,
            "train_val_seed": TRAIN_VAL_SEED,
        }
        torch.save(checkpoint, MODEL_WEIGHTS_PATH_PT)
        print("[info] Saved weights:", MODEL_WEIGHTS_PATH_PT)
    else:
        checkpoint = torch.load(MODEL_WEIGHTS_PATH_PT, map_location=DEVICE)
        model.load_state_dict(checkpoint["model_state_dict"])
        print("[info] Loaded weights:", MODEL_WEIGHTS_PATH_PT)

    # Final score on the held-out split.
    final_acc = eval_accuracy(model, val_loader)
    print("[result] Validation accuracy:", final_acc)


# Run the pipeline only when this code is executed as the top-level program
# (module name "__main__"), not when it is imported from elsewhere.
if __name__ == "__main__":
    main()


[info] DEVICE: cpu
[info] Inferred 32 classes from ./char-images-31528476
[info] n_classes: 32
[info] Total samples: 3580
Epoch 01/10 | train loss 2.3073 acc 0.3676 | val loss 0.5806 acc 0.8581
Epoch 02/10 | train loss 0.2946 acc 0.9274 | val loss 0.1448 acc 0.9698
Epoch 03/10 | train loss 0.0884 acc 0.9832 | val loss 0.0918 acc 0.9866
Epoch 04/10 | train loss 0.0533 acc 0.9899 | val loss 0.1260 acc 0.9732
Epoch 05/10 | train loss 0.0347 acc 0.9918 | val loss 0.0932 acc 0.9765
Epoch 06/10 | train loss 0.0147 acc 0.9978 | val loss 0.0559 acc 0.9922
Epoch 07/10 | train loss 0.0049 acc 0.9993 | val loss 0.0639 acc 0.9888
Epoch 08/10 | train loss 0.0154 acc 0.9952 | val loss 0.0574 acc 0.9922
Epoch 09/10 | train loss 0.0046 acc 0.9989 | val loss 0.0554 acc 0.9955
Epoch 10/10 | train loss 0.0104 acc 0.9981 | val loss 0.0529 acc 0.9955
[info] Saved weights: ./captcha-model-pytorch.pt
[result] Validation accuracy: 0.9955307262569832
