# Basic CNN Pipeline

### 1. Load and preprocess data

In [1]:
import torch
import numpy as np

from helper_functions.encode import (
    prepare_training_data, CHARACTERS, 
)

# Dataset locations (relative paths, resolved against the kernel's working directory).
train_folder = "../data/train/combine"
test_folder = "../data/test/combine"

# --- Training split -------------------------------------------------------
# prepare_training_data returns five values; only the first (inputs) and the
# third (labels, presumably one-hot per the variable name -- confirm against
# helper_functions.encode) are used here.
print("Preparing training data...")
X_train, _, y_train_onehot, _, _ = prepare_training_data(train_folder)
# Collapse each label row to the index of its largest entry, then store the
# indices as an int64 tensor.
train_class_ids = np.argmax(y_train_onehot, axis=1)
y_train = torch.tensor(train_class_ids, dtype=torch.long)

# --- Test split -----------------------------------------------------------
# Same procedure, applied to the test folder.
print("Preparing test data...")
X_test, _, y_test_onehot, _, _ = prepare_training_data(test_folder)
test_class_ids = np.argmax(y_test_onehot, axis=1)
y_test = torch.tensor(test_class_ids, dtype=torch.long)

Preparing training data...


Preparing Data: 100%|██████████| 7437/7437 [00:37<00:00, 197.87it/s]


Preparing test data...


Preparing Data: 100%|██████████| 1894/1894 [00:10<00:00, 182.67it/s]


### 2. Instantiate basic CNN model and train it

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from helper_functions.models.basic_cnn import CharDataset, CNN


# Hyperparameters, gathered in one place so they can be tuned without editing
# the loader/optimizer/loop code below.
SEED = 42
BATCH_SIZE = 32
LEARNING_RATE = 0.001
NUM_EPOCHS = 100

# Seed torch's global RNG *before* the loaders and the model are built, so the
# shuffle order and (presumably) the weight initialisation of CNN are
# repeatable across kernel restarts. Some GPU kernels may still be
# nondeterministic; this does not enable deterministic algorithms.
torch.manual_seed(SEED)

# Create dataloaders: the training set is reshuffled every epoch, the test set
# keeps its original order.
train_loader = DataLoader(CharDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(CharDataset(X_test, y_test), batch_size=BATCH_SIZE, shuffle=False)

# Initialize model: one output class per entry in CHARACTERS; use the GPU when
# one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN(num_classes=len(CHARACTERS)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training loop
print("Training model...")
for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # epoch figure below is a true per-sample average even when the last
        # batch is smaller than BATCH_SIZE.
        running_loss += loss.item() * inputs.size(0)

    print(f"Epoch {epoch+1} Loss: {running_loss/len(train_loader.dataset):.4f}")


Training model...
Epoch 1 Loss: 1.5879
Epoch 2 Loss: 0.8562
Epoch 3 Loss: 0.6842
Epoch 4 Loss: 0.5763
Epoch 5 Loss: 0.5010
Epoch 6 Loss: 0.4395
Epoch 7 Loss: 0.3888
Epoch 8 Loss: 0.3436
Epoch 9 Loss: 0.3105
Epoch 10 Loss: 0.2778
Epoch 11 Loss: 0.2465
Epoch 12 Loss: 0.2259
Epoch 13 Loss: 0.2048
Epoch 14 Loss: 0.1894
Epoch 15 Loss: 0.1676
Epoch 16 Loss: 0.1553
Epoch 17 Loss: 0.1472
Epoch 18 Loss: 0.1287
Epoch 19 Loss: 0.1242
Epoch 20 Loss: 0.1164
Epoch 21 Loss: 0.1080
Epoch 22 Loss: 0.1068
Epoch 23 Loss: 0.0932
Epoch 24 Loss: 0.0960
Epoch 25 Loss: 0.0872
Epoch 26 Loss: 0.0902
Epoch 27 Loss: 0.0873
Epoch 28 Loss: 0.0696
Epoch 29 Loss: 0.0779
Epoch 30 Loss: 0.0777
Epoch 31 Loss: 0.0736
Epoch 32 Loss: 0.0656
Epoch 33 Loss: 0.0623
Epoch 34 Loss: 0.0669
Epoch 35 Loss: 0.0584
Epoch 36 Loss: 0.0740
Epoch 37 Loss: 0.0605
Epoch 38 Loss: 0.0500
Epoch 39 Loss: 0.0555
Epoch 40 Loss: 0.0557
Epoch 41 Loss: 0.0553
Epoch 42 Loss: 0.0541
Epoch 43 Loss: 0.0571
Epoch 44 Loss: 0.0465
Epoch 45 Loss: 0.0461
E

In [6]:
from helper_functions.models.basic_cnn import evaluate_character_level, evaluate_captcha_level

# 3. Evaluate basic CNN model

# The training loop leaves the model in training mode (model.train()). Switch
# to evaluation mode explicitly so that any train-only layers (e.g. dropout or
# batch-norm, if CNN uses them) behave deterministically during scoring. The
# call is idempotent, so it is harmless if the helpers below also do this.
model.eval()

# Per-character metrics over the test loader; the helper returns a mapping of
# metric name -> numeric value, printed to four decimals.
print("\nCharacter-Level Evaluation:")
char_metrics = evaluate_character_level(model, test_loader, device)
for metric, value in char_metrics.items():
    print(f"{metric.capitalize()}: {value:.4f}")

# Whole-CAPTCHA accuracy, computed by the helper directly from the test folder.
print("\nCaptcha-Level Evaluation:")
captcha_acc = evaluate_captcha_level(model, test_folder, device)
print(f"Accuracy: {captcha_acc:.4f}")


Character-Level Evaluation:
Accuracy: 0.8036
Precision: 0.8080
Recall: 0.8011
F1: 0.8034

Captcha-Level Evaluation:


Evaluating CAPTCHAs: 100%|██████████| 1894/1894 [00:17<00:00, 105.83it/s]

Accuracy: 0.3036



