In [55]:
import torch
import torch.nn as nn
import torch.nn.functional as F     
from torch.utils.data import DataLoader, Dataset, ConcatDataset
from torchvision import transforms
from PIL import Image
import pandas as pd
import os
    

In [56]:
class CNN(nn.Module):
    def __init__(self, input_chnls, num_classes):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels = input_chnls, out_channels = 16, kernel_size = 5, stride = 1, padding = 2)
        self.pool = nn.MaxPool2d(kernel_size = 2, stride = 2, padding = 0)
        self.conv2 = nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 5, stride = 1, padding = 2)
        self.fc1 = nn.Linear(32*62*25, 120)
        self.fc2 = nn.Linear(120, num_classes)
        self.dropout = nn.Dropout(0.2) 

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten using batch size and -1 for auto calculation
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  
        x = self.fc2(x)
        return x


In [57]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [58]:
device

device(type='cuda')

In [59]:
model = CNN(3, 100).to(device)

In [60]:
class CustomImageDataset(Dataset):
    def __init__(self, csv_file, img_dir, transforms = None, class_to_idx = None):
        self.labels_df = pd.read_csv(csv_file)
        self.img_dir = img_dir 
        self.transforms = transforms
        self.class_to_idx = class_to_idx



    def __len__(self):
        return len(self.labels_df)
    
    def __getitem__(self, idx):
        img_name = self.labels_df.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, img_name)

        image = Image.open(img_path)
        label = self.labels_df.iloc[idx, 1]
        y_label = self.class_to_idx[label]

        if self.transforms:
            image = self.transforms(image)

        return image, y_label

In [61]:
transform = transforms.Compose([
    transforms.Resize((250, 100)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])


In [62]:
with open('ai_wordlist.txt', 'r') as f:
    words = [line.strip() for line in f.readlines()]

class_to_idx = {word: idx for idx, word in enumerate(words)}

len(class_to_idx)



100

In [63]:
train_dataset_easy = CustomImageDataset(
    csv_file = 'captcha_images/train/labels_easy.csv',
    img_dir = 'captcha_images/train/easy',
    transforms = transform,
    class_to_idx = class_to_idx
)

train_dataset_hard = CustomImageDataset(
    csv_file = 'captcha_images/train/labels_hard.csv',
    img_dir = 'captcha_images/train/hard',
    transforms = transform,
    class_to_idx = class_to_idx
)

train_dataset = ConcatDataset([train_dataset_easy, train_dataset_hard])

test_dataset_easy = CustomImageDataset(
    csv_file = 'captcha_images/test/labels_easy.csv',
    img_dir = 'captcha_images/test/easy',
    transforms = transform,
    class_to_idx = class_to_idx
)

test_dataset_hard = CustomImageDataset(
    csv_file = 'captcha_images/test/labels_hard.csv',
    img_dir = 'captcha_images/test/hard',
    transforms = transform,
    class_to_idx = class_to_idx
)

test_dataset = ConcatDataset([test_dataset_easy, test_dataset_hard])

In [64]:

train_dataloader = DataLoader(train_dataset, batch_size = 16, shuffle = True)
test_dataloader = DataLoader(test_dataset, batch_size = 16, shuffle = False)

In [65]:
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

In [66]:
criterion = nn.CrossEntropyLoss()

In [None]:
num_epochs = 50

In [70]:
all_labels = []
for _, labels in train_dataloader:
    all_labels.extend(labels.tolist())

print(min(all_labels), max(all_labels))  # should be 0 and num_classes-1
print(len(set(all_labels)))  # should equal num_classes


0 99
100


In [69]:
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device = device)
            y = y.to(device = device)
            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
    acc = float(num_correct) / num_samples
    print(f'Got {num_correct} / {num_samples} with accuracy {acc * 100:.2f}')
    return acc

check_accuracy(train_dataloader, model)
check_accuracy(test_dataloader, model)

Got 960 / 1000 with accuracy 96.00
Got 199 / 500 with accuracy 39.80


0.398

- Number of epochs as 10 - 1% accuracy
- Number of epochs as 100 - 100% accuracy
- Right now model is just memorizing, need to add more samples.