In [1]:
import torch
import os
import cv2
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import joblib

In [2]:
label2int_dict = {}
for i in range(62):
    if i < 10:
        label2int_dict[str(i)] = i
    
    elif i <36:
        label2int_dict[chr(97 + i - 10)] = i
    
    else:
        label2int_dict[chr(65 + i - 36)] = i

int2label_dict = {}
for key in label2int_dict:
    int2label_dict[label2int_dict[key]] = key

print(label2int_dict)

{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, 'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14, 'f': 15, 'g': 16, 'h': 17, 'i': 18, 'j': 19, 'k': 20, 'l': 21, 'm': 22, 'n': 23, 'o': 24, 'p': 25, 'q': 26, 'r': 27, 's': 28, 't': 29, 'u': 30, 'v': 31, 'w': 32, 'x': 33, 'y': 34, 'z': 35, 'A': 36, 'B': 37, 'C': 38, 'D': 39, 'E': 40, 'F': 41, 'G': 42, 'H': 43, 'I': 44, 'J': 45, 'K': 46, 'L': 47, 'M': 48, 'N': 49, 'O': 50, 'P': 51, 'Q': 52, 'R': 53, 'S': 54, 'T': 55, 'U': 56, 'V': 57, 'W': 58, 'X': 59, 'Y': 60, 'Z': 61}


In [3]:
def separate(full_image):
    # vertical cutting
    prev = False
    prev2 = False
    arr = []
    for i in range(full_image[0].shape[0]):

        if np.all(full_image[:, i] < 127) and not prev:
            arr.append(i)
            prev = True
            prev2 = False
        elif not np.all(full_image[:, i] < 127) and not prev2:
            arr.append(i)
            prev2 = True
            prev = False
    # --------------
    
    crop_img = []

    for i in range((len(arr) - 1) // 2):
        image = full_image[:, arr[2*i + 1]: arr[2*i + 2]]

        # horizontal cutting
        prev = False
        prev2 = False
        arr2 = []
        for j in range(image.shape[0]):
            if np.all(image[j, :] < 127) and not prev:
                arr2.append(j)
                prev = True
                prev2 = False

            elif not np.all(image[j, :] < 127) and not prev2:
                arr2.append(j)
                prev2 = True
                prev = False

        image = image[arr2[1]: arr2[-1], :]
        # --------------

        # padding
        pad_width = int((image.shape[0] - image.shape[1]) / 2)
        pad_size = 5
        if pad_width > 0:
            image = np.pad(image, ((pad_size, pad_size), (pad_width + pad_size, pad_width + pad_size)), mode="constant")
        
        else:
            image = np.pad(image, ((pad_size - pad_width, pad_size - pad_width), (pad_size, pad_size)), mode="constant")
        # ----------
        
        resized_image = cv2.resize(image, (28, 28), cv2.INTER_CUBIC)

        final_img = torch.from_numpy(resized_image)
        crop_img.append(final_img)

    return crop_img    

In [4]:

input_data = torch.empty(24000, 28, 28)
label_data = torch.empty(24000, dtype=int)

k = 0
for file in os.listdir("data"):
    
    image = cv2.imread(os.path.join("data", file))

    process_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    threshold, process_image = cv2.threshold(process_image, 0, 255, cv2.THRESH_OTSU)

    
    img_list = separate(process_image)

    for  i in range(len(img_list)):
        input_data[k] = img_list[i]
        label_data[k] = label2int_dict[file[i]]

        k += 1
        if k == 24000:
            break

    if k == 24000:
        break

In [5]:
train_input = input_data[:20000]
train_label = label_data[:20000]

test_input = input_data[20000:]
test_label = label_data[20000:]

In [6]:
class dataset():
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels
    
    def __len__(self):
        return len(self.inputs)
    
    def __getitem__(self, index):
        return self.inputs[index], self.labels[index]


dataset = dataset(train_input, train_label)

batch_size = 8

dataloader = DataLoader(dataset, batch_size, shuffle=True, drop_last=True)

In [7]:
class Classifier(nn.Module):
    def __init__(self, n_labels, loader_size, lr):
        super().__init__()
        self.lr = lr
        self.n_labels = n_labels
        self.loader_size = loader_size
        # layers
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1)
        self.pool2 = nn.AvgPool2d(2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1)
        self.pool2 = nn.AvgPool2d(2)

        self.fc1 = nn.Linear(800, 256)

        self.fc2 = nn.Linear(256, n_labels)

        # loss function
        self.lossFn = nn.MSELoss()

        # optimizer
        self.optimizer = torch.optim.SGD(self.parameters(), self.lr)


    # output function
    def output(self,input):
        conv1_out = nn.ReLU()(self.conv1(input))
        pool1_out = self.pool2(conv1_out)
        conv2_out = nn.ReLU()(self.conv2(pool1_out))
        pool2_out = self.pool2(conv2_out)


        flatten_out = pool2_out.view(pool2_out.shape[0], -1)

        out = torch.empty(flatten_out.shape[0], self.n_labels)

        for i in range(flatten_out.shape[0]):
            fc1_out = nn.ReLU()(self.fc1(flatten_out[i]))
            
            fc2_out = self.fc2(fc1_out)

            softmax_out = nn.Softmax(dim=-1)(fc2_out)

            out[i] = softmax_out

        return out
    
    # training function
    def train(self, dataloader, n_epochs):
        for i in range(n_epochs):

            epoch_loss = 0
            

            for inputs, labels in dataloader:            
                inputs = inputs.unsqueeze(1)
                output = self.output(inputs)

                hot_enco = torch.empty(self.loader_size, self.n_labels)
                for j in range(self.loader_size):
                    hot_enco[j] = torch.zeros(self.n_labels)
                    hot_enco[j][labels[j]] = 1

                self.optimizer.zero_grad()

                loss = self.lossFn(hot_enco, output)

                epoch_loss += loss

                loss.backward()

                self.optimizer.step()
            print(f"loss at epoch {i+1}: {epoch_loss.item()}")

In [10]:
classifier_inst = Classifier(62, 8, .08)

classifier_inst.train(dataloader, 20)

loss at epoch 1: 18.693058013916016
loss at epoch 2: 8.538896560668945
loss at epoch 3: 5.81599760055542
loss at epoch 4: 3.5010647773742676
loss at epoch 5: 2.057953357696533
loss at epoch 6: 2.0340616703033447
loss at epoch 7: 2.0647101402282715
loss at epoch 8: 2.0191779136657715
loss at epoch 9: 2.0129785537719727
loss at epoch 10: 2.0069315433502197
loss at epoch 11: 2.0043785572052
loss at epoch 12: 2.0052895545959473
loss at epoch 13: 1.999671459197998
loss at epoch 14: 1.476036787033081
loss at epoch 15: 1.0949585437774658
loss at epoch 16: 1.0907527208328247
loss at epoch 17: 1.0926839113235474
loss at epoch 18: 1.0885788202285767
loss at epoch 19: 1.085910439491272
loss at epoch 20: 1.0845332145690918


In [13]:
joblib.dump(classifier_inst, "captcha_reader.joblib")


['captcha_reader.joblib']

In [12]:
test = 0
for i in range(len(test_input)):
    output = classifier_inst.output(test_input[i].unsqueeze(0).unsqueeze(0))
    if torch.argmax(output) ==  test_label[i]:
        test+= 1
    # else:
        # plt.imshow(test_input[i], cmap="gray")
        # plt.show()
        # print(int2label_dict[torch.argmax(output).item()])
print(test)
print(len(test_input))
test = 0
for i in range(len(train_input)):
    output = classifier_inst.output(train_input[i].unsqueeze(0).unsqueeze(0))
    if torch.argmax(output) ==  train_label[i]:
        test+= 1
    
print(test)
print(len(train_input))

3904
4000
19536
20000
