In [88]:
import torch
import os
import cv2
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import joblib
import onnx

In [89]:
label2int_dict = {}
for i in range(62):
    if i < 10:
        label2int_dict[str(i)] = i
    
    elif i <36:
        label2int_dict[chr(97 + i - 10)] = i
    
    else:
        label2int_dict[chr(65 + i - 36)] = i

int2label_dict = {}
for key in label2int_dict:
    int2label_dict[label2int_dict[key]] = key

print(label2int_dict)

{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, 'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14, 'f': 15, 'g': 16, 'h': 17, 'i': 18, 'j': 19, 'k': 20, 'l': 21, 'm': 22, 'n': 23, 'o': 24, 'p': 25, 'q': 26, 'r': 27, 's': 28, 't': 29, 'u': 30, 'v': 31, 'w': 32, 'x': 33, 'y': 34, 'z': 35, 'A': 36, 'B': 37, 'C': 38, 'D': 39, 'E': 40, 'F': 41, 'G': 42, 'H': 43, 'I': 44, 'J': 45, 'K': 46, 'L': 47, 'M': 48, 'N': 49, 'O': 50, 'P': 51, 'Q': 52, 'R': 53, 'S': 54, 'T': 55, 'U': 56, 'V': 57, 'W': 58, 'X': 59, 'Y': 60, 'Z': 61}


In [90]:
def separate(full_image):
    # vertical cutting
    prev = False
    prev2 = False
    arr = []
    for i in range(full_image[0].shape[0]):

        if np.all(full_image[:, i] < 127) and not prev:
            arr.append(i)
            prev = True
            prev2 = False
        elif not np.all(full_image[:, i] < 127) and not prev2:
            arr.append(i)
            prev2 = True
            prev = False
    # --------------
    
    crop_img = []

    for i in range((len(arr) - 1) // 2):
        image = full_image[:, arr[2*i + 1]: arr[2*i + 2]]

        # horizontal cutting
        prev = False
        prev2 = False
        arr2 = []
        for j in range(image.shape[0]):
            if np.all(image[j, :] < 127) and not prev:
                arr2.append(j)
                prev = True
                prev2 = False

            elif not np.all(image[j, :] < 127) and not prev2:
                arr2.append(j)
                prev2 = True
                prev = False

        image = image[arr2[1]: arr2[-1], :]
        # --------------

        # padding
        pad_width = int((image.shape[0] - image.shape[1]) / 2)
        pad_size = 5
        if pad_width > 0:
            image = np.pad(image, ((pad_size, pad_size), (pad_width + pad_size, pad_width + pad_size)), mode="constant")
        
        else:
            image = np.pad(image, ((pad_size - pad_width, pad_size - pad_width), (pad_size, pad_size)), mode="constant")
        # ----------
        
        resized_image = cv2.resize(image, (28, 28), cv2.INTER_CUBIC)

        final_img = torch.from_numpy(resized_image)
        crop_img.append(final_img)

    return crop_img    

In [91]:

input_data = torch.empty(24000, 28, 28)
label_data = torch.empty(24000, dtype=int)

k = 0
for file in os.listdir("data"):
    
    image = cv2.imread(os.path.join("data", file))

    process_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    threshold, process_image = cv2.threshold(process_image, 0, 255, cv2.THRESH_OTSU)

    
    img_list = separate(process_image)

    for  i in range(len(img_list)):
        input_data[k] = img_list[i]
        label_data[k] = label2int_dict[file[i]]

        k += 1
        if k == 24000:
            break

    if k == 24000:
        break

In [92]:
train_input = input_data[:20000]
train_label = label_data[:20000]

test_input = input_data[20000:]
test_label = label_data[20000:]

In [93]:
class dataset():
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels
    
    def __len__(self):
        return len(self.inputs)
    
    def __getitem__(self, index):
        return self.inputs[index], self.labels[index]


dataset = dataset(train_input, train_label)

batch_size = 8

dataloader = DataLoader(dataset, batch_size, shuffle=True, drop_last=True)

In [94]:
class Classifier(nn.Module):
    def __init__(self, n_labels, loader_size, lr):
        super().__init__()
        self.lr = lr
        self.n_labels = n_labels
        self.loader_size = loader_size
        # layers
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1)
        self.pool2 = nn.AvgPool2d(2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1)
        self.pool2 = nn.AvgPool2d(2)

        self.fc1 = nn.Linear(800, 256)

        self.fc2 = nn.Linear(256, n_labels)

        # loss function
        self.lossFn = nn.MSELoss()

        # optimizer
        self.optimizer = torch.optim.SGD(self.parameters(), self.lr)


    # output function
    def forward(self,input):
        conv1_out = nn.ReLU()(self.conv1(input))
        pool1_out = self.pool2(conv1_out)
        conv2_out = nn.ReLU()(self.conv2(pool1_out))
        pool2_out = self.pool2(conv2_out)


        flatten_out = pool2_out.view(pool2_out.shape[0], -1)

        out = torch.empty(flatten_out.shape[0], self.n_labels)

        for i in range(flatten_out.shape[0]):
            fc1_out = nn.ReLU()(self.fc1(flatten_out[i]))
            
            fc2_out = self.fc2(fc1_out)

            softmax_out = nn.Softmax(dim=-1)(fc2_out)

            out[i] = softmax_out

        return out

In [95]:
# training function
def train(model, dataloader, n_epochs):
    for i in range(n_epochs):

        epoch_loss = 0
        

        for inputs, labels in dataloader:            
            inputs = inputs.unsqueeze(1)
            output = model.forward(inputs)

            hot_enco = torch.empty(model.loader_size, model.n_labels)
            for j in range(model.loader_size):
                hot_enco[j] = torch.zeros(model.n_labels)
                hot_enco[j][labels[j]] = 1

            model.optimizer.zero_grad()

            loss = model.lossFn(hot_enco, output)

            epoch_loss += loss

            loss.backward()

            model.optimizer.step()
        print(f"loss at epoch {i+1}: {epoch_loss.item()}")

In [None]:
classifier_inst = Classifier(62, 8, .08)
train(classifier_inst, dataloader, 20)

In [108]:

dummy_input = torch.rand((1, 1, 28, 28))
torch.onnx.export(classifier_inst, dummy_input, "captcha_reader_model.onnx")

Exported graph: graph(%input.1 : Float(1, 1, 28, 28, strides=[784, 784, 28, 1], requires_grad=0, device=cpu),
      %conv1.weight : Float(16, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=1, device=cpu),
      %conv1.bias : Float(16, strides=[1], requires_grad=1, device=cpu),
      %conv2.weight : Float(32, 16, 3, 3, strides=[144, 9, 3, 1], requires_grad=1, device=cpu),
      %conv2.bias : Float(32, strides=[1], requires_grad=1, device=cpu),
      %fc1.bias : Float(256, strides=[1], requires_grad=1, device=cpu),
      %fc2.bias : Float(62, strides=[1], requires_grad=1, device=cpu),
      %onnx::MatMul_56 : Float(800, 256, strides=[1, 800], requires_grad=0, device=cpu),
      %onnx::MatMul_57 : Float(256, 62, strides=[1, 256], requires_grad=0, device=cpu)):
  %/conv1/Conv_output_0 : Float(1, 16, 26, 26, strides=[10816, 676, 26, 1], requires_grad=0, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name="/conv1/Conv"](%input.1

In [106]:
test = 0
for i in range(len(test_input)):
    output = classifier_inst.forward(test_input[i].unsqueeze(0).unsqueeze(0))
    if torch.argmax(output) ==  test_label[i]:
        test+= 1
    # else:
        # plt.imshow(test_input[i], cmap="gray")
        # plt.show()
        # print(int2label_dict[torch.argmax(output).item()]
print(test)
print(len(test_input))
test = 0
for i in range(len(train_input)):
    output = classifier_inst.forward(train_input[i].unsqueeze(0).unsqueeze(0))
    if torch.argmax(output) ==  train_label[i]:
        test+= 1
    
print(test)
print(len(train_input))

3989
4000
19928
20000
