In [107]:
import torch
import os
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import onnx
from PIL import Image
import pickle

In [108]:
# Class indices: 0-9 are the digits, 10-35 are 'a'-'z', 36-61 are 'A'-'Z'.
label2int_dict = {
    symbol: index
    for index, symbol in enumerate(
        [str(digit) for digit in range(10)]
        + [chr(code) for code in range(ord("a"), ord("z") + 1)]
        + [chr(code) for code in range(ord("A"), ord("Z") + 1)]
    )
}

# Reverse lookup: class index -> character.
int2label_dict = {index: symbol for symbol, index in label2int_dict.items()}

In [109]:
def _foreground_runs(has_ink):
    """Return (start, stop) index pairs, stop exclusive, for every run of True values."""
    # Framing the flags with False guarantees that every run has both a rising
    # and a falling edge, even when it touches the first or last index.
    flags = np.concatenate(([False], np.asarray(has_ink, dtype=bool), [False]))
    edges = np.flatnonzero(flags[1:] != flags[:-1])
    return list(zip(edges[0::2].tolist(), edges[1::2].tolist()))


def separate(full_image):
    """Cut a binarised 2-D image into one square 28x28 crop per run of inked columns.

    A pixel counts as background when its value is below 127. Consecutive
    columns containing at least one non-background pixel form one crop; each
    crop is trimmed to the rows that contain ink, zero-padded to a square with
    a 5-pixel margin, and resized to 28x28.

    Unlike a transition-list scan, this works whether or not the image has
    blank columns at its left/right edges or blank rows above/below a glyph.

    Args:
        full_image: 2-D array indexed as [row, column].

    Returns:
        A list of 28x28 torch tensors, ordered left to right. The list is
        empty when no column contains ink.
    """
    pad_size = 5

    # True where the pixel is NOT background (same predicate as `not (value < 127)`).
    is_ink = np.logical_not(np.asarray(full_image) < 127)

    crop_img = []
    for left, right in _foreground_runs(is_ink.any(axis=0)):
        # Every column of the run contains ink, so ink_rows is never empty.
        ink_rows = np.flatnonzero(is_ink[:, left:right].any(axis=1))
        image = full_image[ink_rows[0]: ink_rows[-1] + 1, left:right]

        # padding: widen the shorter side so the crop becomes (roughly) square,
        # then add pad_size background pixels on every side.
        pad_width = int((image.shape[0] - image.shape[1]) / 2)
        if pad_width > 0:
            # taller than wide -> add columns
            image = np.pad(
                image,
                ((pad_size, pad_size), (pad_width + pad_size, pad_width + pad_size)),
                mode="constant",
            )
        else:
            # wider than tall (pad_width <= 0) -> add rows
            image = np.pad(
                image,
                ((pad_size - pad_width, pad_size - pad_width), (pad_size, pad_size)),
                mode="constant",
            )

        resized_image = np.array(Image.fromarray(image).resize((28, 28)))
        crop_img.append(torch.from_numpy(resized_image))

    return crop_img

In [110]:

# Build the sample buffers: every file in "data" is binarised, cut into crops
# with separate(), and each crop is labelled with the character at the same
# position in the file name.
n_samples = 24000  # capacity of the pre-allocated buffers
threshold = 192    # channel-mean values below this become 0, all others 255

# Zero-initialised (not torch.empty) so uninitialised memory can never end up
# in the data, even if fewer than n_samples crops are found.
input_data = torch.zeros(n_samples, 28, 28)
label_data = torch.zeros(n_samples, dtype=torch.long)

k = 0  # number of samples stored so far
# os.listdir() returns entries in arbitrary order; sorting keeps the sample
# order, and therefore the later train/test split, reproducible.
for file in sorted(os.listdir("data")):
    if k == n_samples:
        break

    # The context manager closes the underlying file once the pixels are copied.
    with Image.open(os.path.join("data", file)) as image:
        process_image = np.array(image)

    # preprocessing
    # NOTE(review): averaging over the last axis assumes a multi-channel image
    # (e.g. RGB); a single-channel file would lose its column axis here - confirm.
    process_image = np.mean(process_image, axis=-1)
    process_image = np.where(process_image < threshold, 0, 255).astype(np.uint8)

    img_list = separate(process_image)
    if len(img_list) < 5:
        continue

    # Skip files whose crops cannot all be labelled from the file name (for
    # example an over-segmented image whose extra crops would be "labelled"
    # with the "." of the extension) instead of failing with a KeyError.
    crop_labels = file[:len(img_list)]
    if len(crop_labels) < len(img_list) or any(ch not in label2int_dict for ch in crop_labels):
        continue

    for crop, ch in zip(img_list, crop_labels):
        input_data[k] = crop
        label_data[k] = label2int_dict[ch]

        k += 1
        if k == n_samples:
            break

# Drop the unused tail so only real samples remain (a no-op when the buffers are full).
input_data = input_data[:k]
label_data = label_data[:k]

In [111]:
# The first 20000 samples are used for training; everything after them is held out for testing.
train_input, test_input = input_data[:20000], input_data[20000:]
train_label, test_label = label_data[:20000], label_data[20000:]

In [112]:
class CaptchaDataset(Dataset):
    """Map-style dataset that pairs each input with the label at the same index.

    The class deliberately has a different name from the `dataset` instance
    created below: reusing one name for both replaces the class with its
    instance, so the cell could not be executed a second time.

    Args:
        inputs: indexable collection of samples.
        labels: indexable collection of labels, same length as `inputs`.

    Raises:
        ValueError: if `inputs` and `labels` differ in length.
    """

    def __init__(self, inputs, labels):
        if len(inputs) != len(labels):
            raise ValueError(
                f"inputs and labels must have the same length, got {len(inputs)} and {len(labels)}"
            )
        self.inputs = inputs
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.inputs)

    def __getitem__(self, index):
        """Return the (input, label) pair stored at `index`."""
        return self.inputs[index], self.labels[index]


dataset = CaptchaDataset(train_input, train_label)

batch_size = 16

# drop_last=True keeps every batch at exactly batch_size samples.
dataloader = DataLoader(dataset, batch_size, shuffle=True, drop_last=True)

In [113]:
class Classifier(nn.Module):
    """Two-stage convolutional classifier for single-channel 28x28 images.

    Pipeline: conv(1->16, 3x3) -> ReLU -> avg-pool(2) -> conv(16->32, 3x3)
    -> ReLU -> avg-pool(2) -> linear(800->256) -> ReLU -> linear(256->n_labels)
    -> softmax.

    The module also owns its loss function (`lossFn`) and optimizer
    (`optimizer`), which the training loop reads from the instance.

    Args:
        n_labels: number of output classes.
        loader_size: batch size the model is trained with (stored only; the
            forward pass itself accepts any batch size).
        lr: learning rate passed to Adam.
    """

    def __init__(self, n_labels, loader_size, lr):
        super().__init__()
        self.lr = lr
        self.n_labels = n_labels
        self.loader_size = loader_size

        # layers
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1)
        self.pool1 = nn.AvgPool2d(2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1)
        self.pool2 = nn.AvgPool2d(2)

        # 800 = 32 channels * 5 * 5: a 28x28 input shrinks 28 -> 26 -> 13 -> 11 -> 5.
        self.fc1 = nn.Linear(800, 256)

        self.fc2 = nn.Linear(256, n_labels)

        # loss function (applied to the softmax output and a one-hot target)
        self.lossFn = nn.MSELoss()

        # optimizer
        self.optimizer = torch.optim.Adam(self.parameters(), self.lr)

    # output function
    def forward(self, input):
        """Return class probabilities for a batch of images.

        Args:
            input: tensor of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, n_labels); every row sums to 1.
        """
        conv1_out = torch.relu(self.conv1(input))
        pool1_out = self.pool1(conv1_out)
        conv2_out = torch.relu(self.conv2(pool1_out))
        pool2_out = self.pool2(conv2_out)

        flatten_out = pool2_out.flatten(start_dim=1)

        # nn.Linear operates on the whole batch at once, so no per-sample loop
        # (or pre-allocated output buffer on a fixed device) is needed.
        fc1_out = torch.relu(self.fc1(flatten_out))
        fc2_out = self.fc2(fc1_out)

        return torch.softmax(fc2_out, dim=-1)

In [115]:
# training function
def train(model, dataloader, n_epochs):
    """Train `model` in place and print the summed batch loss after every epoch.

    Args:
        model: module exposing `n_labels`, `lossFn` and `optimizer`; calling it
            on a (batch, 1, H, W) tensor must return (batch, n_labels) scores.
        dataloader: iterable of (inputs, labels) batches, with inputs shaped
            (batch, H, W) and labels holding integer class indices.
        n_epochs: number of passes over `dataloader`.

    Returns:
        None.
    """
    for i in range(n_epochs):

        # Plain float, so no autograd graph is kept alive across batches and an
        # empty dataloader still prints a valid value.
        epoch_loss = 0.0

        for inputs, labels in dataloader:
            inputs = inputs.unsqueeze(1)  # add the channel dimension

            model.optimizer.zero_grad()

            # Calling the module (rather than .forward) also runs registered hooks.
            output = model(inputs)

            # One-hot targets are sized from the actual batch, so a batch that is
            # smaller than model.loader_size is handled as well.
            hot_enco = torch.nn.functional.one_hot(labels.long(), model.n_labels).to(
                dtype=output.dtype, device=output.device
            )

            loss = model.lossFn(output, hot_enco)

            loss.backward()

            model.optimizer.step()

            epoch_loss += loss.item()
        print(f"loss at epoch {i+1}: {epoch_loss}")

In [116]:
# Build the 62-class model (batch size 16, learning rate 0.0001) and train it for 20 epochs.
classifier_inst = Classifier(n_labels=62, loader_size=16, lr=0.0001)
train(model=classifier_inst, dataloader=dataloader, n_epochs=20)

loss at epoch 1: 3.9803853034973145
loss at epoch 2: 0.7240327000617981
loss at epoch 3: 0.0291645098477602
loss at epoch 4: 0.00048545002937316895
loss at epoch 5: 0.0002391715970588848
loss at epoch 6: 0.0001281460135942325
loss at epoch 7: 7.192265911726281e-05


KeyboardInterrupt: 

In [120]:
# Persist the trained classifier. Only the ONNX export is active in this cell;
# the commented-out statements are alternative save/load routes kept for reference.

# Alternative: store only the weights with torch.save.
# torch.save(classifier_inst.state_dict(), "captcha_reader_model.pth")


# Example input handed to the exporter: 5 single-channel 28x28 images.
# NOTE(review): no dynamic_axes are passed, so the exported graph is presumably
# tied to this batch size of 5 - confirm before running other batch sizes.
dummy_input = torch.rand((5, 1, 28, 28))
torch.onnx.export(classifier_inst, dummy_input, "captcha_reader_model5.onnx")
# NOTE(review): the load below names 'captcha_reader_model.onnx', which is not
# the file written above ("captcha_reader_model5.onnx").
# onnx_model = onnx.load('captcha_reader_model.onnx')


# Alternative: pickle the whole model object and, separately, its state dict.
# with open("captcha_reader_model.pickle", "wb") as file:
#     pickle.dump(classifier_inst, file)
# with open("captcha_reader_model_wts.pickle", "wb") as file:
#     pickle.dump(classifier_inst.state_dict(), file)


# Loading counterpart of the pickle route.
# WARNING: pickle.load can execute arbitrary code - only load files you trust.
# with open("captcha_reader_model.pickle", "rb") as file:
#     model = pickle.load(file).cpu()
# with open("captcha_reader_model_wts.pickle", "rb") as file:
#     model.load_state_dict(pickle.load(file))

verbose: False, log level: Level.ERROR



In [117]:
def count_correct(model, inputs, labels, show_errors=False):
    """Count the samples whose arg-max prediction equals their label.

    Args:
        model: callable mapping a (1, 1, H, W) tensor to per-class scores.
        inputs: tensor of samples, iterated along its first dimension.
        labels: tensor of integer class indices, aligned with `inputs`.
        show_errors: when True, plot every misclassified sample and print the
            predicted and the expected character.

    Returns:
        The number of correctly classified samples.
    """
    correct = 0
    # Inference only: without no_grad every forward pass would build an autograd graph.
    with torch.no_grad():
        for sample, label in zip(inputs, labels):
            output = model(sample.unsqueeze(0).unsqueeze(0))
            predicted = torch.argmax(output).item()
            expected = label.item()
            if predicted == expected:
                correct += 1
            elif show_errors:
                # Imported here because `plt` is not among the notebook's imports;
                # without it the first misclassification raised a NameError.
                import matplotlib.pyplot as plt

                plt.imshow(sample, cmap="gray")
                plt.show()
                print(int2label_dict[predicted], int2label_dict[expected])
    return correct


# Held-out accuracy (misclassified samples are displayed), then training accuracy.
test = count_correct(classifier_inst, test_input, test_label, show_errors=True)
print(test)
print(len(test_input))

test = count_correct(classifier_inst, train_input, train_label)
print(test)

print(len(train_input))

4000
4000
20000
20000
