In [1]:
import os
import random

dataset_path = "./dataset0"

characters = os.listdir(dataset_path)
characters.sort()
print('Alphabet:', characters)

def filenames_for_character(character_id):
    filenames = os.listdir(dataset_path + '/' + characters[character_id])
    filenames.sort()
    for filename in filenames:
        yield dataset_path + '/' + characters[character_id] + '/' + filename

def filenames(epochs=1):
    rnd = random.Random()
    rnd.seed(0)
    for epoch in range(epochs):
        res = []
        for i in range(len(characters)):
            for filename in filenames_for_character(i):
                res.append((i, filename))
        rnd.shuffle(res)
        for filename in res:
            yield filename

def train_filenames(epochs=1):
    for (i, filename) in filenames(epochs):
        if filename.endswith('9.png'):
            continue
        yield (i, filename)

def test_filenames():
    for (i, filename) in filenames():
        if not filename.endswith('9.png'):
            continue
        yield (i, filename)

print('Training data size:', sum(1 for _ in train_filenames()))
print('Testing data size:', sum(1 for _ in test_filenames()))

Alphabet: ['а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з', 'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п', 'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч', 'ш', 'щ', 'ь', 'ю', 'я', 'є', 'і', 'ї', 'ґ']
Training data size: 91313
Testing data size: 9961


In [2]:
import torch
import torch.nn as nn

import char_net

net = char_net.CharNet()
print(net)

CharNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=33, bias=True)
)


  warn(f"Failed to load image Python extension: {e}")


In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)
net.to(device)

cuda:0


CharNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=33, bias=True)
)

In [4]:
import torch.nn as nn
import torch.optim as optim

def train(net, data):
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    step = 0
    running_loss = 0.0
    for (input, target) in data:
        input, target = input.to(device), target.to(device)
        optimizer.zero_grad()
        output = net(input)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        step += input.size(dim=0)
        if step % 2000 == 0:
            print(f'[{step:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0
    
    print('Finished Training')

In [5]:
import torchvision
from torchvision import transforms
from PIL import Image

def gen_data(filenames):
    for (character, img_path) in filenames:
        img = Image.open(img_path)
        img_tensor = transforms.ToTensor()(img)
        img_tensor = transforms.Normalize((torch.mean(img_tensor)), (torch.std(img_tensor)))(img_tensor)

        target = torch.zeros(1, dtype=torch.long)
        target[0] = character
        yield (img_tensor.view((1, 1, 32, 32)), target)

def batched_loader(loader, batch_size = 100):
    inputs = []
    targets = []
    for input, target in loader:
        inputs.append(input)
        targets.append(target)
        if len(inputs) == batch_size:
            yield (torch.cat(inputs, 0), torch.cat(targets, 0))
            inputs = []
            targets = []

In [6]:
batch_size = 64
epochs=10
train(net, batched_loader(gen_data(train_filenames(epochs)), batch_size))

[ 8000] loss: 0.218
[16000] loss: 0.205
[24000] loss: 0.151
[32000] loss: 0.047
[40000] loss: 0.015
[48000] loss: 0.010
[56000] loss: 0.006
[64000] loss: 0.005
[72000] loss: 0.003
[80000] loss: 0.002
[88000] loss: 0.003
[96000] loss: 0.003
[104000] loss: 0.002
[112000] loss: 0.002
[120000] loss: 0.001
[128000] loss: 0.002
[136000] loss: 0.001
[144000] loss: 0.001
[152000] loss: 0.001
[160000] loss: 0.001
[168000] loss: 0.001
[176000] loss: 0.001
[184000] loss: 0.002
[192000] loss: 0.001
[200000] loss: 0.001
[208000] loss: 0.001
[216000] loss: 0.001
[224000] loss: 0.000
[232000] loss: 0.000
[240000] loss: 0.001
[248000] loss: 0.001
[256000] loss: 0.002
[264000] loss: 0.001
[272000] loss: 0.000
[280000] loss: 0.000
[288000] loss: 0.000
[296000] loss: 0.000
[304000] loss: 0.001
[312000] loss: 0.000
[320000] loss: 0.000
[328000] loss: 0.001
[336000] loss: 0.000
[344000] loss: 0.000
[352000] loss: 0.000
[360000] loss: 0.000
[368000] loss: 0.000
[376000] loss: 0.000
[384000] loss: 0.000
[392

In [7]:
with torch.no_grad():
    tests = 0
    matches = 0
    for (input, target) in gen_data(test_filenames()):
        input = input.to(device)
        output = net(input)
        if torch.max(output ,1)[1] == target[0]:
            matches += 1
        else:
            print('Expected ', characters[target[0]], ' predicted ', characters[torch.max(output, 1)[1]])
        tests += 1

print(matches / tests)

Expected  б  predicted  в
Expected  л  predicted  и
Expected  з  predicted  і
0.9996988254191346


In [8]:
torch.save(net.state_dict(), 'char_net0.pt')