# Train OCR text Detector quick example

To train, download the latest version of the OCR datasets from [https://nomeroff.net.ua/datasets/](https://nomeroff.net.ua/datasets/). Unpack the archive and place it under **./datasets/ocr**.
For example:
```bash
cd ./datasets/ocr
wget https://nomeroff.net.ua/datasets/autoriaNumberplateOcrRu-2019-03-06.zip
unzip autoriaNumberplateOcrRu-2019-03-06.zip
mv autoriaNumberplateOcrRu-2019-03-06 ru
```
or use your own dataset.

In [None]:
# Mount Google Drive so that files stored there are reachable under /content/gdrive/.
from google.colab import drive
drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [None]:
# Unpack the dataset archive stored on the mounted Drive into the current working directory.
!unzip '/content/gdrive/My Drive/autoriaNumberplateOcrRu-2020-10-12.zip'

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
  inflating: autoriaNumberplateOcrRu-2020-10-12/test/ann/X019CX90_0.json  
  inflating: autoriaNumberplateOcrRu-2020-10-12/test/ann/B553OK28.json  
  inflating: autoriaNumberplateOcrRu-2020-10-12/test/ann/T980CA161.json  
  inflating: autoriaNumberplateOcrRu-2020-10-12/test/ann/T555KX69.json  
  inflating: autoriaNumberplateOcrRu-2020-10-12/test/ann/A610AC797.json  
  inflating: autoriaNumberplateOcrRu-2020-10-12/test/ann/E424XY196.json  
  inflating: autoriaNumberplateOcrRu-2020-10-12/test/ann/A349MP54_0.json  
  inflating: autoriaNumberplateOcrRu-2020-10-12/test/ann/P186EC27.json  
  inflating: autoriaNumberplateOcrRu-2020-10-12/test/ann/O789HY77.json  
  inflating: autoriaNumberplateOcrRu-2020-10-12/test/ann/T960XX24.json  
  inflating: autoriaNumberplateOcrRu-2020-10-12/test/ann/K757KK59.json  
  inflating: autoriaNumberplateOcrRu-2020-10-12/test/ann/E350PB33.json  
  inflating: autoriaNumberplateOcrR

In [None]:
import os
import json
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from torch.utils.data import DataLoader,Dataset
import torchvision.transforms as transforms
from torchvision import models
import torch.nn as nn
from fastai.vision import Path
import torch
from torch.autograd import Variable
import albumentations as A
#from albumentations.pytorch import ToTensorV2

In [None]:
# Root of the unpacked dataset. Each split keeps its images in <split>/img/
# and its merged label index in <split>/<split>_json.json.
DATASET_ROOT = '/content/autoriaNumberplateOcrRu-2020-10-12'

train_path = DATASET_ROOT + '/train/img/'
test_path = DATASET_ROOT + '/test/img/'
val_path = DATASET_ROOT + '/val/img/'

label_train_path = DATASET_ROOT + '/train/train_json.json'
label_test_path = DATASET_ROOT + '/test/test_json.json'
label_val_path = DATASET_ROOT + '/val/val_json.json'

In [None]:
# Snapshot the file names found in each split's image directory
# (in whatever order os.listdir reports them).
train_files, test_files, val_files = (
    os.listdir(split_dir) for split_dir in (train_path, test_path, val_path)
)

In [None]:
def create_label_json(json_name, label_path):
    """Merge per-image annotation files into a single label index.

    Every ``*.json`` file in ``label_path`` is parsed and its ``name`` and
    ``description`` fields are stored as
    ``{'img_name': ..., 'predicted': ...}``. The collected records are
    written to ``json_name`` as one JSON array.

    Args:
        json_name: Path of the index file to write.
        label_path: Directory that holds the per-image annotation files.

    Raises:
        KeyError: If an annotation has no ``name`` or ``description`` field.
    """
    records = []
    # Sorted so the index order is reproducible; os.listdir order is arbitrary.
    for file_name in sorted(os.listdir(label_path)):
        # Ignore stray entries that are not annotation files.
        if not file_name.lower().endswith('.json'):
            continue
        # os.path.join works whether or not label_path ends with a separator.
        ann_path = os.path.join(label_path, file_name)
        with open(ann_path, encoding='utf-8') as ann_file:
            annotation = json.load(ann_file)
        records.append({
            'img_name': annotation['name'],
            'predicted': annotation['description'],
        })

    with open(json_name, 'w') as label_json:
        json.dump(records, label_json)

Build the JSON label index for each dataset split


In [None]:
# Directories holding the raw per-image annotation files of each split.
# NOTE: these assignments reuse the label_*_path names, so once this cell has
# run the names refer to the ann/ directories, not to any value bound earlier.
_ANN_DIR_TEMPLATE = '/content/autoriaNumberplateOcrRu-2020-10-12/%s/ann/'

label_train_path = _ANN_DIR_TEMPLATE % 'train'
label_test_path = _ANN_DIR_TEMPLATE % 'test'
label_val_path = _ANN_DIR_TEMPLATE % 'val'

In [None]:
# Build the label index of every split, not only validation. The files are
# written to the current working directory under the names that the `mv`
# commands in the next cell move into place.
for index_name, ann_dir in (
    ('train_json.json', label_train_path),
    ('test_json.json', label_test_path),
    ('val_json.json', label_val_path),
):
    create_label_json(index_name, ann_dir)

In [None]:
# Move each generated label index from /content into its split's folder.
!mv '/content/test_json.json' '/content/autoriaNumberplateOcrRu-2020-10-12/test/'
!mv '/content/train_json.json' '/content/autoriaNumberplateOcrRu-2020-10-12/train/'
!mv '/content/val_json.json' '/content/autoriaNumberplateOcrRu-2020-10-12/val/'

In [None]:
# Symbols a label may contain, followed by '_' which encode() uses as the
# padding symbol for unused slots.
ALPHABET = list("0123456789ABCEHKMOPTXY") + ['_']
# Number of character slots every label is padded to.
MAX_NOMER = 9
LENGTH_ALPHABET = len(ALPHABET)

In [None]:
def encode(a, alphabet=None, max_len=None):
    """One-hot encode a label into a flat, fixed-length list.

    The label is padded on the right with ``'_'`` up to ``max_len`` slots.
    Each slot contributes ``len(alphabet)`` entries holding a single 1 at
    the index of its symbol, and the slots are concatenated in order.

    Args:
        a: Label text; every character must be present in ``alphabet``.
        alphabet: Symbols to encode against, including ``'_'``. Defaults
            to the module-level ``ALPHABET``.
        max_len: Number of slots. Defaults to the module-level
            ``MAX_NOMER``.

    Returns:
        A list of ints of length ``max_len * len(alphabet)``.

    Raises:
        ValueError: If ``a`` is longer than ``max_len`` or contains a
            symbol that is not in ``alphabet``.
    """
    if alphabet is None:
        alphabet = ALPHABET
    if max_len is None:
        max_len = MAX_NOMER

    # Reject over-long labels up front; padding could never terminate for them.
    if len(a) > max_len:
        raise ValueError(
            'label %r has %d symbols, more than the %d available slots'
            % (a, len(a), max_len)
        )

    # Padding first also covers the empty label, which simply becomes all '_'.
    padded = list(a) + ['_'] * (max_len - len(a))

    label_oh = []
    for char in padded:
        onehot = [0] * len(alphabet)
        onehot[alphabet.index(char)] = 1
        label_oh += onehot
    return label_oh

In [None]:
class Mydataset(Dataset):
    """Grayscale plate images paired with their text labels.

    The label index at ``label_path`` is a JSON array of objects with the
    keys ``img_name`` and ``predicted``. The image of each entry is read
    from ``images_path`` as ``<img_name>.png``.

    Args:
        images_path: Directory that contains the ``.png`` images.
        label_path: Path of the JSON label index file.
        is_train: Stored on the instance; nothing in this class branches
            on it.
        transform: Optional callable using the albumentations calling
            convention: it is invoked as ``transform(image=ndarray)`` and
            must return a mapping with an ``'image'`` entry.
    """

    def __init__(self, images_path, label_path, is_train=True, transform=None):
        self.images_path = images_path
        self.label_path = label_path
        self.is_train = is_train
        self.transform = transform

        with open(label_path, 'r') as json_file:
            labels = json.load(json_file)

        self.list_img_names = [label['img_name'] for label in labels]
        self.list_labels = [label['predicted'] for label in labels]

    def __getitem__(self, idx):
        """Return ``(image_tensor, one_hot_label_array, label_text)`` for ``idx``."""
        img_name = self.list_img_names[idx]
        # os.path.join works whether or not images_path ends with a separator.
        image_path = os.path.join(self.images_path, img_name + '.png')
        # Convert to a single-channel array while the file is open, then close it.
        with Image.open(image_path) as pil_img:
            img = np.array(pil_img.convert('L'))

        label = self.list_labels[idx]
        label_oh = encode(label)

        if self.transform is not None:
            img = self.transform(image=img)['image']

        # Scale and convert unconditionally so transform=None also yields a
        # tensor; previously that path tried to unsqueeze a PIL image.
        img = torch.tensor(img / 255, dtype=torch.float)
        # Leading axis of size 1 in front of the 2-D image.
        return img.unsqueeze(dim=0), np.array(label_oh), label

    def __len__(self):
        return len(self.list_img_names)

In [None]:
# Albumentations pipeline: resize to 224x224, then apply Cutout and an
# elastic distortion as augmentation.
transform = A.Compose([
    A.Resize(224, 224, interpolation=cv2.INTER_LINEAR),
    A.Cutout(num_holes=10),
    # The cell previously ended in a dangling `p=`, which is a SyntaxError.
    # 0.5 is assumed here as the library's default probability —
    # TODO confirm the intended value.
    A.ElasticTransform(alpha_affine=2.5, p=0.5),
])

In [None]:
# torchvision preprocessing: resize to 224x224, then convert to a tensor.
# NOTE: this rebinds `transform`, replacing whatever an earlier cell stored
# under that name.
_preprocessing_steps = [transforms.Resize([224, 224]), transforms.ToTensor()]
transform = transforms.Compose(_preprocessing_steps)

In [None]:
# Training data, served in mini-batches of 64.
train_ds = Mydataset(train_path, label_train_path, transform=transform)
# Reshuffle every epoch so batches are not always built from the same samples
# in the same order.
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=0)

In [None]:
# Spot-check one sample: (image tensor, flat one-hot label array, label text).
train_ds[5]

(tensor([[[0.0784, 0.0784, 0.0784,  ..., 0.0706, 0.0706, 0.0706],
          [0.0784, 0.0784, 0.0784,  ..., 0.0706, 0.0706, 0.0706],
          [0.0745, 0.0784, 0.0784,  ..., 0.0706, 0.0706, 0.0706],
          ...,
          [0.2118, 0.2118, 0.2039,  ..., 0.1255, 0.1490, 0.1647],
          [0.2118, 0.2118, 0.2039,  ..., 0.1255, 0.1490, 0.1647],
          [0.2118, 0.2118, 0.2039,  ..., 0.1255, 0.1490, 0.1647]]]),
 array([0, 0, 0, 0, ..., 0, 0, 0, 0]),
 'A906EY122')

In [None]:
# NOTE(review): `res_pil` is not defined in any cell visible here — presumably
# a leftover from a removed cell; confirm, since this line would raise
# NameError on a clean top-to-bottom run.
res_pil[0].size()

torch.Size([1, 224, 224])

In [None]:
# Shape of the image tensor of sample 5.
train_ds[5][0].size()

torch.Size([1, 224, 224])

In [None]:
# Evaluation data: same dataset class, served one image per batch.
test_ds = Mydataset(
    images_path=test_path,
    label_path=label_test_path,
    is_train=False,
    transform=transform,
)
test_dl = DataLoader(dataset=test_ds, batch_size=1, num_workers=0)

In [None]:
# ResNet-18 without pretrained weights. The first convolution is replaced to
# take 1-channel input, and the final layer to emit
# LENGTH_ALPHABET * MAX_NOMER scores.
model = models.resnet18(pretrained=False)
model.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)
model.fc = nn.Linear(512, LENGTH_ALPHABET * MAX_NOMER)
model.cuda()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# Training objective (multi-label soft-margin loss over the flat one-hot
# target) and the Adam optimiser that updates all model parameters.
loss_func = nn.MultiLabelSoftMarginLoss()
optm = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
import torch.nn.functional as F

In [None]:
# Train for 6 passes over train_dl, logging the loss of every step.
# The network contains BatchNorm layers, so select training mode explicitly:
# re-running this cell after model.eval() would otherwise train them in
# inference mode.
model.train()
for epoch in range(6):
    for step, (img, label_oh, label) in enumerate(train_dl):
        # Tensors go to the GPU directly; the Variable wrapper is not needed.
        img = img.cuda()
        label_oh = label_oh.float().cuda()
        pred = model(img)
        loss = loss_func(pred, label_oh)
        optm.zero_grad()
        loss.backward()
        optm.step()
        print('eopch:', epoch+1, 'step:', step+1, 'loss:', loss.item())

eopch: 1 step: 1 loss: 0.7126255631446838
eopch: 1 step: 2 loss: 0.5416606664657593
eopch: 1 step: 3 loss: 0.3862365782260895
eopch: 1 step: 4 loss: 0.28034961223602295
eopch: 1 step: 5 loss: 0.21483926475048065
eopch: 1 step: 6 loss: 0.18036344647407532
eopch: 1 step: 7 loss: 0.16871941089630127
eopch: 1 step: 8 loss: 0.16270089149475098
eopch: 1 step: 9 loss: 0.16181008517742157
eopch: 1 step: 10 loss: 0.1566629260778427
eopch: 1 step: 11 loss: 0.15942463278770447
eopch: 1 step: 12 loss: 0.15420043468475342
eopch: 1 step: 13 loss: 0.15094000101089478
eopch: 1 step: 14 loss: 0.15156958997249603
eopch: 1 step: 15 loss: 0.14775687456130981
eopch: 1 step: 16 loss: 0.14792895317077637
eopch: 1 step: 17 loss: 0.1431404948234558
eopch: 1 step: 18 loss: 0.1437692940235138
eopch: 1 step: 19 loss: 0.14459343254566193
eopch: 1 step: 20 loss: 0.14524155855178833
eopch: 1 step: 21 loss: 0.1420971155166626
eopch: 1 step: 22 loss: 0.14041846990585327
eopch: 1 step: 23 loss: 0.1378341019153595
eopch

In [None]:
# Put the model in evaluation mode before measuring accuracy; the trailing
# ';' keeps the notebook from echoing the returned module.
model.eval();

In [None]:
# Exact-match evaluation over test_dl (one image per batch): a prediction
# counts only when the whole decoded string equals the label.
true_label = 0
# No gradients are needed here, so skip building the autograd graph.
with torch.no_grad():
    for step, (img, label_oh, label) in enumerate(test_dl):
        pred = model(img.cuda())

        # The flat output is MAX_NOMER consecutive groups of LENGTH_ALPHABET
        # scores; take the best-scoring symbol of each group.
        slot_scores = pred.squeeze().cpu().reshape(MAX_NOMER, LENGTH_ALPHABET)
        c = ''.join(ALPHABET[idx] for idx in slot_scores.argmax(dim=1).tolist())
        # Drop the padding symbols of unused slots.
        c = c.replace('_', '')
        if label[0] == c:
            true_label += 1
            # step is 0-based, so step + 1 is the number of samples seen so far.
            print(true_label, ' / ', step + 1)
            print('pred', c, 'real', label[0])
        else:
            print('\nОШИБКА = ', 'pred', c, 'real', label[0], '\n')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

ОШИБКА =  pred E840HP174 real E840HC134 


ОШИБКА =  pred T481XA77 real O481XA77 


ОШИБКА =  pred E229PP54 real E209PX53 


ОШИБКА =  pred H551AA199 real M851AC197 


ОШИБКА =  pred P623AA14 real P623KA14 


ОШИБКА =  pred O096EO163 real B096EO163 


ОШИБКА =  pred T000YT54 real T008YT54 


ОШИБКА =  pred B255BB750 real B258BB750 


ОШИБКА =  pred A063PP64 real A063PO61 


ОШИБКА =  pred A472CH99 real A472CH96 


ОШИБКА =  pred B693AH79 real O693AY750 


ОШИБКА =  pred P122PP152 real P126PK152 


ОШИБКА =  pred A226AA198 real A926AA113 


ОШИБКА =  pred P132PP69 real P132PP29 


ОШИБКА =  pred A955EP123 real A935EP122 


ОШИБКА =  pred H532AT177 real M532AY174 


ОШИБКА =  pred Y922EK47 real Y920EK47 


ОШИБКА =  pred K225OA799 real X825OA799 


ОШИБКА =  pred T577TP56 real T597TP56 


ОШИБКА =  pred A677KP799 real A467KB799 


ОШИБКА =  pred A104HP82 real A104HC82 


ОШИБКА =  pred C212TT98 real C219ET96 


ОШИБКА =  p

In [None]:
# Fraction of exactly matched labels — presumably correct / total from the
# evaluation run above, whose output is truncated here; TODO confirm.
30122/30756

0.9793861360384966

In [None]:
# Stand-alone inference setup: repeats the constants and preprocessing so that
# this cell and predict_img can run without the training cells.
ALPHABET = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "E", "H", "K", "M", "O", "P", "T", "X", "Y",'_']
MAX_NOMER = 9
LENGTH_ALPHABET = len(ALPHABET)
# NOTE(review): no cell visible here writes this file (there is no torch.save
# call) — confirm how the checkpoint is produced.
PATH = '/content/model_dump_6_epoch.pth'
transform = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
])

# SECURITY: torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints that come from a trusted source.
model = torch.load(PATH)
model.eval()

def predict_img(img_path):
    """Recognise the text on a single image file.

    The image is converted to grayscale, passed through the module-level
    ``transform`` and ``model``, and the best-scoring symbol of each of the
    ``MAX_NOMER`` output groups is looked up in ``ALPHABET``.

    Args:
        img_path: Path of the image to read.

    Returns:
        The predicted text with the padding symbol ``'_'`` removed.
    """
    # Convert while the file is open so the handle is closed right away.
    with Image.open(img_path) as src:
        img = transform(src.convert('L'))
    # Add the batch axis in front — assumes transform returns a (1, H, W)
    # tensor for a grayscale image, which gives the same shape as before.
    img = img.unsqueeze(dim=0).cuda()

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        pred = model(img)

    slot_scores = pred.squeeze().cpu().reshape(MAX_NOMER, LENGTH_ALPHABET)
    c = ''.join(ALPHABET[idx] for idx in slot_scores.argmax(dim=1).tolist())
    return c.replace('_', '')

In [None]:
%%time
# Time a single prediction on one image from the test split.
predict_img('/content/autoriaNumberplateOcrRu-2020-10-12/test/img/A001BP54.png')

CPU times: user 11.4 ms, sys: 0 ns, total: 11.4 ms
Wall time: 11.3 ms


'A001BP54'