In [1]:
# from google.colab import drive
# drive.mount('/content/gdrive')

In [1]:
import os
os.chdir("C:/Users/HP/Desktop/Transformer-OCR")

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import time
from mydataset import MyDataset
from mydataset import char2token, vocab, token2char, subsequent_mask
from mydataset import Batch
from model import make_model
import data_preprocessing
from PIL import Image
import numpy as np
from torchvision.transforms import ToTensor
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from tqdm import tqdm
import cv2
import sys, os
from torchvision import transforms

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cpu


In [4]:
class NoamOpt:
    "Optim wrapper that implements rate."
    def __init__(self, model_size, factor, warmup, optimizer):
        self.optimizer = optimizer
        self._step = 0
        self.warmup = warmup
        self.factor = factor
        self.model_size = model_size
        self._rate = 0
        
    def step(self):
        "Update parameters and rate"
        self._step += 1
        rate = self.rate()
        for p in self.optimizer.param_groups:
            p['lr'] = rate
        self._rate = rate
        self.optimizer.step()
        
    def rate(self, step = None):
        "Implement `lrate` above"
        if step is None:
            step = self._step
        return self.factor * \
            (self.model_size ** (-0.5) *
            min(step ** (-0.5), step * self.warmup ** (-1.5)))
        



In [5]:
class LabelSmoothing(nn.Module):
    "Implement label smoothing."
    def __init__(self, size, padding_idx=0, smoothing=0.0):
        super(LabelSmoothing, self).__init__()
        self.criterion = nn.KLDivLoss(size_average=False)
        self.padding_idx = padding_idx
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.size = size
        self.true_dist = None
        
    def forward(self, x, target):
        assert x.size(1) == self.size
        true_dist = x.data.clone()
        true_dist.fill_(self.smoothing / (self.size - 2))
        true_dist.scatter_(1, target.data.unsqueeze(1).type(torch.int64), self.confidence)
        true_dist[:, self.padding_idx] = 0
        mask = torch.nonzero(target.data == self.padding_idx)
        if mask.dim() > 0:
            true_dist.index_fill_(0, mask.squeeze(), 0.0)
        self.true_dist = true_dist
        return self.criterion(x, Variable(true_dist, requires_grad=False))

In [6]:
class SimpleLossCompute:
    "A simple loss compute and train function."
    def __init__(self, generator, criterion, opt=None):
        self.generator = generator
        self.criterion = criterion
        self.opt = opt
        
    def __call__(self, x, y, norm):
        x = self.generator(x)
        loss = self.criterion(x.contiguous().view(-1, x.size(-1)), 
                              y.contiguous().view(-1)) / norm
        if self.opt is not None:
            loss.backward()
            self.opt.step()
            self.opt.optimizer.zero_grad()
        return loss.data * norm


In [7]:
def resizePadding(img, width, height):
    desired_w, desired_h = width, height #(width, height)
    _,img_h, img_w = img.shape  # old_size[0] is in (width, height) format
    # print("img_w: {0}, img_h: {1}".format(img_w, img_h))
    # ratio = img_w/float(img_h)
    # print("ratio:", ratio)
    # new_w = int(desired_h*ratio)
    # new_w = new_w if desired_w == None else min(desired_w, new_w)
    # img = img.resize((3, desired_h, new_w), Image.ANTIALIAS)

    # padding image
    img = img.permute(1,2,0)
    img = img.numpy()
    img = img*255.0
    img = Image.fromarray(img.astype('uint8'), mode = "RGB")
    if desired_w != None: # and desired_w > new_w:
        new_img = Image.new("RGB", (desired_w, desired_h), color = 255)
        new_img.paste(img,(0,0))
        img = new_img

    img = ToTensor()(img)

    return img


In [8]:
class alignCollate(object):

    def __init__(self, imgW, imgH):
        self.imgH = imgH
        self.imgW = imgW
    
    def __call__(self, batch):
        images, label_y, label, y_string= zip(*batch)
        imgH = self.imgH
        imgW = self.imgW
        images = [resizePadding(image, self.imgW, self.imgH) for image in images]
        images = torch.cat([t.unsqueeze(0) for t in images], 0)
        label_ynew = label_y[0].unsqueeze(0)
        for i,ts in enumerate(label_y):
            if i != 0:
                label_ynew = torch.cat((label_ynew,ts.unsqueeze(0)),0)
        label_new = label[0].unsqueeze(0)
        for i,ts in enumerate(label):
            if i != 0:
                label_new = torch.cat((label_new,ts.unsqueeze(0)),0)
        return images, label_ynew, label_new, y_string

In [9]:
st = time.time()
list_train = os.listdir('../train')
print("Loadtime: ", time.time()-st)
list_traindir = []
for index,image in enumerate(list_train):
    if data_preprocessing.valid_image(data_preprocessing.get_label(image)):
        list_traindir.append(image)


list_test = os.listdir('../test')
list_testdir = []
for index,image in enumerate(list_test):
    if data_preprocessing.valid_image(data_preprocessing.get_label(image)):
        list_testdir.append(image)

Loadtime:  1.595595359802246


In [38]:
train_dataset = MyDataset(list_traindir[:70], cate = 'train')
test_dataset = MyDataset(list_testdir[:70], cate = 'test')

In [39]:
print("Train dataset length: {0}".format(len(train_dataset)))
print("Test dataset length: {0}".format(len(test_dataset)))

Train dataset length: 70
Test dataset length: 70


In [44]:
# batch_size = 64
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=20, shuffle=False, num_workers=0,drop_last = False, collate_fn = alignCollate(350,32))
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=20, shuffle=False, num_workers=0, drop_last = False, collate_fn = alignCollate(350,32))

In [45]:
def greedy_decode(src, model, src_mask, max_len=32, start_symbol=1):
    memory = model.encode(src, src_mask)
    ys = torch.ones(1, 1).fill_(start_symbol).long().to(device)
    for i in range(max_len-1):
        out = model.decode(memory, src_mask, 
                           Variable(ys), 
                           Variable(subsequent_mask(ys.size(1))
                                    .long().to(device)))
        prob = model.generator(out[:, -1])
        _, next_word = torch.max(prob, dim = 1)
        next_word = next_word.data[0]
        ys = torch.cat([ys, 
                        torch.ones(1, 1).long().to(device).fill_(next_word)], dim=1)
        if token2char[next_word.item()] == '>':
            break
    ret = ys.cpu().numpy()[0]
    out = [token2char[i] for i in ret]
    out = "".join(out[1:-1])
    return out


In [46]:
def run_epoch(dataloader, model, loss_compute):
    "Standard Training and Logging Function"
    total_tokens = 0
    total_loss = 0
    tokens = 0
    size = len(dataloader.dataset)
    print("Size:", size)
    sum_acc = 0
    for bat, (imgs, labels_y, labels, y_string) in enumerate(tqdm(dataloader)):
        n_acc_sentence = 0
        batch = Batch(imgs, labels_y, labels)
        out = model(batch.imgs, batch.trg, batch.src_mask, batch.trg_mask)
        loss = loss_compute(out, batch.trg_y, batch.ntokens)
        total_loss += loss
        total_tokens += batch.ntokens
        tokens += batch.ntokens
        pred = model.generator(out)
        pred = pred.argmax(2)
        print(pred[0])
        print(labels_y[0])
        for i in range(20):
            sen_i = labels_y[i]
            n_acc_sentence += 1
            for c in range(32):
                if sen_i[c] == 65:
                    break
                elif (pred[i][c] != sen_i[c]):
                    n_acc_sentence -= 1
                    break
        sum_acc += n_acc_sentence
    print("Number of acc: ",sum_acc)

    return total_loss / total_tokens , sum_acc/float(size)

In [47]:
model = make_model(len(char2token))
# model.load_state_dict(torch.load('your-pretrain-model-path'))
model.to(device)
criterion = LabelSmoothing(size=len(char2token), padding_idx=0, smoothing=0.1)
criterion.cuda()
model_opt = NoamOpt(model.tgt_embed[0].d_model, 1, 2000,
        torch.optim.Adam(model.parameters(), lr=5e-2))
for epoch in range(100):
    print("Epoch:", epoch+1)
    model.train()
    train_loss, acc = run_epoch(train_loader, model, SimpleLossCompute(model.generator, criterion, model_opt))
#     model.eval()
#     test_loss = run_epoch(test_loader, model, SimpleLossCompute(model.generator, criterion, None))
    print("Train_loss: ", train_loss.item())
    print("Accuracy: ", acc)
#     print("Test_loss", test_loss.item())


  nn.init.xavier_uniform(p)


Epoch: 1
Size: 70


 25%|██▌       | 1/4 [00:03<00:09,  3.17s/it]

tensor([21, 25,  9, 36, 38, 22,  5, 60, 60, 13, 54, 39, 15, 60, 60, 60, 60, 60,
        25, 39, 60, 60, 22, 30, 30,  4, 30, 60, 60, 25, 60, 25])
tensor([ 3,  3,  2,  4, 29, 50,  7, 64, 33,  8, 38, 56, 65,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       dtype=torch.int32)


 50%|█████     | 2/4 [00:06<00:06,  3.27s/it]

tensor([11, 18, 59, 12, 59, 59, 54, 56,  9, 41, 20, 60, 60, 59, 60, 59, 60, 30,
        25, 60, 23, 60, 60, 60, 60, 60, 25, 30, 18, 24, 48, 18])
tensor([ 3,  5,  5, 17,  6, 13, 38, 29, 52, 61,  2, 58, 48, 11, 29, 65,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       dtype=torch.int32)


 75%|███████▌  | 3/4 [00:10<00:03,  3.64s/it]

tensor([59, 59, 59, 36, 18, 42, 52, 54, 12, 54, 14, 36, 18, 22, 12, 22, 50, 50,
        23, 23,  8, 30, 18, 39, 60, 60, 60, 23, 38, 60, 39, 60])
tensor([ 3,  7,  4, 53, 40, 22, 34, 22,  8, 43,  2, 27, 29, 40, 29, 44, 64, 47,
        47, 34, 65,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       dtype=torch.int32)


 75%|███████▌  | 3/4 [00:12<00:04,  4.30s/it]

tensor([45, 30, 62,  9, 62, 59, 38, 36,  4, 33, 36, 36, 23,  5, 60, 18, 18, 60,
        38, 60, 30, 60, 38, 38, 60, 60, 18, 30, 60,  4, 60, 60])
tensor([ 3,  8, 57, 56, 62, 59, 23, 38, 29, 64,  2, 33, 42, 65,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       dtype=torch.int32)





IndexError: index 10 is out of bounds for dimension 0 with size 10

In [16]:
# torch.save(model.state_dict(),"testmodel.pth")

In [51]:
models = make_model(len(char2token))
models.load_state_dict(torch.load("model40epoch.pth", map_location="cpu"))

  nn.init.xavier_uniform(p)


<All keys matched successfully>

In [64]:
models.to(device)
imgs, labels_y, labels, y_string = next(iter(train_loader))
batch = Batch(imgs, labels_y, labels)
out = models(batch.imgs, batch.trg, batch.src_mask, batch.trg_mask)
out = models.generator(out)
print(out.argmax(2)[0])

tensor([30, 40, 43, 47, 24, 58, 41, 47,  2, 61, 53, 49, 63, 19, 26, 11,  3, 65,
        65, 65, 65, 65, 65, 65, 65, 61, 65, 65, 65, 65, 65, 65])


In [26]:
img_transforms = transforms.Compose([
    transforms.ToTensor(),
    # transforms.Resize((350,32))
])

In [86]:
models.to(device)
models.eval()
src_mask=Variable(torch.from_numpy(np.ones([1, 1, 44], dtype=np.bool)).to(device))
img = Image.open('../test/03B1H7Ke6 9DfVO8ijqp_2009.jpg')
img = img_transforms(img)
img = resizePadding(img,350,32)
img = img.unsqueeze(0).to(device)
pred = greedy_decode(img,model = models, src_mask=src_mask)
print("Pred is: ",pred)

torch.Size([1, 1, 256])
torch.Size([1, 2, 256])
torch.Size([1, 3, 256])
torch.Size([1, 4, 256])
torch.Size([1, 5, 256])
torch.Size([1, 6, 256])
torch.Size([1, 7, 256])
torch.Size([1, 8, 256])
torch.Size([1, 9, 256])
torch.Size([1, 10, 256])
torch.Size([1, 11, 256])
torch.Size([1, 12, 256])
torch.Size([1, 13, 256])
torch.Size([1, 14, 256])
torch.Size([1, 15, 256])
torch.Size([1, 16, 256])
torch.Size([1, 17, 256])
torch.Size([1, 18, 256])
torch.Size([1, 19, 256])
torch.Size([1, 20, 256])
torch.Size([1, 21, 256])
Pred is:  03B1H7Ke6 9DfVO8ijqp


In [68]:
# def val(models,cate):
#   print('Start validation !')
#   acc = 0
#   size = len(list_testdir)
#   size = 10
#   for img_dir in list_testdir[:10]:
#     img_dirs = '../'+ cate + '/' + img_dir 
#     img = Image.open(img_dirs)
#     img = img_transforms(img)
#     img = resizePadding(img,350,32)
#     img = img.unsqueeze(0).to(device)
#     print(img.shape)
#     pred = greedy_decode(img,model = models, src_mask=src_mask)
#     label = data_preprocessing.get_label(img_dir)
#     if pred == label:
#       acc += 1
#     # print("Pred is: ",pred)
#     # print("Label is: ",data_preprocessing.get_label(img_dir))
#   print("Accuracy: ",acc/float(size))

In [75]:
def val(dataloader, models):
  print('Start validation !')
  size = len(dataloader.dataset)
  models.eval()
  correct = 0
  with torch.no_grad():
    for batch, (imgs, label_y, label, label_ystring) in enumerate(tqdm(dataloader)):
        for i in range(imgs.size(0)):
            img = imgs[i].unsqueeze(0).to(device)
            src_mask=Variable(torch.from_numpy(np.ones([1, 1, 44], dtype=np.bool)).to(device))
            pred = greedy_decode(img,model = models, src_mask=src_mask)
            batchs_size = img.size(0)
            for pred, target in zip(pred, label_ystring):
                if pred == target:
                    correct += 1
        accuracy = correct/float(size)
    print("Accuracy: {0}".format(accuracy))

In [None]:
val(test_loader,models)

In [50]:
val(models,'train')

Start validation !


AttributeError: 'EncoderDecoder' object has no attribute 'dataset'

In [1]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter('runs')

In [2]:
accuracy = []
for i in range(50):
    writer.add_scalar("Training Loss ", i, global_step=i)
print(accuracy)

[]


In [3]:
!tensorboard --logdir runs

^C
