In [1]:
from __future__ import division
from __future__ import print_function

import sys
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
from DL import DataLoader, Batch
from SamplePreprocessor import preprocess
import decode_beam
import os
import pathlib
import BestPath
import Common
import time
import copy
import math
from model import Model

In [2]:
# Load the saved checkpoint dictionary and pull out the entries used below.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
PATH = pathlib.Path(r'C:\Users\ddang\Desktop\HTR\model\model.pt')
saved_model = torch.load(PATH)
model_ft, test_batch, test_labels = (
    saved_model[key] for key in ('model', 'test_batch', 'test_labels')
)

In [3]:
def toSparse(texts):
    """Put ground truth texts into sparse tensor form for ctc_loss.

    Args:
        texts: sequence of ground-truth strings, one per batch element.

    Returns:
        A tuple (indices, values, shape):
          * indices: list of [batchElement, position] pairs, one per character.
          * values: list with the class id of each character, i.e. its index
            in the character list read from FilePaths.fnCharList.
          * shape: [len(texts), length of the longest text].

    Raises:
        ValueError: if a text contains a character that is not present in
            the character list file.
    """
    indices = []
    values = []
    shape = [len(texts), 0] # last entry must be max(labelList[i])
    # read the character list inside a context manager so the file handle
    # is closed deterministically instead of being leaked
    with open(FilePaths.fnCharList) as charFile:
        charList = charFile.read()
    # go over all texts
    for (batchElement, text) in enumerate(texts):
        # convert to string of label (i.e. class-ids)
        labelStr = [charList.index(c) for c in text]
        # sparse tensor must have size of max. label-string
        shape[1] = max(shape[1], len(labelStr))
        # put each label into sparse tensor
        for (i, label) in enumerate(labelStr):
            indices.append([batchElement, i])
            values.append(label)

    return (indices, values, shape)

In [4]:
def getdata(phase):
    """Load every batch of one dataset split together with its labels.

    Args:
        phase: 'train' selects the training set via loader.trainSet();
            any other value selects the validation set via
            loader.validationSet().

    Returns:
        A tuple (batch, labels, wlabels, numberOfBatches, charList):
          * batch: list of the objects returned by loader.getNext().
          * labels: list with toSparse(gtTexts) for each batch.
          * wlabels: list with the raw gtTexts of each batch.
          * numberOfBatches: number of batches that were read.
          * charList: the loader's charList attribute.
    """
    loader = DataLoader(filePath = FilePaths.fnTrain, batchSize = Model.batchSize, imgSize = Model.imgSize, maxTextLen = Model.maxTextLen)
    if phase == 'train':
        loader.trainSet()
    else:
        loader.validationSet()
    batch, labels, wlabels = [], [], []
    while loader.hasNext():
        # keep a direct reference to the batch instead of re-indexing the list
        current = loader.getNext()
        batch.append(current)
        labels.append(toSparse(current.gtTexts))
        wlabels.append(current.gtTexts)
    # the batch count is simply the number of collected batches
    numberOfBatches = len(batch)
    charList = loader.charList
    return batch, labels, wlabels, numberOfBatches, charList

In [5]:
class FilePaths:
    "filenames and paths to data"
    # character list file (read by toSparse)
    fnCharList = (r'C:\Users\ddang\Desktop\HTR\model\charList.txt')
    # accuracy file; the drive prefix was previously misspelled as 'CC:'
    fnAccuracy = (r'C:\Users\ddang\Desktop\HTR\model\accuracy.txt')
    # dataset directory handed to DataLoader as filePath
    fnTrain = pathlib.Path(r'C:\Users\ddang\Desktop\HTR\data')
    # single image path for inference
    fnInfer = (r'C:\Users\ddang\Desktop\HTR\data\test.png')
    # corpus text file
    fnCorpus = (r'C:\Users\ddang\Desktop\HTR\data\corpus.txt')
    # same directory as fnTrain
    fnTrainBatch = pathlib.Path(r'C:\Users\ddang\Desktop\HTR\data')



In [6]:
def softmax(mat):
    """Calc softmax such that labels per time-step form probability distribution.

    The per-row maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps np.exp from overflowing, so
    rows with large values no longer collapse to all zeros.

    Args:
        mat: 2-D array-like, dim0 = time-step, dim1 = class.

    Returns:
        float64 ndarray of the same shape; each row sums to 1. A row whose
        normalizer is not finite (e.g. it contains inf or nan) is set to 0.

    Raises:
        ValueError: if mat is not 2-dimensional.
    """
    mat = np.asarray(mat, dtype=np.float64)
    if mat.ndim != 2:
        raise ValueError("softmax expects a 2-D matrix, got {} dimension(s)".format(mat.ndim))
    res = np.zeros(mat.shape)
    if mat.size == 0:
        # nothing to normalize; also avoids max() over an empty axis
        return res
    # inf/nan inputs are handled below, so silence the intermediate warnings
    with np.errstate(invalid='ignore', over='ignore'):
        shifted = mat - mat.max(axis=1, keepdims=True)
        e = np.exp(shifted)
        s = e.sum(axis=1, keepdims=True)
    # only rows with a usable normalizer are filled in; the rest stay 0
    ok = np.isfinite(s[:, 0])
    res[ok] = e[ok] / s[ok]
    return res

In [7]:
#load data
# Read the training and the validation split through getdata.
# Both calls assign charList, so the value returned by the 'val' call is the one kept.
batchtrain, labelstrain, wlabelstrain, numberOfBatchestrain, charList = getdata('train')
batchval, labelsval, wlabelsval, numberOfBatchesval, charList = getdata('val')


In [17]:
#print out predicted labels and labels
# Run the model on validation batch 2, decode each sample with
# BestPath.ctcBestPath and compare against the ground-truth words.
test_batch0 = batchval[2].imgs
test_labels0 = wlabelsval[2]
out = model_ft(test_batch0)
out = out.squeeze(3)
out2 = out.permute(0,2,1)
outText = []
loader = DataLoader(filePath = FilePaths.fnTrain, batchSize = Model.batchSize, imgSize = Model.imgSize, maxTextLen = Model.maxTextLen)
charList = loader.charList
numSamples = out2.shape[0]
accuracy = 0
for i in range (numSamples):
    softmaxOut = softmax(out2[i].cpu().detach().numpy())
    outText.append(BestPath.ctcBestPath(softmaxOut, charList))
    # strip spaces from the prediction unless it is exactly a single space
    if outText[i] != " ":
        outText[i] = outText[i].replace(" ","")
    if outText[i] == test_labels0[i]:
        accuracy += 1
# normalize by the number of decoded samples instead of a hard-coded 50,
# so the figure stays correct for batches of any size
accuracy = accuracy/numSamples if numSamples else 0.0
print ("Predicted labels: ")
print (outText)
print ("Correct labels: ")
print (test_labels0)
print ("Batch Accuracy: {}".format(accuracy))

Predicted labels: 
['the', 'garte', ',', 'four', 'jclock', 'tomorrow', '?', '"', 'n', 'why', '?', '"', 'he', 'enxquired', ',', 'combing', 'his', 'thick', 'I', 'awny', 'hair', '.', '"', 'To', 'bring', 'me', 'up', 'for', 'tea', ',', 'of', 'ceurse', '.', 'I', 'fust', 'thuaght', ',', ':', ',', 'id', '\'l"', 'sep', ',', ',', 'he', 'told', 'her', ',', 'and', 'sueldenly']
Correct labels: 
['the', 'gate', ',', 'four', "o'clock", 'tomorrow', '?', '"', '"', 'Why', '?', '"', 'he', 'enquired', ',', 'combing', 'his', 'thick', ',', 'tawny', 'hair', '.', '"', 'To', 'bring', 'me', 'up', 'for', 'tea', ',', 'of', 'course', '.', 'I', 'just', 'thought', '.', '"', '"', 'I', "'ll", 'see', ',', '"', 'he', 'told', 'her', ',', 'and', 'suddenly']
Batch Accuracy: 0.64


In [18]:
#display processed image
# Stack each run of ten consecutive batch images (0-9, 10-19, ..., 40-49)
# and show the five resulting strips one window at a time.
stack0, stack1, stack2, stack3, stack4 = [
    np.vstack([test_batch0[start + offset] for offset in range(10)])
    for start in range(0, 50, 10)
]
# transpose and convert to uint8 for display
stack0_out, stack1_out, stack2_out, stack3_out, stack4_out = [
    strip.transpose().astype(np.uint8)
    for strip in (stack0, stack1, stack2, stack3, stack4)
]
for strip_out in (stack0_out, stack1_out, stack2_out, stack3_out, stack4_out):
    cv2.imshow(' ',strip_out)
    # block until a key is pressed, then close the window before the next one
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [9]:
#test a single image
# Read one image file, preprocess it without augmentation, run the model,
# decode the output and finally display the preprocessed image.
test_path = (r'C:\Users\ddang\Desktop\HTR\data\Foot.png')
test_img = preprocess(
    cv2.imread(test_path, cv2.IMREAD_GRAYSCALE),
    imgSize = Model.imgSize,
    dataAugmentation = False,
)
# add a leading axis of size 1 before handing the image to the model
test_img_in = np.expand_dims(test_img, axis=0)
out = model_ft(test_img_in).squeeze(3)
# swap the last two axes, then drop the leading axis again
out2 = out.permute(0,2,1).squeeze(0)
softmaxOut = softmax(out2.cpu().detach().numpy())
test_image_out = BestPath.ctcBestPath(softmaxOut, charList).replace(" ","")
print (test_image_out)
test_img = test_img.astype(np.uint8)
cv2.imshow(' ',test_img.transpose())
cv2.waitKey(0)
cv2.destroyAllWindows()

FoOt
