In [65]:
import random, math
import numpy as np
import tensorflow as tf
from keras.models import Model
from keras.layers import *
# import tf.keras.backend as K
from keras.activations import elu
import cv2, itertools, sys, editdistance, math
from tensorflow.keras.backend import ctc_batch_cost as ctcLoss

# Fix the seeds of both global RNGs (NumPy's and the stdlib `random` module,
# which this notebook uses for shuffling and augmentation) for repeatable runs.
seed = 13
np.random.seed(seed)
random.seed(seed)


## data_utils.py

In [66]:
def truncateLabel(text, maxStringLen = 32):
	"""Cut `text` so that its label cost fits within `maxStringLen`.

	Every character costs 1, except a character equal to its predecessor,
	which costs 2 (presumably to leave room for the CTC blank that must
	separate repeated labels -- confirm against the training setup).

	Args:
		text: the label string to check.
		maxStringLen: maximum allowed accumulated cost.

	Returns:
		`text` unchanged when it fits, otherwise the longest prefix whose
		cost does not exceed `maxStringLen`.
	"""
	spent = 0
	for pos, ch in enumerate(text):
		isRepeat = pos > 0 and ch == text[pos - 1]
		spent += 2 if isRepeat else 1
		if spent > maxStringLen:
			# The character at `pos` is the first one that does not fit.
			return text[:pos]
	return text

def textToLabels(text, unicodes):
	"""Map each character of `text` to its position in `unicodes`.

	Args:
		text: string to encode.
		unicodes: sequence of known characters; the position is the label.

	Returns:
		List of integer labels, one per character.

	Raises:
		ValueError: if a character is not present in `unicodes`.
	"""
	return [unicodes.index(ch) for ch in text]

def labelsToText(labels, unicodes):
	"""Turn a sequence of integer labels back into a string.

	A label equal to `len(unicodes)` is the extra (blank) class and
	contributes nothing; every other label is looked up in `unicodes`.

	Args:
		labels: iterable of integer labels.
		unicodes: sequence of characters indexed by label.

	Returns:
		The decoded string.
	"""
	blank = len(unicodes)
	return "".join("" if lab == blank else unicodes[lab] for lab in labels)

def preprocess(img, dataAugmentation = False):
	"""Turn a grayscale word image into a normalised (128, 32, 1) array.

	Steps: Otsu binarisation, optional random horizontal stretch, cropping
	via `closeFit`, aspect-preserving resize into a 128x32 (width x height)
	box, re-binarisation, placement in the top-left corner of a white
	canvas, transpose, and mean/std normalisation.

	Args:
		img: image array as accepted by `cv2.threshold` with the Otsu flag,
			or None, in which case an all-zero image is substituted.
		dataAugmentation: when True, stretch the width by a random factor
			between 0.5 and 1.5 drawn from `random.random()`.

	Returns:
		Array of shape (128, 32, 1): the transposed canvas, mean-centred
		and divided by its standard deviation when that exceeds 1e-3.
	"""
	targetW, targetH = 128, 32
	otsuFlags = cv2.THRESH_BINARY + cv2.THRESH_OTSU

	if img is None:
		img = (np.zeros((targetW, targetH, 1))).astype('uint8')
	# Threshold to {0, 1} and rescale so the image holds only 0 and 255.
	img = cv2.threshold(img, 0, 1, otsuFlags)[1] * 255

	if dataAugmentation:
		stretch = random.random() - 0.5                              # -0.5 .. +0.5
		stretchedW = max(int(img.shape[1] * (1 + stretch)), 1)       # never below 1 px
		img = cv2.resize(img, (stretchedW, img.shape[0]))
	# Trim the surrounding white space before scaling.
	img = closeFit(img)

	srcH = img.shape[0]
	srcW = img.shape[1]
	# The larger of the two ratios decides the scale, so both sides fit.
	scale = max(srcW / targetW, srcH / targetH)
	fitW = max(min(targetW, int(srcW / scale)), 1)
	fitH = max(min(targetH, int(srcH / scale)), 1)
	# INTER_AREA matters here: linear interpolation loses the strokes.
	img = cv2.resize(img, (fitW, fitH), interpolation = cv2.INTER_AREA)
	img = cv2.threshold(img, 0, 1, otsuFlags)[1] * 255

	# White canvas with the scaled image in the top-left corner.
	canvas = np.ones([targetH, targetW]) * 255
	canvas[0:fitH, 0:fitW] = img
	img = cv2.transpose(canvas)

	(mean, std) = cv2.meanStdDev(img)
	mean = mean[0][0]
	std = std[0][0]
	img = img - mean
	if std > 1e-3:
		img = img / std
	return np.reshape(img, (img.shape[0], img.shape[1], 1))

def closeFit(img):
	"""Crop `img` to the region containing ink, keeping a margin.

	Column and row "darkness" profiles are computed as 255 minus the mean
	pixel value. Scanning inwards from each edge, positions whose darkness
	is at or below a small threshold count as blank. The crop keeps 15
	extra columns and 20 extra rows around the first and last inked position.

	Args:
		img: 2-D image array (rows x columns) with white as 255.

	Returns:
		The cropped view, or `img` itself when the crop would be empty.
	"""
	nRows = img.shape[0]
	nCols = img.shape[1]

	def scanForward(profile, limit, blankMax):
		# The forward scan starts at index 2, so the first two positions
		# are never tested.
		pos = 2
		while pos < limit and profile[pos] <= blankMax:
			pos += 1
		return pos

	def scanBackward(profile, limit, blankMax):
		pos = limit - 1
		while pos >= 0 and profile[pos] <= blankMax:
			pos -= 1
		return pos

	colDarkness = 255 - np.sum(img, axis=0) / nRows
	left = max(0, scanForward(colDarkness, nCols, 5) - 15)
	right = scanBackward(colDarkness, nCols, 5) + 15

	rowDarkness = 255 - np.sum(img, axis=1) / nCols
	# The top edge uses threshold 4 while the other three edges use 5.
	top = max(0, scanForward(rowDarkness, nRows, 4) - 20)
	bottom = scanBackward(rowDarkness, nRows, 5) + 20

	cropped = img[top:bottom, left:right]
	if cropped.shape[0] * cropped.shape[1] == 0:
		return img
	return cropped

## model_utils.py

In [67]:
def predictImage(imgPath, weightPath):
	"""Recognise the word in a single image file and print it.

	Args:
		imgPath: path of the image; it is read as grayscale. An unreadable
			path yields None from `cv2.imread`, which `preprocess` replaces
			with a blank image.
		weightPath: path of the weights file loaded into a fresh `CRNN`.

	Returns:
		The list returned by `decode` (one string for the single image).
	"""
	img = cv2.imread(imgPath, 0)
	img = preprocess(img, False)
	# Add the leading batch dimension expected by `model.predict`.
	img = np.reshape(img, (1, img.shape[0], img.shape[1], 1))
	# NOTE(review): allow_pickle=True executes pickled objects on load; only
	# use this with an alphabet file from a trusted source.
	unicodes = list(np.load('unicodes.npy', allow_pickle = True))
	# One extra output class beyond the alphabet: `labelsToText` treats the
	# label len(unicodes) as the blank.
	model = CRNN(False, len(unicodes) + 1)
	model.load_weights(weightPath)
	out = model.predict(img)
	pred = decode(out, unicodes)
	print('Recognized Word: '+ str(pred))
	return pred

def ctcLambdaFunc(yPred, labels = None, inputLength = None, labelLength = None):
	"""Compute the CTC batch loss, skipping the first two time steps.

	The function can be called in two ways:
	  * with four separate arguments, or
	  * with a single sequence `[yPred, labels, inputLength, labelLength]`
	    as the first argument. This is how a Keras `Lambda` layer invokes
	    its function when the layer is applied to a list of tensors, as
	    `CRNN` does.

	Args:
		yPred: softmax output (batch, time, classes), or the packed
			four-element sequence described above.
		labels: label tensor passed to the CTC loss.
		inputLength: per-sample prediction length passed to the CTC loss.
		labelLength: per-sample label length passed to the CTC loss.

	Returns:
		The tensor produced by `ctcLoss`.
	"""
	if labels is None:
		# Lambda-style call: everything arrives packed in the first argument.
		yPred, labels, inputLength, labelLength = yPred
	# Drop the first two time steps; `decode` does the same with y[2:].
	yPred = yPred[:,2:,:]
	return ctcLoss(labels, yPred, inputLength, labelLength)

def decode(yPred, unicodes):
	"""Best-path decoder for a batch of per-time-step class scores.

	For every sample the first two time steps are skipped, the arg-max
	class is taken at each remaining step, consecutive duplicates are
	collapsed, and the result is converted with `labelsToText` (which
	drops the blank label).

	Args:
		yPred: iterable of 2-D arrays (time x classes).
		unicodes: alphabet used to translate labels to characters.

	Returns:
		List with one decoded string per sample.
	"""
	def bestPath(frames):
		winners = list(np.argmax(frames[2:], 1))
		collapsed = [key for key, _ in itertools.groupby(winners)]
		return labelsToText(collapsed, unicodes)

	return [bestPath(sample) for sample in yPred]

def test(model, loader, batchSize = 3):
	"""Evaluate `model` on the loader's validation set and report CAR / WAR.

	Args:
		model: Keras model whose `predict` output is passed to `decode`.
		loader: `DataLoader` providing `valSet`, `valLength`, `valIndex`,
			`unicodes` and `nextVal`.
		batchSize: number of images per prediction batch (default 3, the
			value that used to be hard-coded).

	Returns:
		Tuple `(CAR, WAR)`: character and word accuracy rates in percent.

	Raises:
		ValueError: if the validation set is empty, or if the number of
			predictions differs from the number of ground-truth labels.
	"""
	trueText = [text for (text, path) in loader.valSet]
	if not trueText:
		raise ValueError('Validation set is empty; nothing to evaluate')

	# Start from the first sample so predictions line up with `valSet`,
	# even if the validation generator was consumed earlier.
	loader.valIndex = 0

	# `nextVal` yields 1-tuples so Keras treats each batch as inputs only.
	# The batch dimension is left open because the last batch may be smaller.
	output_signature = (
		tf.TensorSpec(shape=(None, 128, 32, 1), dtype=tf.float32),
	)

	validation_data = tf.data.Dataset.from_generator(
		lambda: loader.nextVal(batchSize),
		output_signature=output_signature
	)

	outputs = model.predict(validation_data, steps=math.ceil(loader.valLength / batchSize))
	# Decode with the loader's alphabet, the same one used to filter the
	# labels, instead of re-reading it from a hard-coded path.
	predText = decode(outputs, loader.unicodes)

	print(predText)

	if len(predText) != len(trueText):
		raise ValueError('Got ' + str(len(predText)) + ' predictions for '
						 + str(len(trueText)) + ' validation samples')

	wordOK = 0
	charDist = 0
	charTot = 0
	for (pred, truth) in zip(predText, trueText):
		wordOK += 1 if pred == truth else 0
		charDist += editdistance.eval(pred, truth)
		charTot += len(truth)
	wordTot = len(trueText)

	# Guard against a set whose labels are all empty.
	CAR = 100 - 100 * charDist/charTot if charTot else 0.0
	WAR = 100 * wordOK/wordTot
	print('Character Accuracy Rate (CAR):' + str(CAR))
	print('Word Accuracy Rate (WAR):' + str(WAR))
	return (CAR, WAR)

## CRNN.py

In [68]:
import tensorflow as tf
tensor = tf.constant([[1, 2, 3, 4, 5], [2, 3, 4, 5, 6]])

# Scratch check of `reverse`: with axes=[0] the order of the rows is flipped
# (the two rows swap places); the values inside each row keep their order.
tf.keras.backend.reverse(tensor, axes=[0])

<tf.Tensor: shape=(2, 5), dtype=int32, numpy=
array([[2, 3, 4, 5, 6],
       [1, 2, 3, 4, 5]], dtype=int32)>

In [69]:
def CRNN(train, outClasses):
	"""Build the convolutional-recurrent recognition model.

	Architecture, as constructed below:
	  * input of shape (128, 32, 1);
	  * five blocks of Conv2D -> BatchNormalization -> ELU -> MaxPooling2D;
	  * reshape of the feature map to (32, 256), i.e. 32 time steps;
	  * two hand-built bidirectional LSTM stages (a forward LSTM plus a
	    `go_backwards` LSTM whose output is flipped back along the time
	    axis); the first stage merges by averaging, the second by
	    concatenation, each followed by BatchNormalization;
	  * Dense + softmax over `outClasses` classes per time step.

	Args:
		train: currently unused -- the branch returning the training model
			(with the CTC loss output) is commented out.
		outClasses: number of output classes of the final Dense layer.

	Returns:
		A Keras `Model` mapping the image input to the softmax output.
	"""
	inputShape = (128, 32, 1)
	kernels = [5, 5, 3, 3, 3]
	filters = [32, 64, 128, 128, 256]
	# Used as MaxPooling2D pool sizes, one per convolution block.
	strides = [(2,2), (2,2), (1,2), (1,2), (1,2)]
	rnnUnits = 256
	maxStringLen = 32

	inputs = Input(name = 'inputX', shape = inputShape, dtype = 'float32')
	# The three inputs below only feed the CTC Lambda layer, which is not part
	# of the model that is returned.
	labels = Input(name='label', shape=[maxStringLen], dtype='float32')
	inputLength = Input(name='inputLen', shape=[1], dtype='int64')
	labelLength = Input(name='labelLen', shape=[1], dtype='int64')

	inner = inputs
	for i in range(len(kernels)):
		inner = Conv2D(filters[i], (kernels[i], kernels[i]), padding = 'same',\
					   name = 'conv' + str(i+1), kernel_initializer = 'glorot_normal') (inner)
		inner = BatchNormalization() (inner)
		inner = Activation(elu) (inner)
		inner = MaxPooling2D(pool_size = strides[i], name = 'max' + str(i+1)) (inner)
	# The pooling stack reduces 128 -> 32 on the first axis and 32 -> 1 on the
	# second, leaving 256 channels, so the map flattens to (32, 256).
	inner = Reshape(target_shape = (maxStringLen,rnnUnits), name = 'reshape')(inner)

	# First recurrent stage: forward and backward passes are averaged.
	LSF = LSTM(rnnUnits, return_sequences=True, kernel_initializer='glorot_normal', name='LSTM1F') (inner)
	LSB = LSTM(rnnUnits, return_sequences=True, go_backwards = True, kernel_initializer='glorot_normal', name='LSTM1B') (inner)
	# Flip the backward pass along the time axis before merging it with the forward pass.
	LSB = Lambda(lambda inputTensor: tf.keras.backend.reverse(inputTensor, axes=[1]), output_shape=(maxStringLen, rnnUnits)) (LSB)
	LS1 = Average()([LSF, LSB])
	LS1 = BatchNormalization() (LS1)

	# Second recurrent stage: forward and backward passes are concatenated.
	LSF = LSTM(rnnUnits, return_sequences=True, kernel_initializer='glorot_normal', name='LSTM2F') (LS1)
	LSB = LSTM(rnnUnits, return_sequences=True, go_backwards = True, kernel_initializer='glorot_normal', name='LSTM2B') (LS1)
	LSB = Lambda(lambda inputTensor: tf.keras.backend.reverse(inputTensor, axes=[1]), output_shape=(maxStringLen, rnnUnits)) (LSB)
	LS2 = Concatenate()([LSF, LSB])
	LS2 = BatchNormalization() (LS2)

	yPred = Dense(outClasses, kernel_initializer='glorot_normal', name='dense2') (LS2)
	yPred = Activation('softmax', name='softmax') (yPred)
	# CTC loss head; it is built here but not included in the returned model.
	lossOut = Lambda(ctcLambdaFunc, output_shape=(1,), name='ctc') ([yPred, labels, inputLength, labelLength])

	# if train:
	# 	return Model(inputs=[inputs, labels, inputLength, labelLength], outputs=[lossOut, yPred])
	return Model(inputs=[inputs], outputs=yPred)

## DataLoader.py

In [70]:
class DataLoader():
	"""Loads (label, image path) samples and serves them in batches.

	Sample files are UTF-8 text with one sample per line:
	`<imagePath> <label text>`, separated by single spaces. Lines starting
	with '#' and blank lines are skipped.
	"""

	def __init__(self, trainFile, valFile, unicodes):
		"""Read the sample lists.

		Args:
			trainFile: path of the training list, or "" for no training set.
			valFile: path of the validation list, or "" for no validation set.
			unicodes: sequence of characters that labels may contain.
		"""
		self.unicodes = unicodes
		self.trainFile = trainFile
		self.valFile = valFile
		self.maxStringLen = 32
		self.trainSet = []
		self.valSet = []
		self.trainIndex = 0
		self.valIndex = 0

		if self.trainFile != "":
			self.trainSet = self.importSets(True)
		if self.valFile != "":
			self.valSet = self.importSets(False)
		self.valLength = len(self.valSet)
		self.trainLength = len(self.trainSet)

	def importSets(self, train):
		"""Parse the training (`train` True) or validation sample file.

		Labels are shortened with `truncateLabel`. A sample is dropped when
		its label contains a character missing from `self.unicodes`.

		Returns:
			Shuffled list of `(text, fileName)` tuples.
		"""
		path = self.trainFile if train else self.valFile
		samples = []
		# `with` closes the file even if parsing raises.
		with open(path, 'r', encoding='utf-8') as handle:
			for line in handle:
				# Lines keep their trailing newline, so blank lines must be
				# detected after stripping. '#' marks lines disabled by hand.
				if not line.strip() or line[0] == '#':
					continue
				lineSplit = line.strip().split(' ')
				if len(lineSplit) < 2:
					print(line + 'Check this Line')
					continue

				fileName = lineSplit[0]
				text = truncateLabel(' '.join(lineSplit[1:]), self.maxStringLen)

				inUnicodes = True
				for ch in text:
					if ch not in self.unicodes:
						print('Char '+ str(ch)+ ' Not in Unicodes, and Word Omitted')
						inUnicodes = False

				if inUnicodes:
					samples.append((text, fileName))
		random.shuffle(samples)
		return samples

	def nextTrain(self, batchSize):
		"""Endless generator of training batches (see `getBatch`).

		The set is reshuffled and restarted whenever fewer than `batchSize`
		samples remain, so every full batch of a pass is served.
		"""
		while True:
			# Strictly greater: a batch ending exactly at the last sample is
			# still a full batch and must not be skipped.
			if self.trainIndex + batchSize > self.trainLength:
				self.trainIndex = 0
				random.shuffle(self.trainSet)
			ret = self.getBatch(self.trainIndex, batchSize, True)
			self.trainIndex += batchSize
			yield ret

	def nextVal(self, batchSize):
		"""Endless generator of validation batches, each wrapped in a 1-tuple.

		The last batch of a pass may hold fewer than `batchSize` images.
		"""
		while True:
			if self.valIndex >= self.valLength:
				self.valIndex = 0
			ret = self.getBatch(self.valIndex, batchSize, False)
			self.valIndex += batchSize
			yield (ret,)

	def getBatch(self, index, batchSize, train):
		"""Load and preprocess the samples `index .. index + batchSize - 1`.

		Args:
			index: position of the first sample in the chosen set.
			batchSize: maximum number of samples to load.
			train: True for the training set (with augmentation and labels),
				False for the validation set.

		Returns:
			For training: `(inputs, outputs)` where `inputs` has the keys
			'inputX', 'label', 'inputLen', 'labelLen' and `outputs` has the
			key 'ctc'. For validation: the list of preprocessed images.
			All arrays hold exactly as many rows as images were loaded.
		"""
		samples = self.trainSet if train else self.valSet
		batch = samples[index:index + batchSize]
		# May be smaller than batchSize at the end of the set; sizing every
		# array by it keeps images and labels the same length.
		count = len(batch)

		imgs = []
		# Pad labels with the blank class index, len(unicodes).
		labels = np.ones([count, self.maxStringLen]) * len(self.unicodes)
		inputLength = np.zeros([count, 1])
		labelLength = np.zeros([count, 1])

		for i, (text, fileName) in enumerate(batch):
			img = cv2.imread(fileName, 0)
			if img is None:
				img = np.zeros((128,32,1))
				print(fileName + 'is not available')

			imgs.append(preprocess(img.astype('uint8'), train))
			if train:
				labels[i, 0:len(text)] = textToLabels(text, self.unicodes)
				labelLength[i] = len(text)
				# Two time steps are discarded before the CTC loss.
				inputLength[i] = self.maxStringLen - 2

		if not train:
			return imgs

		inputs = {
				'inputX' : np.asarray(imgs),
				'label' : labels,
				'inputLen' : inputLength,
				'labelLen' : labelLength,
					}
		outputs = {'ctc' : np.zeros([count])}
		return (inputs, outputs)

## trainer.py

In [72]:
# Evaluation run: load the alphabet, build a validation-only loader (no
# training file), restore the trained weights and report CAR / WAR.
# NOTE(review): allow_pickle=True executes pickled objects on load -- only use
# it with a trusted alphabet file. The '/content/...' paths look like a Colab
# session layout; confirm before running elsewhere.
unicodes = list(np.load('/content/unicodes.npy', allow_pickle = True))

loader = DataLoader('', "/content/test.txt", unicodes)
# One class per character plus one extra class for the blank label.
testModel = CRNN(False, len(unicodes) + 1)
testModel.load_weights('/content/crnn_weights_exp2.h5')
CAR, WAR = test(testModel, loader)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 672ms/step
['मनमैला', 'लसिका', 'अलबकाइएगी']
Character Accuracy Rate (CAR):95.0
Word Accuracy Rate (WAR):66.66666666666667
