In [1]:
import fnmatch
import cv2
import numpy as np
import string
import time

from keras.preprocessing.sequence import pad_sequences

from keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional
from keras.models import Model
from keras.activations import relu, sigmoid, softmax
import keras.backend as K
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
import os
import tensorflow as tf
import imgaug.augmenters as iaa
import random
import matplotlib.pyplot as plt
from PIL import Image

Using TensorFlow backend.


In [2]:
# total number of our output classes: len(char_list)

# def batch_generator(img_list,batch_size):
#     while True:
#         imgs=[]
#         ground_truth_txts=[]
#         labels=[]
#         label_lengths=[]
#         input_lengths=[]

#         idxs=np.random.randint(0,len(img_list),batch_size)
#         for idx in idxs:
#             img=cv2.imread('/content/images/'+img_list[idx],0)
#             #img=np.expand_dims(img,axis=2)
#             img=img/255
#             txt=img_list[idx].split('_')[1].split('.')[0]
#             imgs.append(preprocess_img(img,(128,32)))
#             ground_truth_txts.append(txt)
#             labels.append(encode_to_labels(txt))
#             input_lengths.append(31)
#             label_lengths.append(len(txt))
#         imgs=np.expand_dims(imgs,axis=-1)
#         labels=pad_sequences(labels, maxlen=max_len, padding='post', value = len(char_list)+1)
#         yield [np.array(imgs), np.array(labels), np.array(input_lengths), np.array(label_lengths)],[np.zeros(batch_size)]

# def batch_ground_text(img_list,batch_size):
#     while True:
#         imgs=[]
#         ground_truth_txts=[]
#         labels=[]
#         label_lengths=[]
#         input_lengths=[]

#         idxs=np.random.randint(0,len(img_list),batch_size)
#         for idx in idxs:
#             img=cv2.imread('/content/images/'+img_list[idx],0)
#             #img=np.expand_dims(img,axis=2)
#             img=img/255
#             txt=img_list[idx].split('_')[1].split('.')[0]
#             imgs.append(preprocess_img(img,(128,32)))
#             ground_truth_txts.append(txt)
#             labels.append(encode_to_labels(txt))
#             input_lengths.append(31)
#             label_lengths.append(len(txt))
#         imgs=np.expand_dims(imgs,axis=-1)
#         labels=pad_sequences(labels, maxlen=max_len, padding='post', value = len(char_list)+1)
#         return np.array(imgs),np.array(ground_truth_txts),np.array(labels), np.array(input_lengths), np.array(label_lengths)





char_list = string.ascii_letters+string.digits+',.?:;'
 
def encode_to_labels(txt):
    # encoding each output word into digits
    dig_lst = []
    for index, char in enumerate(txt):
        try:
            dig_lst.append(char_list.index(char))
        except:
            print(char)
        
    return dig_lst

def find_dominant_color(image):
        #Resizing parameters
        width, height = 150,150
        image = image.resize((width, height),resample = 0)
        #Get colors from image object
        pixels = image.getcolors(width * height)
        #Sort them by count number(first element of tuple)
        sorted_pixels = sorted(pixels, key=lambda t: t[0])
        #Get the most frequent color
        dominant_color = sorted_pixels[-1][1]
        return dominant_color

def preprocess_img(img, imgSize):
    "put img into target img of size imgSize, transpose for TF and normalize gray-values"

    # there are damaged files in IAM dataset - just use black image instead
    if img is None:
        img = np.zeros([imgSize[1], imgSize[0]]) 
        print("Image None!")

    # create target image and copy sample image into it
    (wt, ht) = imgSize
    (h, w) = img.shape
    fx = w / wt
    fy = h / ht
    f = max(fx, fy)
    newSize = (max(min(wt, int(w / f)), 1),
               max(min(ht, int(h / f)), 1))  # scale according to f (result at least 1 and at most wt or ht)
    img = cv2.resize(img, newSize, interpolation=cv2.INTER_CUBIC) # INTER_CUBIC interpolation best approximate the pixels image
                                                               # see this https://stackoverflow.com/a/57503843/7338066
    most_freq_pixel=find_dominant_color(Image.fromarray(img))
    target = np.ones([ht, wt]) * most_freq_pixel  
    target[0:newSize[1], 0:newSize[0]] = img

    img = target

    return img




In [3]:

training_img = []
training_txt = []
train_input_length = []
train_label_length = []
orig_txt = []
 
#lists for validation dataset
valid_img = []
valid_txt = []
valid_input_length = []
valid_label_length = []
valid_orig_txt = []
 
max_label_len = 0

annot=open('E:/Data generator for CRNN/annotation.txt','r').readlines()
imagenames=[]
txts=[]

for cnt in annot:
    filename,txt=cnt.split(',')[0],cnt.split(',')[1].split('\n')[0]
    imagenames.append(filename)
    txts.append(txt)
    
c = list(zip(imagenames, txts))

random.shuffle(c)

imagenames, txts = zip(*c)
    

    
for i in range(len(imagenames)):
        img = cv2.imread('E:/Data generator for CRNN/images/'+imagenames[i],0)   
 
        img=preprocess_img(img,(128,32))
        img=np.expand_dims(img,axis=-1)
        img = img/255.
        txt = txts[i]
        
        # compute maximum length of the text
        if len(txt) > max_label_len:
            max_label_len = len(txt)
            
           
        # split the 150000 data into validation and training dataset as 10% and 90% respectively
        if i%10 == 0:     
            valid_orig_txt.append(txt)   
            valid_label_length.append(len(txt))
            valid_input_length.append(31)
            valid_img.append(img)
            valid_txt.append(encode_to_labels(txt))
        else:
            orig_txt.append(txt)   
            train_label_length.append(len(txt))
            train_input_length.append(31)
            training_img.append(img)
            training_txt.append(encode_to_labels(txt)) 
        
        # break the loop if total data is 150000
        if i == 50000:
            flag = 1
            break
        i+=1
        
    
    




In [4]:
#pad each output label to maximum text length
 
train_padded_txt = pad_sequences(training_txt, maxlen=max_label_len, padding='post', value = len(char_list))
valid_padded_txt = pad_sequences(valid_txt, maxlen=max_label_len, padding='post', value = len(char_list))

In [5]:

# for c in training_txt[0]:
#     print(char_list[c],end="")
# plt.imshow(training_img[0],cmap='gray')

In [6]:
inputs = Input(shape=(32,128,1))
 
# convolution layer with kernel size (3,3)
conv_1 = Conv2D(64, (3,3), activation = 'relu', padding='same')(inputs)
# poolig layer with kernel size (2,2)
pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)
 
conv_2 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_1)
pool_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_2)
 
conv_3 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_2)
 
conv_4 = Conv2D(256, (3,3), activation = 'relu', padding='same')(conv_3)
# poolig layer with kernel size (2,1)
pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)
 
conv_5 = Conv2D(512, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)
 
conv_6 = Conv2D(512, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)
 
conv_7 = Conv2D(512, (2,2), activation = 'relu')(pool_6)
 
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)

# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(blstm_1)
 
outputs = Dense(len(char_list)+1, activation = 'softmax')(blstm_2)

# model to be used at test time
act_model = Model(inputs, outputs)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.



In [7]:
act_model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 32, 128, 1)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 128, 64)       640       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 64, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 64, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 8, 32, 128)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 8, 32, 256)        295168    
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 8, 32, 256)        5900

In [8]:
labels = Input(name='the_labels', shape=[max_label_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
 
 
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
 
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
 
 
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([outputs, labels, input_length, label_length])

#model to be used at training time
model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [9]:
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer = 'adam')
 
filepath="best_model.hdf5"
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
callbacks_list = [checkpoint]

In [10]:
training_img = np.array(training_img)
train_input_length = np.array(train_input_length)
train_label_length = np.array(train_label_length)

valid_img = np.array(valid_img)
valid_input_length = np.array(valid_input_length)
valid_label_length = np.array(valid_label_length)

In [11]:
batch_size = 64
epochs = 10
model.fit(x=[training_img, train_padded_txt, train_input_length, train_label_length], y=np.zeros(len(training_img)), batch_size=batch_size, epochs = epochs, validation_data = ([valid_img, valid_padded_txt, valid_input_length, valid_label_length], [np.zeros(len(valid_img))]), verbose = 1, callbacks = callbacks_list)

# model.fit_generator(batch_generator(train_img,256),steps_per_epoch=10000,validation_data=batch_generator(test_img,256),
#                     validation_steps=2000,epochs=10,shuffle=True,callbacks=callbacks_list)


Train on 45000 samples, validate on 5001 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 20.30821, saving model to best_model.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 20.30821 to 2.78490, saving model to best_model.hdf5
Epoch 3/10

Epoch 00003: val_loss improved from 2.78490 to 0.90407, saving model to best_model.hdf5
Epoch 4/10

Epoch 00004: val_loss improved from 0.90407 to 0.76348, saving model to best_model.hdf5
Epoch 5/10

Epoch 00005: val_loss improved from 0.76348 to 0.69444, saving model to best_model.hdf5
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.69444
Epoch 7/10

Epoch 00007: val_loss improved from 0.69444 to 0.63220, saving model to best_model.hdf5
Epoch 8/10

Epoch 00008: val_loss improved from 0.63220 to 0.56638, saving model to best_model.hdf5
Epoch 9/10

Epoch 00009: val_loss improved from 0.56638 to 0.52207, saving model to best_model.hdf5
Epoch 10/10

Epoch 00010: val_loss improved from 0.52207 to 0.41925, saving model to best

<keras.callbacks.callbacks.History at 0x1dc8315a5c0>

In [12]:
# valid_img,valid_orig_txt, valid_labels, valid_input_length, valid_label_length=batch_ground_text(test_img,100)

In [13]:

# load the saved best model weights
act_model.load_weights('best_model.hdf5')
 
# predict outputs on validation images
prediction = act_model.predict(valid_img[10:20])
 
# use CTC decoder
out = K.get_value(K.ctc_decode(prediction, input_length=np.ones(prediction.shape[0])*prediction.shape[1],
                         greedy=True)[0][0])
 
# see the results
i = 10
for x in out:
    print("original_text =  ", valid_orig_txt[i])
    print("predicted text = ", end = '')
    for p in x:  
        if int(p) != -1:
            print(char_list[int(p)], end = '')       
    print('\n')
    i+=1

original_text =   xyixenodgxcsi
predicted text = xyixenodgxcsi

original_text =   wkflxboscqi
predicted text = wkflxboscqi

original_text =   duzqbj
predicted text = duzqbj

original_text =   on
predicted text = on

original_text =   blsoh
predicted text = blsoh

original_text =   tzkqnasocqqqplymv
predicted text = tzkqnasocqqqplymv

original_text =   gyndkjymkdcmi
predicted text = gyndkjymkdcmi

original_text =   tdhkxsfwchbqhynmt
predicted text = tdhkxsfwchbqhynmt

original_text =   pfkeg
predicted text = pfkeg

original_text =   qvk
predicted text = qvk



In [14]:
img=cv2.imread('/content/distance.JPG',0)
w, h = img.shape
if h > 128 or w > 32:
    pass
if w < 32:
    add_zeros = np.ones((32-w, h))*255
    img = np.concatenate((img, add_zeros))

if h < 128:
    add_zeros = np.ones((32, 128-h))*255
    img = np.concatenate((img, add_zeros), axis=1)
img = np.expand_dims(img , axis = 2)
img=np.expand_dims(img,axis=0)

# Normalize each image
img = img/255.
pred=act_model.predict(img)
out = K.get_value(K.ctc_decode(pred, input_length=np.ones(pred.shape[0])*pred.shape[1],
                         greedy=True)[0][0])

AttributeError: 'NoneType' object has no attribute 'shape'

In [None]:
for char in out[0]:
  print(char_list[char],end='')

In [None]:
!pip install pyspellchecker

In [24]:
from spellchecker import SpellChecker

spell = SpellChecker()


In [33]:
spell.correction('rathor')

'rather'

In [5]:
img=cv2.imread('./pred_images/distance.JPG',0)

w,h=img.shape
if h>128 or w>32:
    pass
if w < 32:
    add_zeros = np.ones((32-w, h))*255
    img = np.concatenate((img, add_zeros))

if h < 128:
    add_zeros = np.ones((32, 128-h))*255
    img = np.concatenate((img, add_zeros), axis=1)

img=np.expand_dims(np.expand_dims(img,axis=0),axis=-1)
pred=act_model.predict(img)




In [14]:
del training_img,train_input_length,train_label_length,valid_img,valid_input_length,valid_label_length