In [0]:
from keras import backend as K                                    #we are the using the keras backend api t
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import Input, Dense, Activation
from keras.layers import Reshape, Lambda
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers.recurrent import GRU
from keras.models import load_model
from keras.preprocessing.image import img_to_array
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD
import numpy as np
from imutils import paths
import cv2
import random
import os

In [0]:
def ctc_lambda_func(args):
    """Compute the per-sample CTC loss inside a Keras ``Lambda`` layer.

    Keras losses only receive ``(y_true, y_pred)``, but CTC additionally needs
    the sequence lengths, so the loss is evaluated inside the graph and the
    four tensors arrive packed in a single list.

    Args:
        args: sequence ``(y_pred, labels, input_length, label_length)`` in
            exactly that order (note that ``y_pred`` comes first here while
            ``K.ctc_batch_cost`` expects the labels first).

    Returns:
        The tensor returned by ``K.ctc_batch_cost`` (one loss value per sample).
    """
    y_pred, labels, input_length, label_length = args
    # The former ``y_pred[:, :, :]`` slice selected every element and was a
    # no-op, so the prediction tensor is passed through unchanged.
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

In [0]:
def get_model(training,img_w,img_h,depth,alphabetLength,absolute_max_string_len):
    """Build the CNN + bidirectional-GRU network used for OCR.

    Two convolution / max-pooling stages are followed by a reshape that turns
    the feature map into a sequence along the image width, a per-time-step
    dense layer, two stacked bidirectional GRU stages and a softmax over
    ``alphabetLength`` classes.

    Args:
        training: when truthy, return the training model whose inputs are
            'the_input', 'the_labels', 'input_length' and 'label_length' and
            whose single output is the CTC loss layer named 'ctc'. Otherwise
            return the inference model mapping 'the_input' to the softmax
            output.
        img_w: input image width in pixels (becomes the sequence axis).
        img_h: input image height in pixels.
        depth: number of image channels.
        alphabetLength: number of softmax classes. ``train`` passes the
            alphabet size plus one, the extra class being the CTC blank.
        absolute_max_string_len: width of the 'the_labels' input, i.e. the
            maximum label length per sample.

    Returns:
        An uncompiled ``keras.models.Model``.
    """
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    act = 'relu'

    if K.image_data_format() == 'channels_first':
        # NOTE(review): the Reshape below is written for a
        # (width, height, channels) layout; with channels_first it would mix
        # channel and spatial data -- confirm before using that data format.
        input_shape = (depth, img_w, img_h)
    else:
        input_shape = (img_w, img_h, depth)

    input_data = Input(name='the_input', shape=input_shape, dtype='float32')

    # Convolutional feature extractor: each pooling halves width and height.
    inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act,
                   kernel_initializer='he_normal', name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same', activation=act,
                   kernel_initializer='he_normal', name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    # Collapse (height, filters) into one feature vector per width position so
    # the recurrent layers see img_w // pool_size**2 time steps.
    conv_to_rnn_dims = (img_w // (pool_size ** 2),
                        (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # Shrink the per-time-step feature vector before the recurrent layers.
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # First bidirectional stage: forward and backward outputs are summed.
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])

    # Second bidirectional stage: forward and backward outputs are concatenated.
    gru_2 = GRU(rnn_size, return_sequences=True,
                kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # Transform the RNN output into per-time-step character activations.
    inner = Dense(alphabetLength, kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)

    if not training:
        # Inference only needs the image -> softmax path; the label inputs and
        # the CTC loss layer are not built at all in this mode.
        return Model(inputs=[input_data], outputs=y_pred)

    labels = Input(name='the_labels', shape=[absolute_max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # The CTC loss is computed inside the graph because it needs the sequence
    # lengths in addition to labels and predictions.
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    return Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

In [0]:

# Character classes recognised by the model. A character's position in this
# string is its integer label (see text_to_labels); there are 63 symbols.
alphabet = (
    u'abcdefghijklmnopqrstuvwxyz'   # labels 0-25
    u'ABCDEFGHIJKLMNOPQRSTUVWXYZ'   # labels 26-51
    u'0123456789'                   # labels 52-61
    u' '                            # label 62 (space)
)

In [0]:
# Convert a label string into the list of its per-character class indices.
def text_to_labels(text,alphabet):
    """Map every character of ``text`` to its index in ``alphabet``.

    Args:
        text: the string to encode.
        alphabet: string whose character positions define the class indices.

    Returns:
        A list of ints, one per character of ``text`` (empty for empty text).

    Raises:
        ValueError: if ``text`` contains a character missing from
            ``alphabet``. ``str.find`` would return -1 for it, which is the
            value ``loadpic`` uses to pad label rows, so it must never be
            emitted as a real label.
    """
    ret = []
    for position, char in enumerate(text):
        index = alphabet.find(char)
        if index < 0:
            raise ValueError(
                "character %r at position %d of %r is not in the alphabet"
                % (char, position, text))
        ret.append(index)
    return ret
    

In [0]:
def loadpic(path,batchSize,absolute_max_string_len,downsample_factor,img_w=128,img_h=64):
    """Endlessly yield randomly sampled OCR batches from an image folder.

    The ground-truth text of an image is its file name without directory and
    extension. Images are sampled with replacement.

    Args:
        path: folder searched for images via ``imutils.paths.list_images``.
        batchSize: number of samples per yielded batch.
        absolute_max_string_len: number of label slots per sample; unused
            slots hold -1.
        downsample_factor: factor by which the network shrinks the image
            width; ``img_w // downsample_factor`` is reported as the CTC
            input length.
        img_w: width every image is resized to (default 128).
        img_h: height every image is resized to (default 64).

    Yields:
        ``(inputs, outputs)`` where ``inputs`` is a dict with the keys
        'the_input', 'the_labels', 'input_length', 'label_length' and
        'source_str' (the raw label strings), and ``outputs`` is
        ``{'ctc': zeros}`` -- a dummy target, since the loss is computed
        inside the model.

    Raises:
        ValueError: if no image is found under ``path`` or a label is longer
            than ``absolute_max_string_len``.
        IOError: if an image file cannot be decoded.
    """
    imgPaths = list(paths.list_images(path))
    if not imgPaths:
        raise ValueError("no images found under %r" % (path,))

    # Private, fixed-seed RNG: sampling stays reproducible without reseeding
    # the global ``random`` state shared with other generators.
    rng = random.Random(20)

    while True:
        x_batch = []
        y_batch = np.ones([batchSize, absolute_max_string_len]) * -1
        input_length = np.zeros([batchSize, 1])
        label_length = np.zeros([batchSize, 1])
        labels_batch = []

        for i in range(batchSize):
            imgpath = imgPaths[rng.randint(0, len(imgPaths) - 1)]

            img = cv2.imread(imgpath, 1)
            if img is None:
                # cv2.imread signals failure by returning None, not raising.
                raise IOError("could not read image %r" % (imgpath,))
            img = cv2.resize(img, (img_w, img_h))
            x_batch.append(img_to_array(img))

            # Number of time steps the network produces for this image.
            input_length[i] = img_w // downsample_factor

            # Label = file name without directory and extension.
            imgname = os.path.splitext(os.path.basename(imgpath))[0]
            if len(imgname) > absolute_max_string_len:
                raise ValueError(
                    "label %r from %r is longer than absolute_max_string_len=%d"
                    % (imgname, imgpath, absolute_max_string_len))
            labels_batch.append(imgname)

            label_length[i] = len(imgname)
            y_batch[i, 0:len(imgname)] = text_to_labels(imgname, alphabet)

        # Scale pixels to [0, 1], then swap the two spatial axes so that width
        # comes first, matching the (img_w, img_h, depth) input of get_model
        # (assumes img_to_array returns channels-last arrays -- TODO confirm).
        x_batch = np.array(x_batch, dtype="float") / 255.0
        x_batch = x_batch.swapaxes(1, 2)

        inputs = {'the_input': x_batch,
                  'the_labels': y_batch,
                  'input_length': input_length,
                  'label_length': label_length,
                  'source_str': labels_batch
                  }
        outputs = {'ctc': np.zeros([batchSize])}
        yield (inputs, outputs)

In [0]:
def train(dataPath,batchSize,epochs,absolute_max_string_len,downsample_factor,valData):
    """Build, compile and fit the OCR model on images read from disk.

    Args:
        dataPath: folder with the training images (file name = label).
        batchSize: samples per batch produced by ``loadpic``.
        epochs: number of epochs; each epoch runs 1000 generator steps.
        absolute_max_string_len: maximum label length per sample.
        downsample_factor: width reduction factor of the network, forwarded
            to ``loadpic`` to compute the CTC input length.
        valData: folder with validation images, or None to train without
            validation.

    Returns:
        The fitted training model. The best model seen so far is also written
        to 'ocr_best_weights.hdf5' by the checkpoint callback.
    """
    # One class per alphabet symbol plus one extra class for the CTC blank.
    alphabetLength = len(alphabet) + 1
    model = get_model(True, 128, 64, 3, alphabetLength, absolute_max_string_len)

    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    # The 'ctc' layer already outputs the loss value, so the Keras-level loss
    # simply passes the model output through and ignores the dummy target.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    model.summary()

    has_validation = valData is not None
    # 'val_loss' only exists when validation data is supplied; fall back to the
    # training loss so a checkpoint is still written without validation.
    monitor = 'val_loss' if has_validation else 'loss'
    filepath = "ocr_best_weights.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor=monitor, verbose=1,
                                 save_best_only=True, mode='min')

    fit_kwargs = dict(
        generator=loadpic(dataPath, batchSize, absolute_max_string_len, downsample_factor),
        steps_per_epoch=1000,
        epochs=epochs,
        callbacks=[checkpoint])
    if has_validation:
        fit_kwargs['validation_data'] = loadpic(
            valData, batchSize, absolute_max_string_len, downsample_factor)
        fit_kwargs['validation_steps'] = 5

    model.fit_generator(**fit_kwargs)
    return model

In [0]:
# Image folders; loadpic uses each file name (without extension) as the label.
trainFolder = 'train'
valFolder = 'val'

# downsample_factor=4 matches the two 2x2 max-pooling layers in get_model.
m = train(
    trainFolder,
    batchSize=32,
    epochs=2,
    absolute_max_string_len=16,
    downsample_factor=4,
    valData=valFolder
)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
the_input (InputLayer)          (None, 128, 64, 3)   0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 128, 64, 16)  448         the_input[0][0]                  
__________________________________________________________________________________________________
max1 (MaxPooling2D)             (None, 64, 32, 16)   0           conv1[0][0]                      
__________________________________________________________________________________________________
conv2 (Conv2D)                  (None, 64, 32, 16)   2320        max1[0][0]                       
__________________________________________________________________________________________________
max2 (MaxP