In [112]:
import os
import fnmatch
import cv2
import numpy as np
import string
import time

from keras.preprocessing.sequence import pad_sequences

from keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional
from keras.models import Model
from keras.activations import relu, sigmoid, softmax
import keras.backend as K
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
import pandas as pd
import matplotlib.pyplot as plt

In [113]:
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.ERROR)

In [114]:
from tensorflow.python.client import device_lib


print(device_lib.list_local_devices())
sess = tf.Session(config = tf.ConfigProto(log_device_placement=True))


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 6537353436353183419
]


In [115]:
char_list = list(' '+string.digits)

def encode_to_labels(txt):
    dig_lst = []
    for index, char in enumerate(txt):
        try:
            dig_lst.append(char_list.index(char))
        except:
            print(char)
    return dig_lst

In [116]:
max_label_len=20
w,h = 200,32

dataset = pd.read_csv("Train.csv",header=None)

In [141]:
def read_data(dataset,marange):
    data_img = []
    data_txt = []
    data_input_length = []
    data_label_length = []
    data_orig_txt = []
    
    for i in marange:
        img = cv2.imread(dataset.iloc[i,0],cv2.IMREAD_GRAYSCALE)
        img = img/255
        img = np.expand_dims(img , axis = 2)
        data_orig_txt.append(dataset.iloc[i,1])
        data_label_length.append(len(dataset.iloc[i,1]))
        data_input_length.append(31)
        data_img.append(img)
        data_txt.append(encode_to_labels(dataset.iloc[i,1]))
    return data_orig_txt, data_label_length,data_input_length,data_img,data_txt
        
        

In [142]:
train_orig_txt, train_label_length,train_input_length,train_img,train_txt=read_data(dataset,range(0,4500))
valid_orig_txt, valid_label_length,valid_input_length,valid_img,valid_txt=read_data(dataset,range(4500,5000))

train_padded_txt = pad_sequences(train_txt,maxlen = max_label_len,padding='post',value=len(char_list))
valid_padded_txt = pad_sequences(valid_txt,maxlen = max_label_len,padding='post',value=len(char_list))


In [143]:
train_img = np.array(train_img)
train_padded_txt = np.array(train_padded_txt)
train_input_length = np.array(train_input_length) 
train_label_length = np.array(train_label_length)

valid_img = np.array(valid_img)
valid_padded_txt = np.array(valid_padded_txt)
valid_input_length = np.array(valid_input_length) 
valid_label_length = np.array(valid_label_length)

train_img = train_img.reshape(-1,32,200,1)
valid_img = train_img.reshape(-1,32,200,1)


In [144]:

# input with shape of height=32 and width=128 
inputs = Input(shape=(32,200,1))
 
# convolution layer with kernel size (3,3)
conv_1 = Conv2D(64, (3,3), activation = 'relu', padding='same')(inputs)
# poolig layer with kernel size (2,2)
pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)
 
conv_2 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_1)
pool_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_2)
 
conv_3 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_2)
 
conv_4 = Conv2D(256, (3,3), activation = 'relu', padding='same')(conv_3)
# poolig layer with kernel size (2,1)
pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)
 
conv_5 = Conv2D(512, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)
 
conv_6 = Conv2D(512, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)
 
conv_7 = Conv2D(512, (2,2), activation = 'relu')(pool_6)
 
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)
 
# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(blstm_1)
 
outputs = Dense(len(char_list)+1, activation = 'softmax')(blstm_2)

# model to be used at test time
act_model = Model(inputs, outputs)

In [145]:
act_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_14 (InputLayer)        (None, 32, 200, 1)        0         
_________________________________________________________________
conv2d_92 (Conv2D)           (None, 32, 200, 64)       640       
_________________________________________________________________
max_pooling2d_53 (MaxPooling (None, 16, 100, 64)       0         
_________________________________________________________________
conv2d_93 (Conv2D)           (None, 16, 100, 128)      73856     
_________________________________________________________________
max_pooling2d_54 (MaxPooling (None, 8, 50, 128)        0         
_________________________________________________________________
conv2d_94 (Conv2D)           (None, 8, 50, 256)        295168    
_________________________________________________________________
conv2d_95 (Conv2D)           (None, 8, 50, 256)        590080    
__________

In [146]:

labels = Input(name='the_labels', shape=[max_label_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
 
 
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    
    
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
 
 
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([outputs, labels, input_length, label_length])

#model to be used at training time
model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)

In [147]:

model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer = 'adam')
 
filepath="best_model.hdf5"
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
callbacks_list = [checkpoint]

In [140]:

batch_size = 256
epochs = 10
model.fit(x=[train_img, train_padded_txt, train_input_length, train_label_length], 
          y=np.zeros(len(train_img)), 
          batch_size=batch_size, epochs = epochs, 
          validation_data = ([valid_img, valid_padded_txt, valid_input_length, valid_label_length], np.zeros(len(valid_img))), 
          verbose = 1, callbacks = callbacks_list)

ValueError: All input arrays (x) should have the same number of samples. Got array shapes: [(4500, 32, 200, 1), (500, 20), (500, 1), (500, 1)]

In [None]:
# load the saved best model weights
act_model.load_weights('best_model.hdf5')
 
# predict outputs on validation images
prediction = act_model.predict(valid_img[:10])
 
# use CTC decoder
out = K.get_value(K.ctc_decode(prediction, input_length=np.ones(prediction.shape[0])*prediction.shape[1],
                         greedy=True)[0][0])
 
# see the results
i = 0
for x in out:
    print("original_text =  ", valid_orig_txt[i])
    print("predicted text = ", end = '')
    for p in x:  
        if int(p) != -1:
            print(char_list[int(p)], end = '')       
    print('\n')
    i+=1