In [39]:
import tensorflow as tf
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
from keras.preprocessing.sequence import pad_sequences

from keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional
from keras.models import Model
from keras.activations import relu, sigmoid, softmax
import keras.backend as K
#from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint

In [40]:
def resize_padding(img, width, height):
    '''
    Resize an image to a fixed height while keeping its aspect ratio, then
    pad it with white on the left and right up to the requested width.

    :param img: input image as an array whose shape starts with (rows, cols)
    :param width: desired width size; ``None`` disables both the width cap
                  and the padding, so the result is as wide as the aspect
                  ratio dictates
    :param height: desired height size
    :return: image with the new size
    '''

    desiredW, desiredH = width, height
    imgH, imgW = img.shape[:2]
    ratio = 1.0 * imgW / imgH

    # Never let the scaled width truncate to 0 (very tall, narrow inputs):
    # cv2.resize rejects an empty destination size.
    newW = max(1, int(desiredH * ratio))
    if desiredW is not None:
        newW = min(desiredW, newW)

    # `interpolation` must be passed by keyword: the third positional
    # parameter of cv2.resize is `dst`, not the interpolation flag.
    img = cv2.resize(img, (newW, desiredH), interpolation=cv2.INTER_AREA)

    # padding image: split the missing columns between both sides, the right
    # side receiving the extra column when the difference is odd
    if desiredW is not None and desiredW > newW:
        white = [255, 255, 255]
        left_border = (desiredW - newW) // 2
        right_border = desiredW - left_border - newW
        img = cv2.copyMakeBorder(img, 0, 0, left_border, right_border, cv2.BORDER_CONSTANT, value=white)

    return img

In [41]:
# Location of the vocabulary file.
words_dic_path = '../data/words.txt'

# Read the whole file and keep one list entry per line; the position of an
# entry in this list is what txt_to_label turns into a numeric label.
with open(words_dic_path) as vocab_file:
    words = vocab_file.read().splitlines()
    
def txt_to_label(text_words, vocabulary=None):
    """Encode each output word into a numerical label.

    :param text_words: iterable of words to encode
    :param vocabulary: optional list of known words; defaults to the
                       module-level ``words`` list
    :return: list with one integer per *known* word, equal to the word's
             first position in the vocabulary plus 1. Unknown words are
             reported on stdout and skipped, so the result may be shorter
             than ``text_words``.
    """
    if vocabulary is None:
        vocabulary = words

    # Labels start at 1 so that index 0 stays free (it is the value used to
    # pad label sequences). setdefault keeps the first occurrence of a
    # duplicated entry, matching list.index().
    lookup = {}
    for position, token in enumerate(vocabulary):
        lookup.setdefault(token, position + 1)

    label = []
    for word in text_words:
        if word in lookup:
            label.append(lookup[word])
        else:
            print('{} not found in the words.txt file'.format(word))
    return label

In [42]:
# --------- Dataset preparation ---------
# Builds, for the training and the testing split:
#   *_img          : list of (32, 512, 1) float images scaled to [0, 1]
#   *_label        : 2-D array of encoded labels, zero-padded to max_label_len
#   *_input_length : number of prediction time steps handed to the CTC loss
#   *_label_length : number of encoded entries in each (unpadded) label
#   *_orig_txt     : raw label text as read from disk

IMAGES_DIR = '../data/images'
LABELS_DIR = '../data/labels'
IMG_WIDTH, IMG_HEIGHT = 512, 32

# Number of time steps the CRNN outputs for a 512-pixel-wide input: the two
# (2, 2) poolings halve the width twice (512 -> 256 -> 128) and the final
# (2, 2) convolution without padding removes one more column (128 -> 127).
# The CTC loss must be told this length, otherwise it only looks at a prefix
# of the prediction sequence.
CTC_INPUT_LENGTH = 127


def load_split(list_path):
    """Load every sample listed in ``list_path``.

    :param list_path: text file with one image file name per line
    :return: tuple ``(images, labels, texts)`` of equally long lists holding
             the preprocessed image, the encoded label and the raw label
             text of each sample
    :raises FileNotFoundError: if an image cannot be read
    """
    with open(list_path) as list_file:
        fnames = list_file.read().splitlines()

    images, labels, texts = [], [], []
    for idx, img_fname in enumerate(fnames):
        img_path = os.path.join(IMAGES_DIR, img_fname)
        print('{0}/{1}: {2}'.format(idx, len(fnames), img_path))

        # flag 0 loads the image as single-channel grayscale
        img = cv2.imread(img_path, 0)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising
            raise FileNotFoundError('could not read image: {}'.format(img_path))

        # convert each image to shape of (32, 512), scale to [0, 1] and add
        # the trailing channel axis expected by the network input
        img = resize_padding(img, IMG_WIDTH, IMG_HEIGHT)
        img = img / 255.
        img = np.expand_dims(img, axis=2)

        # read the text of the image (same base name, .txt extension)
        label_path = os.path.join(LABELS_DIR, os.path.splitext(img_fname)[0] + '.txt')
        with open(label_path, 'r') as label_file:
            txt = label_file.read()

        # NOTE: this local must not be called `words` -- that name holds the
        # vocabulary used by txt_to_label and by the model's output layer.
        # split() without argument also drops a trailing newline and
        # repeated blanks, which would otherwise produce unknown "words".
        label_words = txt.split()

        images.append(img)
        labels.append(txt_to_label(label_words))
        texts.append(txt)

    return images, labels, texts


# --------- Train dataset ---------
training_images_list_path = '../data/train.txt'
print('Preparing the raw images for training dataset...')
train_img, train_label, train_orig_txt = load_split(training_images_list_path)

# --------- Test dataset ---------
test_images_list_path = '../data/test.txt'
print('---------------------------------------')
print('Preparing the raw images for testing...')
test_img, test_label, test_orig_txt = load_split(test_images_list_path)

# Lengths are taken from the encoded labels, so they stay correct even when
# txt_to_label skipped a word that is missing from the vocabulary.
train_label_length = [len(label) for label in train_label]
test_label_length = [len(label) for label in test_label]

train_input_length = [CTC_INPUT_LENGTH] * len(train_img)
test_input_length = [CTC_INPUT_LENGTH] * len(test_img)

# pad each output label to the maximum text length over BOTH splits, so the
# two label arrays share the width expected by the model's label input
max_label_len = max(train_label_length + test_label_length, default=0)
train_label = pad_sequences(train_label, maxlen=max_label_len, padding='post', value=0)
test_label = pad_sequences(test_label, maxlen=max_label_len, padding='post', value=0)

Preparing the raw images for training dataset...
0/1931: ../data/images/10389944244.jpg
1/1931: ../data/images/30177407.jpg
2/1931: ../data/images/673195.jpg
3/1931: ../data/images/60521.jpg
4/1931: ../data/images/40948488487.jpg
5/1931: ../data/images/900000000000.jpg
6/1931: ../data/images/12420.jpg
7/1931: ../data/images/224250.jpg
8/1931: ../data/images/11079168.jpg
9/1931: ../data/images/372443208.jpg
10/1931: ../data/images/80376657.jpg
11/1931: ../data/images/800000000000.jpg
12/1931: ../data/images/307425905246.jpg
13/1931: ../data/images/306508412.jpg
14/1931: ../data/images/8081380.jpg
15/1931: ../data/images/30154262861.jpg
16/1931: ../data/images/16118642654.jpg
17/1931: ../data/images/139041.jpg
18/1931: ../data/images/663552.jpg
19/1931: ../data/images/9387503.jpg
20/1931: ../data/images/500000000000.jpg
21/1931: ../data/images/8000000000.jpg
22/1931: ../data/images/491343760.jpg
23/1931: ../data/images/11957621654.jpg
24/1931: ../data/images/14373018.jpg
25/1931: ../data

308/1931: ../data/images/15600.jpg
309/1931: ../data/images/5271509013.jpg
310/1931: ../data/images/10000000000.jpg
311/1931: ../data/images/18830909523.jpg
312/1931: ../data/images/50609324141.jpg
313/1931: ../data/images/892902266.jpg
314/1931: ../data/images/815160800000.jpg
315/1931: ../data/images/300000000000.jpg
316/1931: ../data/images/20715561.jpg
317/1931: ../data/images/50474068959.jpg
318/1931: ../data/images/579304714604.jpg
319/1931: ../data/images/2899370000.jpg
320/1931: ../data/images/80447807283.jpg
321/1931: ../data/images/70564464466.jpg
322/1931: ../data/images/20000000000.jpg
323/1931: ../data/images/3867000000.jpg
324/1931: ../data/images/521802.jpg
325/1931: ../data/images/40668.jpg
326/1931: ../data/images/694685.jpg
327/1931: ../data/images/660590000000.jpg
328/1931: ../data/images/2274034.jpg
329/1931: ../data/images/336543.jpg
330/1931: ../data/images/18489846914.jpg
331/1931: ../data/images/1055549086.jpg
332/1931: ../data/images/19912851129.jpg
333/1931: .

554/1931: ../data/images/19810452.jpg
555/1931: ../data/images/5200039.jpg
556/1931: ../data/images/5609038371.jpg
557/1931: ../data/images/888155.jpg
558/1931: ../data/images/17975226226.jpg
559/1931: ../data/images/80039.jpg
560/1931: ../data/images/50606051.jpg
561/1931: ../data/images/70354568.jpg
562/1931: ../data/images/70954.jpg
563/1931: ../data/images/17670726326.jpg
564/1931: ../data/images/724252727.jpg
565/1931: ../data/images/384201463231.jpg
566/1931: ../data/images/4200254.jpg
567/1931: ../data/images/80072908049.jpg
568/1931: ../data/images/700493252.jpg
569/1931: ../data/images/20842.jpg
570/1931: ../data/images/6456464.jpg
571/1931: ../data/images/19048220.jpg
572/1931: ../data/images/5346307620.jpg
573/1931: ../data/images/39000000000.jpg
574/1931: ../data/images/80291897.jpg
575/1931: ../data/images/1522195385.jpg
576/1931: ../data/images/15884925126.jpg
577/1931: ../data/images/16961.jpg
578/1931: ../data/images/1650244.jpg
579/1931: ../data/images/50782232.jpg
580

795/1931: ../data/images/18972.jpg
796/1931: ../data/images/20919.jpg
797/1931: ../data/images/10889338929.jpg
798/1931: ../data/images/858146.jpg
799/1931: ../data/images/12185364.jpg
800/1931: ../data/images/3784918.jpg
801/1931: ../data/images/28700000000.jpg
802/1931: ../data/images/50377656.jpg
803/1931: ../data/images/60258969607.jpg
804/1931: ../data/images/192000.jpg
805/1931: ../data/images/493490154.jpg
806/1931: ../data/images/11539338612.jpg
807/1931: ../data/images/7545919116.jpg
808/1931: ../data/images/141810457.jpg
809/1931: ../data/images/75600000000.jpg
810/1931: ../data/images/870880715216.jpg
811/1931: ../data/images/11763.jpg
812/1931: ../data/images/19514938960.jpg
813/1931: ../data/images/993115637407.jpg
814/1931: ../data/images/3274927775.jpg
815/1931: ../data/images/6665486211.jpg
816/1931: ../data/images/512438974820.jpg
817/1931: ../data/images/200000000.jpg
818/1931: ../data/images/19001972.jpg
819/1931: ../data/images/60820564489.jpg
820/1931: ../data/imag

1029/1931: ../data/images/2589318251.jpg
1030/1931: ../data/images/5292128.jpg
1031/1931: ../data/images/434168777157.jpg
1032/1931: ../data/images/60745967404.jpg
1033/1931: ../data/images/60547488573.jpg
1034/1931: ../data/images/19574870909.jpg
1035/1931: ../data/images/15911066.jpg
1036/1931: ../data/images/19885104221.jpg
1037/1931: ../data/images/2003278.jpg
1038/1931: ../data/images/8573183.jpg
1039/1931: ../data/images/30214470.jpg
1040/1931: ../data/images/16529940.jpg
1041/1931: ../data/images/13902.jpg
1042/1931: ../data/images/7128898.jpg
1043/1931: ../data/images/4100000000.jpg
1044/1931: ../data/images/12595805808.jpg
1045/1931: ../data/images/17159715291.jpg
1046/1931: ../data/images/410481758.jpg
1047/1931: ../data/images/950000.jpg
1048/1931: ../data/images/50293020162.jpg
1049/1931: ../data/images/15605936.jpg
1050/1931: ../data/images/5251497.jpg
1051/1931: ../data/images/813055944.jpg
1052/1931: ../data/images/30651870.jpg
1053/1931: ../data/images/8951304.jpg
1054/

1273/1931: ../data/images/9188896942.jpg
1274/1931: ../data/images/526586405555.jpg
1275/1931: ../data/images/394282.jpg
1276/1931: ../data/images/10156.jpg
1277/1931: ../data/images/296633.jpg
1278/1931: ../data/images/40290119.jpg
1279/1931: ../data/images/6180123.jpg
1280/1931: ../data/images/4002353712.jpg
1281/1931: ../data/images/2671232261.jpg
1282/1931: ../data/images/398739639115.jpg
1283/1931: ../data/images/538542216.jpg
1284/1931: ../data/images/40328205741.jpg
1285/1931: ../data/images/450272714030.jpg
1286/1931: ../data/images/831391.jpg
1287/1931: ../data/images/8805587.jpg
1288/1931: ../data/images/2000000000.jpg
1289/1931: ../data/images/70000000000.jpg
1290/1931: ../data/images/20505636842.jpg
1291/1931: ../data/images/7380727.jpg
1292/1931: ../data/images/9750659731.jpg
1293/1931: ../data/images/635632817353.jpg
1294/1931: ../data/images/30747.jpg
1295/1931: ../data/images/40458764677.jpg
1296/1931: ../data/images/7200000000.jpg
1297/1931: ../data/images/1633243548.j

1527/1931: ../data/images/12622102.jpg
1528/1931: ../data/images/20116669988.jpg
1529/1931: ../data/images/58000000.jpg
1530/1931: ../data/images/3306369252.jpg
1531/1931: ../data/images/5660467741.jpg
1532/1931: ../data/images/90217475.jpg
1533/1931: ../data/images/20745703322.jpg
1534/1931: ../data/images/1061292491.jpg
1535/1931: ../data/images/6063304.jpg
1536/1931: ../data/images/8231657.jpg
1537/1931: ../data/images/6273970.jpg
1538/1931: ../data/images/80760488.jpg
1539/1931: ../data/images/11516190.jpg
1540/1931: ../data/images/4938924.jpg
1541/1931: ../data/images/4500000000.jpg
1542/1931: ../data/images/14017900.jpg
1543/1931: ../data/images/50571.jpg
1544/1931: ../data/images/610000000000.jpg
1545/1931: ../data/images/400000000000.jpg
1546/1931: ../data/images/1272102.jpg
1547/1931: ../data/images/50610.jpg
1548/1931: ../data/images/2844157295.jpg
1549/1931: ../data/images/14117574906.jpg
1550/1931: ../data/images/3398000000.jpg
1551/1931: ../data/images/322633781012.jpg
155

1770/1931: ../data/images/70320027.jpg
1771/1931: ../data/images/449907.jpg
1772/1931: ../data/images/70879.jpg
1773/1931: ../data/images/524278690818.jpg
1774/1931: ../data/images/609343472156.jpg
1775/1931: ../data/images/11446540.jpg
1776/1931: ../data/images/5000000000.jpg
1777/1931: ../data/images/14796694189.jpg
1778/1931: ../data/images/64772200000.jpg
1779/1931: ../data/images/146857487.jpg
1780/1931: ../data/images/937354900000.jpg
1781/1931: ../data/images/60299855.jpg
1782/1931: ../data/images/569365588.jpg
1783/1931: ../data/images/859067879.jpg
1784/1931: ../data/images/50038.jpg
1785/1931: ../data/images/40292566723.jpg
1786/1931: ../data/images/70986.jpg
1787/1931: ../data/images/10976139.jpg
1788/1931: ../data/images/833812.jpg
1789/1931: ../data/images/3516584234.jpg
1790/1931: ../data/images/2187637.jpg
1791/1931: ../data/images/18285080994.jpg
1792/1931: ../data/images/80650659.jpg
1793/1931: ../data/images/1965975.jpg
1794/1931: ../data/images/987955156.jpg
1795/193

169/215: ../data/images/100000.jpg
170/215: ../data/images/40000000000.jpg
171/215: ../data/images/3100000.jpg
172/215: ../data/images/20583604.jpg
173/215: ../data/images/40000000000.jpg
174/215: ../data/images/500000000000.jpg
175/215: ../data/images/12569.jpg
176/215: ../data/images/6064574143.jpg
177/215: ../data/images/40368016651.jpg
178/215: ../data/images/405176.jpg
179/215: ../data/images/50091823184.jpg
180/215: ../data/images/707742.jpg
181/215: ../data/images/90268608.jpg
182/215: ../data/images/4707734015.jpg
183/215: ../data/images/510000.jpg
184/215: ../data/images/18502716374.jpg
185/215: ../data/images/501439427603.jpg
186/215: ../data/images/14831381377.jpg
187/215: ../data/images/70249627168.jpg
188/215: ../data/images/900000000000.jpg
189/215: ../data/images/12800000.jpg
190/215: ../data/images/8780830000.jpg
191/215: ../data/images/20292891.jpg
192/215: ../data/images/50000000.jpg
193/215: ../data/images/5436976322.jpg
194/215: ../data/images/16176.jpg
195/215: ../

# Model

In [43]:
# input with shape of height=32 and width=512, single (grayscale) channel
inputs = Input(shape=(32,512,1))
 
# convolution layer with kernel size (3,3)
conv_1 = Conv2D(64, (3,3), activation = 'relu', padding='same')(inputs)
# pooling layer with kernel size (2,2): (32, 512) -> (16, 256)
pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)
 
conv_2 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_1)
# (16, 256) -> (8, 128)
pool_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_2)
 
conv_3 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_2)
 
conv_4 = Conv2D(256, (3,3), activation = 'relu', padding='same')(conv_3)
# pooling layer with kernel size (2,1): halves the height only, (8, 128) -> (4, 128)
pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)
 
conv_5 = Conv2D(512, (3,3), activation = 'relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)
 
conv_6 = Conv2D(512, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
# (4, 128) -> (2, 128)
pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)
 
# no padding here, so the (2,2) kernel collapses the height to 1 and trims
# one column: (2, 128) -> (1, 127)
conv_7 = Conv2D(512, (2,2), activation = 'relu')(pool_6)
 
# drop the height axis (size 1) to get a sequence of 127 feature vectors
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)
 
# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(blstm_1)
 
# Number of output classes. txt_to_label produces labels 1..len(words)
# (0 is only used as padding value), and the CTC loss reserves the LAST class
# index for its blank symbol, so len(words) + 2 classes are needed; with
# len(words) + 1 the label of the last vocabulary entry would collide with
# the blank index.
# NOTE(review): `words` must still be the vocabulary list loaded from
# words.txt at this point -- confirm no earlier cell has rebound the name.
num_classes = len(words) + 2
outputs = Dense(num_classes, activation = 'softmax')(blstm_2)

# model to be used at test time
act_model = Model(inputs, outputs)

act_model.summary()

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 32, 512, 1)]      0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 32, 512, 64)       640       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 16, 256, 64)       0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 16, 256, 128)      73856     
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 8, 128, 128)       0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 8, 128, 256)       295168    
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 8, 128, 256)       5900

# Loss function

In [44]:
# Extra graph inputs that are only needed to compute the CTC loss in training.
# One row of max_label_len (zero-padded) label indices per sample.
labels = Input(name='the_labels', shape=[max_label_len], dtype='float32')
# Per-sample length of the prediction sequence, forwarded to K.ctc_batch_cost.
input_length = Input(name='input_length', shape=[1], dtype='int64')
# Per-sample length of the label sequence, forwarded to K.ctc_batch_cost.
label_length = Input(name='label_length', shape=[1], dtype='int64')
 
 
def ctc_lambda_func(args):
    """Return the CTC batch cost for the packed layer inputs.

    :param args: sequence ``(y_pred, labels, input_length, label_length)``
    :return: result of ``K.ctc_batch_cost`` for those four values
    """
    predictions, targets, prediction_lengths, target_lengths = args
    # the backend expects the labels first, then the predictions
    return K.ctc_batch_cost(targets, predictions, prediction_lengths, target_lengths)
 
 
# Wrap the CTC cost in a Lambda layer named 'ctc' so that the loss value is an
# output of the graph; the order of the list matches what ctc_lambda_func unpacks.
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([outputs, labels, input_length, label_length])

# model to be used at training time: takes the image plus the three CTC
# inputs and outputs the loss computed by the 'ctc' layer
model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)

In [45]:
# The 'ctc' layer already outputs the loss value, so the Keras loss function
# simply passes the model output through and ignores the dummy targets.
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adam')

file_path = '../checkpoints/best_model.hdf5'

# Make sure the target directory exists before training starts; otherwise the
# first checkpoint write can fail after a full epoch has already been spent.
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# keep only the model with the lowest validation loss seen so far
checkpoint = ModelCheckpoint(filepath=file_path, monitor='val_loss', verbose=1,
                            save_best_only=True, mode='auto')
callbacks_list = [checkpoint]

In [46]:
# Turn the Python lists built during data preparation into numpy arrays,
# which is the form handed to model.fit.
train_img, train_input_length, train_label_length = (
    np.array(values)
    for values in (train_img, train_input_length, train_label_length)
)

test_img, test_input_length, test_label_length = (
    np.array(values)
    for values in (test_img, test_input_length, test_label_length)
)

In [47]:
# Training hyper-parameters.
batch_size = 256
epochs = 10

# The model outputs the CTC loss itself, so the targets are dummy zeros that
# the pass-through loss ignores; only their length has to match the inputs.
train_inputs = [train_img, train_label, train_input_length, train_label_length]
train_targets = np.zeros(len(train_img))

# The testing split doubles as validation data for the checkpoint callback.
val_inputs = [test_img, test_label, test_input_length, test_label_length]
val_targets = [np.zeros(len(test_img))]

model.fit(x=train_inputs,
          y=train_targets,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(val_inputs, val_targets),
          verbose=1,
          callbacks=callbacks_list)

Epoch 1/10


KeyboardInterrupt: 