In [1]:
import os
import fnmatch
import cv2
import numpy as np
import string
import time
import json

from keras.preprocessing.sequence import pad_sequences

from tensorflow.keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional
from tensorflow.keras.models import Model
from tensorflow.keras.activations import relu, sigmoid, softmax
import tensorflow.keras.backend as K
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint

from tensorflow.keras.utils import Sequence
import tensorflow.compat.v1 as tf

# Switch TensorFlow to TF1-style behaviour. The original line referenced the
# function without calling it, which is a no-op expression statement.
tf.disable_v2_behavior()
# ignore warnings in the output
tf.logging.set_verbosity(tf.logging.ERROR)

from tensorflow.python.client import device_lib

# Check all available devices if GPU is available
print(device_lib.list_local_devices())
# Session that logs which device each op is placed on.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))


def CRNN(num_classes=None):
    """Build the CRNN recogniser together with its CTC training wrapper.

    The network takes a single-channel image of shape (32, 128, 1), runs it
    through a stack of convolution / pooling / batch-norm layers, squeezes
    axis 1 of the resulting feature map, and feeds the remaining sequence to
    two bidirectional LSTMs followed by a per-step softmax.

    Args:
        num_classes: Size of the softmax output. When ``None`` (default) it is
            ``len(label_num_word) + 1``, read from the notebook-level
            ``label_num_word`` mapping at call time (the extra class is
            presumably the CTC blank -- TODO confirm).

    Returns:
        A ``(model_train, model_pred)`` tuple. ``model_pred`` maps an image to
        the per-step class probabilities. ``model_train`` takes
        ``[image, labels, input_length, label_length]`` and outputs the CTC
        loss through a layer named ``'ctc'``.
    """
    if num_classes is None:
        # Resolved lazily so the function can be defined before the label
        # dictionaries have been loaded.
        num_classes = len(label_num_word) + 1

    # input with shape of height=32 and width=128
    inputs = Input(shape=(32, 128, 1), name='image_input')

    # convolution layer with kernel size (3,3)
    conv_1 = Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    # pooling layer with kernel size (2,2)
    pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)

    conv_2 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool_1)
    pool_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_2)

    conv_3 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool_2)

    conv_4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv_3)
    # pooling layer with kernel size (2,1): only the first spatial axis shrinks
    pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)

    conv_5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool_4)
    # Batch normalization layer
    batch_norm_5 = BatchNormalization()(conv_5)

    conv_6 = Conv2D(512, (3, 3), activation='relu', padding='same')(batch_norm_5)
    batch_norm_6 = BatchNormalization()(conv_6)
    pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)

    # No padding here, so each spatial axis loses one position.
    conv_7 = Conv2D(512, (2, 2), activation='relu')(pool_6)

    # Drop axis 1 (it must have size 1 at this point) to get a sequence.
    squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)

    # Number of steps in the sequence; also used as the label input width.
    max_string_len = squeezed.shape[1]

    # bidirectional LSTM layers with units=128
    blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout=0.2))(squeezed)
    blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout=0.2))(blstm_1)

    outputs = Dense(num_classes, activation='softmax')(blstm_2)

    # model to be used at prediction time
    model_pred = Model(inputs, outputs)

    # Extra inputs that only the CTC loss needs.
    labels = Input(name='label_input', shape=[max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    def ctc_lambda_func(args):
        """Compute the CTC batch cost from the packed list of four tensors."""
        y_pred, labels, input_length, label_length = args

        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [outputs, labels, input_length, label_length])

    # model to be used at training time
    model_train = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)

    return model_train, model_pred

Using TensorFlow backend.


In [4]:
 with open('./label_word_num.json',encoding='UTF8') as f:
    label_word_num = json.load(f)
with open('./label_num_word.json',encoding='UTF8') as f:
    label_num_word = json.load(f)
with open('./label_text.json',encoding='UTF8') as f:
    label_text = json.load(f)
with open('./img_list.json',encoding='UTF8') as f:
    img_list = json.load(f)

In [5]:
# input with shape of height=32 and width=128
inputs = Input(shape=(32, 128, 1), name='image_input')

# convolution layer with kernel size (3,3)
conv_1 = Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
# pooling layer with kernel size (2,2)
pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)

conv_2 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool_1)
pool_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_2)

conv_3 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool_2)

conv_4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv_3)
# pooling layer with kernel size (2,1): only the first spatial axis shrinks
pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)

conv_5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool_4)
# Batch normalization layer
batch_norm_5 = BatchNormalization()(conv_5)

conv_6 = Conv2D(512, (3, 3), activation='relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)

# No padding here, so each spatial axis loses one position.
conv_7 = Conv2D(512, (2, 2), activation='relu')(pool_6)

# Drop axis 1 (it must have size 1 at this point) to get a sequence.
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)

# Number of steps in the sequence; later used as the label input width.
max_string_len = squeezed.shape[1]

# bidirectional LSTM layers with units=128
blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout=0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout=0.2))(blstm_1)

# One softmax per step over the vocabulary plus one extra class
# (presumably the CTC blank -- TODO confirm).
outputs = Dense(len(label_num_word) + 1, activation='softmax')(blstm_2)

# model to be used at prediction time
act_model = Model(inputs, outputs)

# NOTE: `model.compile(...)` and the checkpoint callback used to be repeated
# here, but `model` is only created in a later cell, so on a fresh kernel this
# cell raised NameError. Compilation and the callback live in the cell that
# follows the training-model definition.

In [6]:
# Print the layer-by-layer summary of the prediction model `act_model`.
act_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
image_input (InputLayer)     [(None, 32, 128, 1)]      0         
_________________________________________________________________
conv2d (Conv2D)              (None, 32, 128, 64)       640       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 64, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 64, 128)       73856     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 32, 128)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 8, 32, 256)        295168    
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 8, 32, 256)        590080

In [7]:
# Additional inputs consumed only by the CTC loss at training time.
labels = Input(shape=[max_string_len], dtype='float32', name='label_input')
input_length = Input(shape=[1], dtype='int64', name='input_length')
label_length = Input(shape=[1], dtype='int64', name='label_length')


def ctc_lambda_func(args):
    """Return the CTC batch cost for ``[y_pred, labels, input_length, label_length]``."""
    predictions, truth, prediction_lengths, truth_lengths = args
    return K.ctc_batch_cost(truth, predictions, prediction_lengths, truth_lengths)


# Wrap the loss in a layer named 'ctc' so it can serve as the model output.
ctc_inputs = [outputs, labels, input_length, label_length]
loss_out = Lambda(ctc_lambda_func, name='ctc', output_shape=(1,))(ctc_inputs)

# model to be used at training time
model = Model(
    inputs=[inputs, labels, input_length, label_length],
    outputs=loss_out,
)



# def ctc_lambda_func(args):
#     y_pred, labels, input_length, label_length = args
 
#     return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

# labels = Input(name='label_input', shape=[max_string_len], dtype='float32')
# input_length = Input(name='input_length', shape=[1], dtype='int64')
# label_length = Input(name='label_length', shape=[1], dtype='int64') 

# loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([outputs, labels, input_length, label_length])

# #model to be used at training time
# model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)

In [8]:
# Save the model to disk only when the monitored validation loss improves.
filepath = "best_model.hdf5"
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

# The 'ctc' layer already outputs the loss value, so the Keras-level loss
# simply passes the model output through and ignores y_true.
model.compile(
    optimizer='adam',
    loss={'ctc': lambda y_true, y_pred: y_pred},
)

In [9]:
# training_img = []
# train_padded_txt = []
# train_input_length = []
# train_label_length = []

# valid_img = []
# valid_padded_txt = []
# valid_input_length = []
# valid_label_length = []

# temp2 = []

# for i in range(10000):
#     temp = cv2.imread(img_list[i], cv2.IMREAD_GRAYSCALE)
#     temp = temp.reshape(temp.shape + (1,))
#     temp = temp / 255.0
#     training_img.append(temp)
#     train_padded_txt.append(label_text[i])
#     train_input_length.append(17)
#     train_label_length.append(len(label_text[i]))
    
# for i in range(10000,11000):
#     temp = cv2.imread(img_list[i], cv2.IMREAD_GRAYSCALE)
#     temp = temp.reshape(temp.shape + (1,))
#     temp = temp / 255.0
#     valid_img.append(temp)
#     valid_padded_txt.append(label_text[i])
#     valid_input_length.append(17)
#     valid_label_length.append(len(label_text[i]))

In [10]:
# Length every label sequence is padded to. The original loop appended
# range(17, 31) == 14 zeros to each label, i.e. it presumably assumed labels
# arrive 17 entries long and should end up 31 long -- TODO confirm against
# the label files.
LABEL_PAD_LEN = 31

for label in label_text:
    # Pad *up to* the target length rather than appending a fixed count, so
    # re-running this cell does not keep growing the labels. A label that is
    # already long enough is left untouched (the multiplier is <= 0).
    label.extend([0] * (LABEL_PAD_LEN - len(label)))

In [11]:
# training_img = np.array(training_img)
# train_padded_txt = np.array(train_padded_txt)
# train_input_length = np.array(train_input_length)
# train_label_length = np.array(train_label_length)

# valid_img = np.array(valid_img)
# valid_padded_txt = np.array(valid_padded_txt)
# valid_input_length = np.array(valid_input_length)
# valid_label_length = np.array(valid_label_length)

# batch_size = 256
# epochs = 10
# model.fit(x=[training_img, train_padded_txt, train_input_length, train_label_length], 
#           y=np.zeros(len(training_img)), 
#           batch_size=batch_size, 
#           epochs = epochs, 
#           validation_data = ([valid_img, valid_padded_txt, valid_input_length, valid_label_length], [np.zeros(len(valid_img))]), 
#           verbose = 1, callbacks = callbacks_list)

In [12]:
import sklearn.model_selection

# Hold out 20% of the (image path, label) pairs as the test set; the fixed
# random_state keeps the split reproducible.
train_x, test_x, train_y, test_y = sklearn.model_selection.train_test_split(
    img_list,
    label_text,
    test_size=0.2,
    random_state=1,
)

In [13]:
# Carve a validation set (20%) out of the remaining training data.
train_x, val_x, train_y, val_y = sklearn.model_selection.train_test_split(
    train_x,
    train_y,
    test_size=0.2,
    random_state=1,
)

In [14]:
# Report the size of every split, in train / val / test order (x then y).
for split in (train_x, train_y, val_x, val_y, test_x, test_y):
    print(len(split))

# Show one padded label sequence as a sanity check.
print(train_y[0])

72329
72329
18083
18083
22604
22604
[327, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [15]:
from keras.models import Sequential
from data_generator import DataGenerator

# Single source of truth for the value passed to every generator and used to
# derive the step counts (presumably the batch size -- the step counts below
# divide the sample counts by the same number).
BATCH_SIZE = 128

train_data = DataGenerator(train_x, train_y, BATCH_SIZE)
val_data = DataGenerator(val_x, val_y, BATCH_SIZE)
test_data = DataGenerator(test_x, test_y, BATCH_SIZE)

# Whole batches per pass; a trailing partial batch is not counted.
train_steps = len(train_x) // BATCH_SIZE
val_steps = len(val_x) // BATCH_SIZE
epochs = 50

In [None]:
# Train on the generator. No batch size is passed to fit_generator itself;
# only the number of steps per epoch is given here.
# NOTE(review): the recorded run of this cell died with
# "TypeError: 'numpy.float64' object cannot be interpreted as an integer"
# raised from `len(self.sequence)`, which suggests DataGenerator.__len__
# returns a float -- it needs to return an int (fix in data_generator.py).
model.fit_generator(generator=train_data,
                    steps_per_epoch=train_steps,
                    epochs=1,
                    validation_data=val_data,
                    # Was `train_steps`: validation must use the step count
                    # derived from the validation set.
                    validation_steps=val_steps,
                    callbacks=callbacks_list,
                    initial_epoch=0)

Exception in thread Thread-6:
Traceback (most recent call last):
  File "C:\Users\jhkim\Anaconda3\lib\threading.py", line 916, in _bootstrap_inner
    self.run()
  File "C:\Users\jhkim\Anaconda3\lib\threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\jhkim\Anaconda3\lib\site-packages\tensorflow_core\python\keras\utils\data_utils.py", line 742, in _run
    sequence = list(range(len(self.sequence)))
TypeError: 'numpy.float64' object cannot be interpreted as an integer

