In [5]:
'''
Author       : Aditya Jain
Date Started : This notebook was created on 2nd December, 2020
About        : Implementing CNN+RNN+CTC
'''

from tensorflow import keras
from tensorflow.keras.layers import Dropout, Dense, Input, Reshape, TimeDistributed, Lambda, LSTM, Bidirectional, Conv2D, MaxPooling2D, Flatten
import tensorflow.keras.backend as K


from tensorflow.keras.models import Model 
from tensorflow.keras.utils import to_categorical
import tensorflow as tf

#### Importing Data

In [2]:
import h5py
import matplotlib.pyplot as plt
from helper_func.misc import slide_window

dataset_path = "/home/aditya/Dropbox/LearningfromDemons/ctc_data/iam_lines.h5"
no_classes   = 80

with h5py.File(dataset_path, "r") as f:
    x_train = f['x_train'][:]
    y_train = f['y_train'][:]
    x_test  = f['x_test'][:]
    y_test  = f['y_test'][:]
    

x_train = x_train[:1000]
y_train = y_train[:1000]
x_test  = x_test[:1000]
y_test  = y_test[:1000]
    
# y_train = to_categorical(y_train, no_classes)
y_test  = to_categorical(y_train, no_classes)

print(x_train.shape)
print(y_train.shape)

(1000, 28, 952)
(1000, 97)


#### Defining LeNet Keras

In [17]:
#### Doing Here

image_shape = x_train.shape[1:]        # the image shape
no_channels = 1                        # no of channels in the image, 3 in case of RGB
print(image_shape)

no_classes    = 80
output_length = 97

# architecture is defined below

inputs     = Input(shape=(image_shape[0], image_shape[1], no_channels))
conv_1     = Conv2D(32, (3,3), activation = 'relu', padding='same')(inputs)
max_pool1  = MaxPooling2D(pool_size=(2, 2))(conv_1)
conv_2     = Conv2D(64, (3,3), activation = 'relu', padding='same')(max_pool1)
max_pool2  = MaxPooling2D(pool_size=(2, 2))(conv_2)
reshape    = Reshape(target_shape=(int(image_shape[0]/4), int(image_shape[1]/4*64)))(max_pool2)
dense1     = Dense(64)(reshape)                                                  # this dense helps reduce no of params
blstm1     = Bidirectional(LSTM(64, return_sequences=True))(reshape)
output     = Dense(no_classes+1, activation="softmax")(blstm1)


model_arch = Model(inputs, output)           # for viz the model architecture
model_arch.summary()

(28, 952)
Model: "functional_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_13 (InputLayer)        [(None, 28, 952, 1)]      0         
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 28, 952, 32)       320       
_________________________________________________________________
max_pooling2d_24 (MaxPooling (None, 14, 476, 32)       0         
_________________________________________________________________
conv2d_25 (Conv2D)           (None, 14, 476, 64)       18496     
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 7, 238, 64)        0         
_________________________________________________________________
reshape_7 (Reshape)          (None, 7, 15232)          0         
_________________________________________________________________
bidirectional_4 (Bidirection (None, 7, 128)