In [3]:
'''
Author        : Aditya Jain
Date Created  : 20th November, 2020
About         : This code contains a LSTM model with CTC loss function for primitive segmentation in a video
'''

from tensorflow.keras.layers import Dense, Input, Reshape, TimeDistributed, Lambda, LSTM
from tensorflow.keras.models import Model as KerasModel
import tensorflow.keras.backend as K

from helper_func.lenet import lenet
from helper_func.misc import slide_window
from helper_func.ctc import ctc_decode

import h5py

#### Loading Data (IAM)
This loads the academic IAM dataset

In [29]:
dataset_path = "/home/aditya/Dropbox/LearningfromDemons/ctc_data/iam_lines.h5"

with h5py.File(dataset_path, "r") as f:
    x_train = f['x_train'][:]
    y_train = f['y_train'][:]
    x_test  = f['x_test'][:]
    y_test  = f['y_test'][:]
    
    print(f.mapping())
    
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

AttributeError: 'File' object has no attribute 'mapping'

Subsampling data for a quick test

In [26]:
x_train = x_train[:32,:]
y_train = y_train[:32,:]

input_shape               = x_train.shape[1:]
output_length             = y_train.shape[1]
image_height, image_width = input_shape
window_width              = 28
window_stride             = 14
num_windows               = int((image_width - window_width) / window_stride) + 1
num_classes               = 80
# output_length, num_classes = output_shape

print(input_shape, output_length)

(28, 952) 97


#### Model Architecture

In [27]:
# CTC architecture has 4 inputs
image_input  = Input(shape=input_shape, name="image")
y_true       = Input(shape=(output_length,), name="y_true")
input_length = Input(shape=(1,), name="input_length")
label_length = Input(shape=(1,), name="label_length")

image_reshaped = Reshape((image_height, image_width, 1))(image_input)
image_patches  = Lambda(slide_window, 
                       arguments={"window_width": window_width, "window_stride": window_stride})(image_reshaped)

# Make a LeNet and get rid of the last two layers (softmax and dropout)
convnet         = lenet((image_height, window_width, 1), (num_classes,))
convnet         = KerasModel(inputs=convnet.inputs, outputs=convnet.layers[-2].output)      # (num_windows, 12
convnet_outputs = TimeDistributed(convnet)(image_patches)

# (num_windows, 128)
lstm_output     = LSTM(128, return_sequences=True)(convnet_outputs)                          

# (num_windows, num_classes)
softmax_output  = Dense(num_classes, activation="softmax", name="softmax_output")(lstm_output)
    
    
input_length_processed = Lambda(lambda x, num_windows=None: x * num_windows, 
                                arguments={"num_windows": num_windows})(input_length)

ctc_loss_output        = Lambda(lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]), 
                         name="ctc_loss")([y_true, softmax_output, input_length_processed, label_length])
ctc_decoded_output     = Lambda(lambda x: ctc_decode(x[0], x[1], output_length), name="ctc_decoded")(
        [softmax_output, input_length_processed])

model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length], outputs=[ctc_loss_output, ctc_decoded_output],)

NameError: name 'num_classes' is not defined

In [31]:
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Lambda, MaxPooling2D
from tensorflow.keras.models import Sequential, Model

model = Sequential()
model.add(Input(shape = (1)))
model.add(Dense(10))
model.add(Lambda(lambda x: x**2))

model.compile()
model.summary()

model.predict([10])

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 10)                20        
_________________________________________________________________
lambda_6 (Lambda)            (None, 10)                0         
Total params: 20
Trainable params: 20
Non-trainable params: 0
_________________________________________________________________


array([[2.3628315e+01, 3.6925836e+00, 9.9704494e+00, 5.6475468e+00,
        2.6805374e-01, 3.9936134e+01, 1.7122160e+01, 1.9566708e+01,
        2.2958885e+01, 1.1936594e-02]], dtype=float32)