In [1]:
'''
Author        : Aditya Jain
Date Created  : 20th November, 2020
About         : This code contains a LSTM model with CTC loss function for primitive segmentation in a video
'''

from tensorflow.keras.layers import Dense, Input, Reshape, TimeDistributed, Lambda, LSTM
from tensorflow.keras.models import Model as KerasModel
import tensorflow.keras.backend as K

from helper_func.lenet import lenet
from helper_func.misc import slide_window
from helper_func.ctc import ctc_decode

import h5py

#### Loading Data (IAM)
This loads the academic IAM dataset

In [2]:
dataset_path = "/home/aditya/Dropbox/LearningfromDemons/ctc_data/iam_lines.h5"

with h5py.File(dataset_path, "r") as f:
    x_train = f['x_train'][:]
    y_train = f['y_train'][:]
    x_test  = f['x_test'][:]
    y_test  = f['y_test'][:]
    
    
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(7101, 28, 952)
(7101, 97)
(1861, 28, 952)
(1861, 97)


In [13]:
import importlib

datasets_module = importlib.import_module("datasets")
dataset_class_ = getattr(datasets_module, experiment_config["dataset"])
dataset_args = experiment_config.get("dataset_args", {})
dataset = dataset_class_(**dataset_args)
dataset.load_or_generate_data()
print(dataset)

ModuleNotFoundError: No module named 'util'

Subsampling data for a quick test

In [3]:
x_train = x_train[:32,:]
y_train = y_train[:32,:]

input_shape               = x_train.shape[1:]
output_length             = y_train.shape[1]
image_height, image_width = input_shape
window_width              = 12
window_stride             = 5
num_windows               = int((image_width - window_width) / window_stride) + 1
num_classes               = 80
# output_length, num_classes = output_shape

print(input_shape, output_length, num_windows)

(28, 952) 97 189


#### Model Architecture

In [4]:
# CTC architecture has 4 inputs
image_input  = Input(shape=input_shape, name="image")
y_true       = Input(shape=(output_length,), name="y_true")
input_length = Input(shape=(1,), name="input_length")
label_length = Input(shape=(1,), name="label_length")

image_reshaped = Reshape((image_height, image_width, 1))(image_input)
image_patches  = Lambda(slide_window, 
                       arguments={"window_width": window_width, "window_stride": window_stride})(image_reshaped)

# Make a LeNet and get rid of the last two layers (softmax and dropout)
convnet         = lenet((image_height, window_width, 1), (num_classes,))
convnet         = KerasModel(inputs=convnet.inputs, outputs=convnet.layers[-2].output)      # (num_windows, 12
convnet_outputs = TimeDistributed(convnet)(image_patches)

# (num_windows, 128)
lstm_output     = LSTM(128, return_sequences=True)(convnet_outputs)                          

# (num_windows, num_classes)
softmax_output  = Dense(num_classes, activation="softmax", name="softmax_output")(lstm_output)
    
    
input_length_processed = Lambda(lambda x, num_windows=None: x * num_windows, 
                                arguments={"num_windows": num_windows})(input_length)

ctc_loss_output        = Lambda(lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]), 
                         name="ctc_loss")([y_true, softmax_output, input_length_processed, label_length])
ctc_decoded_output     = Lambda(lambda x: ctc_decode(x[0], x[1], output_length), name="ctc_decoded")(
        [softmax_output, input_length_processed])

model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length], outputs=[ctc_loss_output, ctc_decoded_output],)

model.compile(optimizer="adam")
model.summary()

Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
image (InputLayer)              [(None, 28, 952)]    0                                            
__________________________________________________________________________________________________
reshape (Reshape)               (None, 28, 952, 1)   0           image[0][0]                      
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 189, 28, 12,  0           reshape[0][0]                    
__________________________________________________________________________________________________
time_distributed (TimeDistribut (None, 189, 128)     412160      lambda[0][0]               

In [5]:
model.fit(x_train, y_train, epochs=10)

Epoch 1/10


AssertionError: in user code:

    /home/aditya/miniconda3/envs/fsdl-text-recognizer/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:505 train_function  *
        outputs = self.distribute_strategy.run(
    /home/aditya/miniconda3/envs/fsdl-text-recognizer/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/aditya/miniconda3/envs/fsdl-text-recognizer/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/aditya/miniconda3/envs/fsdl-text-recognizer/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/aditya/miniconda3/envs/fsdl-text-recognizer/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:465 train_step  **
        y_pred = self(x, training=True)
    /home/aditya/miniconda3/envs/fsdl-text-recognizer/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:927 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /home/aditya/miniconda3/envs/fsdl-text-recognizer/lib/python3.7/site-packages/tensorflow/python/keras/engine/network.py:714 call
        convert_kwargs_to_constants=base_layer_utils.call_context().saving)
    /home/aditya/miniconda3/envs/fsdl-text-recognizer/lib/python3.7/site-packages/tensorflow/python/keras/engine/network.py:894 _run_internal_graph
        assert str(id(x)) in tensor_dict, 'Could not compute output ' + str(x)

    AssertionError: Could not compute output Tensor("ctc_loss/Identity:0", shape=(None, 1), dtype=float32)
