## Dataload

In [1]:
import numpy as np
import glob
import os
import random
import re

In [2]:
X_train_text = [i.split("\\")[-1].split("_")[0] for i in glob.glob(os.getcwd() + "\datagen\\data\\train\\*")]
X_test_text = [i.split("\\")[-1].split("_")[0] for i in glob.glob(os.getcwd() + "\datagen\\data\\test\\*")]
X_train = np.load("datagen/data/train.npy")
X_test = np.load("datagen/data/test.npy")


In [3]:
regex = r'^[a-z ]+$'
alphabet = 'abcdefghijklmnopqrstuvwxyz'

# Translation of characters to unique integer values
def text_to_labels(text):
    ret = []
    for char in text:
        ret.append(alphabet.find(char))
    return ret


# Reverse translation of numerical classes back to characters
def labels_to_text(labels):
    ret = []
    for c in labels:
        if c == len(alphabet):  # CTC Blank
            ret.append("")
        else:
            ret.append(alphabet[c])
    return "".join(ret)


# only a-z and space..probably not to difficult
# to expand to uppercase and symbols

def is_valid_str(in_str):
    search = re.compile(regex, re.UNICODE).search
    return bool(search(in_str))

max_string_len = max(max([len(i) for i in X_train_text]), max([len(i) for i in X_test_text]))

def vectorize_input(x, y, train):
    mono_fraction=0.5
    num_words = len(y)
    string_list = y.copy()
    tmp_string_list = y.copy()
    
    Y_data = np.ones([num_words, max_string_len]) * -1
    X_text = []
    Y_len = [0] * num_words

    if len(tmp_string_list) != num_words:
        raise IOError('Could not pull enough words from supplied monogram and bigram files. ')
    # interlace to mix up the easy and hard words
    #string_list[::2] = tmp_string_list[:num_words // 2]
    #string_list[1::2] = tmp_string_list[num_words // 2:]

    
    for i, word in enumerate(string_list):
        Y_len[i] = len(word)
        Y_data[i, 0:len(word)] = text_to_labels(word)
        X_text.append(word)
    Y_len = np.expand_dims(np.array(Y_len), 1)
    
    val_split = 0.2

    cur_val_index = val_split
    cur_train_index = 0

    size = len(y)


    labels = np.ones([size, max_string_len])
    input_length = np.zeros([size, 1])
    label_length = np.zeros([size, 1])
    blank_label = len(alphabet)
    for i in range(size):
        if train and i > size - 4:
            labels[i, :] = Y_data[i]
            input_length[i] = 7
            label_length[i] = Y_len[i]
        else:
            labels[i, :] = Y_data[i]
            input_length[i] = 7
            label_length[i] = Y_len[i]
            
            
    '''
    inputs = {'the_input': x,
                  'the_labels': labels,
                  'input_length': input_length,
                  'label_length': label_length,
                  }
    outputs = {'ctc': np.zeros([size])}
    return (inputs, outputs)
    '''
    return labels, input_length, label_length

In [4]:
X_train_formod = vectorize_input(X_train, X_train_text, True)
X_test_formod = vectorize_input(X_test, X_test_text, False)

In [12]:
def yield_train(batch_size):
    train_i = 0
    max_len_train = len(X_train_text)
    while 1:
        if(train_i + batch_size <= max_len_train):
            train_i = 0


            inputs = {'the_input': X_train[train_i: train_i + batch_size ],
                      'the_labels': X_train_formod[0][train_i:train_i + batch_size ],
                      'input_length': X_train_formod[1][train_i:train_i + batch_size ],
                      'label_length': X_train_formod[2][train_i:train_i + batch_size ],
                      }
        outputs = {'ctc': np.zeros([batch_size])}
        train_i += batch_size

        yield (inputs, outputs)
    
def yield_test(batch_size):
    test_i = 0
    max_len_test = len(X_test_text)
    while 1:
        if(test_i + batch_size <= max_len_test):
            test_i = 0


            inputs = {'the_input': X_test[test_i: test_i + batch_size],
                      'the_labels': X_test_formod[0][test_i:test_i + batch_size],
                      'input_length': X_test_formod[1][test_i:test_i + batch_size],
                      'label_length': X_test_formod[2][test_i:test_i + batch_size],
                      }
        outputs = {'ctc': np.zeros([batch_size])}
        test_i += batch_size
        yield (inputs, outputs)

## Model

In [7]:
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, LSTM, Activation, Input, Reshape, GRU, Lambda
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
# fix random seed for reproducibility
from keras import backend as K
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.optimizers import SGD

np.random.seed(7)

Using TensorFlow backend.


### Some helpful func

In [8]:
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

# For a real OCR application, this should be beam search with a dictionary
# and language model.  For this example, best path is sufficient.

def decode_batch(test_func, word_batch):
    out = test_func([word_batch])[0]
    ret = []
    for j in range(out.shape[0]):
        out_best = list(np.argmax(out[j, 2:], 1))
        out_best = [k for k, g in itertools.groupby(out_best)]
        outstr = labels_to_text(out_best)
        ret.append(outstr)
    return ret

In [17]:
import datetime
def train(run_name, start_epoch, stop_epoch):
    # Input Parameters
    img_h = 7
    img_w = 7
    words_per_epoch = 160
    val_split = 0.2
    val_words = 100
    
    
    time_dense_size = 32
    rnn_size = 512
    minibatch_size = 32


    if K.image_data_format() == 'channels_first':
        input_shape = (512, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 512)

    
    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    
    conv_to_rnn_dims = (7, 512 * 7)
    
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(input_data)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(len(alphabet) + 1, kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    
    
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    
    
    labels = Input(name='the_labels', shape=[max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speeds up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    if start_epoch > 0:
        weight_file = os.path.join(OUTPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
        model.load_weights(weight_file)
    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    
    model.fit_generator(yield_train(20),
                        steps_per_epoch=(words_per_epoch - val_words) // minibatch_size,
                        epochs=stop_epoch,
                        validation_data=yield_test(20),
                        validation_steps=val_words // minibatch_size)


if __name__ == '__main__':
    run_name = datetime.datetime.now().strftime('%Y:%m:%d:%H:%M:%S')
    train(run_name, 0, 20)
    # increase to wider images and start at epoch 20. The learned weights are reloaded

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
the_input (InputLayer)           (None, 7, 7, 512)     0                                            
____________________________________________________________________________________________________
reshape (Reshape)                (None, 7, 3584)       0           the_input[0][0]                  
____________________________________________________________________________________________________
dense1 (Dense)                   (None, 7, 32)         114720      reshape[0][0]                    
____________________________________________________________________________________________________
gru1 (GRU)                       (None, 7, 512)        837120      dense1[0][0]                     
___________________________________________________________________________________________

InvalidArgumentError: label SparseTensor is not valid: indices[20] = [3,5] is out of bounds: need 0 <= index < [20,5]
	 [[Node: ctc_5/CTCLoss = CTCLoss[ctc_merge_repeated=true, preprocess_collapse_repeated=false, _device="/job:localhost/replica:0/task:0/cpu:0"](ctc_5/Log/_281, ctc_5/ToInt64/_283, ctc_5/ToInt32_2/_285, ctc_5/ToInt32_1/_287)]]

Caused by op 'ctc_5/CTCLoss', defined at:
  File "C:\ProgramData\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\ProgramData\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\ProgramData\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-17-48f9697c83e8>", line 80, in <module>
    train(run_name, 0, 20)
  File "<ipython-input-17-48f9697c83e8>", line 55, in train
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\topology.py", line 602, in __call__
    output = self.call(inputs, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\layers\core.py", line 650, in call
    return self.function(inputs, **arguments)
  File "<ipython-input-8-1c18c308ee84>", line 6, in ctc_lambda_func
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
  File "C:\ProgramData\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py", line 3662, in ctc_batch_cost
    sequence_length=input_length), 1)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\ctc_ops.py", line 145, in ctc_loss
    ctc_merge_repeated=ctc_merge_repeated)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_ctc_ops.py", line 164, in _ctc_loss
    name=name)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): label SparseTensor is not valid: indices[20] = [3,5] is out of bounds: need 0 <= index < [20,5]
	 [[Node: ctc_5/CTCLoss = CTCLoss[ctc_merge_repeated=true, preprocess_collapse_repeated=false, _device="/job:localhost/replica:0/task:0/cpu:0"](ctc_5/Log/_281, ctc_5/ToInt64/_283, ctc_5/ToInt32_2/_285, ctc_5/ToInt32_1/_287)]]


(5011, 7, 7, 512)


'channels_last'