# CNN with Bidirectional RNN - Char Classification
Using TensorFlow

## TODO
```
Gap span in dataset creation
Everything
```

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2


%matplotlib notebook
# Increase size of plots
plt.rcParams['figure.figsize'] = (9.0, 5.0)


# Helpers
from ocr.helpers import implt
from ocr.mlhelpers import TrainingPlot, DataSet
from ocr.imgtransform import coordinates_remap
from ocr.datahelpers import loadWordsData, correspondingShuffle
from ocr.tfhelpers import Graph, create_cell

# Start from an empty default graph and open an interactive session,
# so that Tensor.eval() / Operation.run() can be called without passing a session
tf.reset_default_graph()
sess = tf.InteractiveSession()

# Report versions of the main libraries
for prefix, module in (("OpenCV: ", cv2),
                       ("Numpy: ", np),
                       ("TensorFlow: ", tf)):
    print(prefix + module.__version__)

OpenCV: 3.1.0
Numpy: 1.13.1
TensorFlow: 1.2.1


## Loading Images

In [2]:
# Load the word images together with their gaplines;
# the second returned value is not needed in this notebook
images, _, gaplines = loadWordsData(['data/words/'], loadGaplines=True)


Loading words...
Number of Images: 1008


## Settings

In [3]:
PAD = 0                            # Value used for padding target sequences in a batch
POS = 1                            # Label of a positive position (gap)
NEG = 0                            # Label of a negative position (no gap)

                                   # TODO implement POS SPAN in dataset creation
POS_SPAN = 0                       # Number of positive values around true position
                                   # (passed to BucketDataIterator as gap_span)

slider_size = (60, 30)             # (height, width) of the slider; height is set to 60 by data and width should be even
slider_step = 2                    # Number of pixels the slider moves in one step
N_INPUT = 1800                     # Size of one vector in the input sequence (will depend on the CNN)
num_buckets = 5                    # Number of buckets of the training iterator
n_classes = 2                      # Number of different outputs

rnn_layers = 2
rnn_residual_layers = 1            # HAVE TO be smaller than rnn_layers (presumably - verify against create_cell)
rnn_units = 256

learning_rate = 1e-4               # 1e-4
dropout = 0.2                      # Fraction of dropped-out data; the feed uses keep_prob = 1 - dropout
train_set = 0.8                    # Fraction of the data used for training

TRAIN_STEPS = 50000                 # Number of training steps!
TEST_ITER = 150                    # Accuracy is plotted every TEST_ITER steps
LOSS_ITER = 50                     # Loss is plotted every LOSS_ITER steps
SAVE_ITER = 2000                   # Model is saved every SAVE_ITER steps
BATCH_SIZE = 32
# EPOCH = 2000                       # "Number" of batches in epoch


# Function for preprocessing images
# The graph is restored from the saved CNN model and evaluates
# the 'h_conv2_flat' operation for the given input
graph = Graph('models/gap-clas/CNN-CG',
              operation='h_conv2_flat')
def imgPreprocessor(x):
    """
    Run one flattened slider window through the restored CNN graph.

    Args:
        x: pixels of a single slider window; must contain exactly
           slider_size[0] * slider_size[1] values
    Returns:
        First item of the graph output, cut to the first N_INPUT values
    """
    # Input size follows the slider settings instead of a hard-coded 1800,
    # so changing slider_size does not silently break the reshape
    window_pixels = slider_size[0] * slider_size[1]
    # Cut returned data to N_INPUT size - no need for everything
    return graph.run(np.reshape(x, (1, window_pixels)))[0][:N_INPUT]

INFO:tensorflow:Restoring parameters from models/gap-clas/CNN-CG


## Dataset

In [4]:
# Shuffle images together with their gaplines
images, gaplines = correspondingShuffle(images, gaplines)

# Pad both sides of every image by half of the slider width and shift
# the gaplines by the same offset - RUN ONLY ONCE (data is changed in place)
half_slider = int(slider_size[1] / 2)
for idx, image in enumerate(images):
    images[idx] = cv2.copyMakeBorder(image,
                                     0, 0, half_slider, half_slider,
                                     cv2.BORDER_CONSTANT,
                                     value=0)
    gaplines[idx] += half_slider


# Split data into train and test dataset
div = int(train_set * len(images))

trainImages, testImages = images[:div], images[div:]
trainGaplines, testGaplines = gaplines[:div], gaplines[div:]

print("Training images:", div)
print("Testing images:", len(images) - div)

Training images: 806
Testing images: 202


In [5]:
class BucketDataIterator():
    """
    Iterator feeding the gap classifier with bucketed, padded batches.

    Every image is cut into a sequence of slider windows and every window
    gets a label (POS for a gap position, NEG otherwise). The sequences are
    sorted by length and split into buckets, so one batch only holds
    sequences of similar length.

    Uses notebook-level names: NEG, POS, PAD, N_INPUT and - in next_feed
    only - inputs, targets, length, keep_prob, dropout.
    """
    def __init__(self,
                 images,
                 gaplines,
                 gap_span,
                 num_buckets=5,
                 slider=(60, 30),
                 slider_step=2,
                 imgprocess=lambda x: x,
                 train=True):
        """
        Args:
            images: sequence of 2D image arrays (height, width)
            gaplines: per-image sequences of gap x-coordinates; half of the
                      slider width is subtracted to get the window position
            gap_span: number of extra positive labels on each side of
                      every gap position (0 marks the gap position only)
            num_buckets: number of buckets the sorted data is split into
            slider: (height, width) of the sliding window
            slider_step: number of pixels between two windows
            imgprocess: function applied to every flattened window
            train: if True, next_feed uses dropout (keep_prob = 1 - dropout)
        """
        self.train = train
        half_slider = int(slider[1] / 2)
        span = max(0, int(gap_span)) if gap_span else 0
        span_offsets = np.arange(-span, span + 1)

        # Number of slider positions (sequence length) per image
        length = [(image.shape[1] - slider[1]) // slider_step for image in images]

        # Split images to sequence of vectors
        # + targets seq of labels per image in images seq
        images_seq = np.empty(len(images), dtype=object)
        targets_seq = np.empty(len(images), dtype=object)
        for i, img in enumerate(images):
            images_seq[i] = [
                imgprocess(img[:, loc * slider_step: loc * slider_step + slider[1]].flatten())
                for loc in range(length[i])]

            labels = np.ones((length[i], 1)) * NEG
            if length[i] > 0:
                # Window index whose centre lies on the gapline
                centers = (np.asarray(gaplines[i]) - half_slider) // slider_step
                # A gap at the very end of the image maps to index == length,
                # clip every index into the valid range
                centers = np.clip(centers.astype(int), 0, length[i] - 1)
                # Widen every gap by gap_span positions to both sides
                positions = (centers.reshape(-1, 1) + span_offsets).ravel()
                positions = positions[(positions >= 0) & (positions < length[i])]
                labels[positions] = POS
            targets_seq[i] = labels

        # Create pandas dataFrame and sort it by images seq length (length)
        # in_length == out_length
        self.dataFrame = pd.DataFrame({'length': length,
                                       'images': images_seq,
                                       'targets': targets_seq
                                      }).sort_values('length').reset_index(drop=True)

        bsize = int(len(images) / num_buckets)
        self.num_buckets = num_buckets

        # Create buckets by slicing parts by indexes,
        # the last bucket takes all remaining rows
        self.buckets = []
        for bucket in range(num_buckets-1):
            self.buckets.append(self.dataFrame.iloc[bucket * bsize: (bucket+1) * bsize])
        self.buckets.append(self.dataFrame.iloc[(num_buckets-1) * bsize:])

        self.buckets_size = [len(bucket) for bucket in self.buckets]

        # cursor[i] will be the cursor for the ith bucket
        self.cursor = np.array([0] * num_buckets)
        self.bucket_order = np.random.permutation(num_buckets)
        self.bucket_cursor = 0
        self.shuffle()
        print("Iterator created.")


    def shuffle(self, idx=None):
        """ Shuffle idx bucket or each bucket separately and reset its cursor """
        for i in [idx] if idx is not None else range(self.num_buckets):
            self.buckets[i] = self.buckets[i].sample(frac=1).reset_index(drop=True)
            self.cursor[i] = 0


    def next_batch(self, batch_size):
        """
        Creates next batch of (at most) batch_size sequences from one bucket
        Returns: image seq  - float32 array (batch, max_length, N_INPUT),
                 target seq - float32 array (batch, max_length, 1),
                 lengths    - true length of every sequence in the batch
        """
        i_bucket = self.bucket_order[self.bucket_cursor]
        # Increment cursor and shuffle in case of new round
        self.bucket_cursor = (self.bucket_cursor + 1) % self.num_buckets
        if self.bucket_cursor == 0:
            self.bucket_order = np.random.permutation(self.num_buckets)

        # Not enough unseen rows left in the bucket -> start it over
        if self.cursor[i_bucket] + batch_size > self.buckets_size[i_bucket]:
            self.shuffle(i_bucket)

        # Handle too big batch sizes
        if batch_size > self.buckets_size[i_bucket]:
            batch_size = self.buckets_size[i_bucket]

        res = self.buckets[i_bucket].iloc[self.cursor[i_bucket]:
                                          self.cursor[i_bucket]+batch_size]
        self.cursor[i_bucket] += batch_size

        # Pad input and target sequences to the longest sequence of the batch
        lengths = res['length'].values
        max_length = max(lengths)

        input_seq = np.zeros((batch_size, max_length, N_INPUT), dtype=np.float32)
        for i, img in enumerate(res['images']):
            input_seq[i][:lengths[i]] = img

        target_seq = np.ones([batch_size, max_length, 1], dtype=np.float32) * PAD
        for i, target in enumerate(res['targets'].values):
            target_seq[i][:lengths[i]] = target

        return input_seq, target_seq, lengths


    def next_feed(self, size):
        """ Create feed dict (placeholder -> value) directly for the model """
        (inputs_,
         targets_,
         length_) = self.next_batch(size)
        return {
            inputs: inputs_,
            targets: targets_,
            length: length_,
            # Dropout is only active for the training iterator
            keep_prob: (1.0 - dropout) if self.train else 1.0
        }

In [6]:
# Create iterator for feeding BiRNN
# Iterators feeding the BiRNN; imgprocess keeps its identity default
# (imgPreprocessor is intentionally not passed)
train_iterator = BucketDataIterator(images=trainImages,
                                    gaplines=trainGaplines,
                                    gap_span=POS_SPAN,
                                    num_buckets=num_buckets,
                                    slider=slider_size,
                                    slider_step=slider_step,
                                    train=True)
test_iterator = BucketDataIterator(images=testImages,
                                   gaplines=testGaplines,
                                   gap_span=POS_SPAN,
                                   num_buckets=2,
                                   slider=slider_size,
                                   slider_step=slider_step,
                                   train=False)

Iterator created.
Iterator created.


# Create classifier

## Inputs

In [7]:
# Input placeholders
# N_INPUT -> size of vector representing one image in sequence
# Inputs shape (batch_size, max_seq_length, N_INPUT) - batch major,
# the same layout the iterator produces
inputs = tf.placeholder(tf.float32, shape=(None, None, N_INPUT), name='inputs')
# True (unpadded) length of every sequence in the batch
length = tf.placeholder(tf.int32, shape=(None,), name='length')
# Labels - required for training, not required for testing and application
targets = tf.placeholder(tf.float32, shape=(None, None, 1), name='targets')
# Dropout value (probability of keeping a unit)
keep_prob = tf.placeholder(tf.float32, name='keep_prob')

## Bi-RNN

In [8]:
# Cells - forward and backward direction use the same configuration,
# but each direction gets its own cell instance
cell_fw, cell_bw = [create_cell(rnn_units,
                                rnn_layers,
                                rnn_residual_layers,
                                is_dropout=True,
                                keep_prob=keep_prob)
                    for _ in range(2)]

In [9]:
# NOTE(review): W is only referenced by the commented-out alternatives below;
# the dense layer at the end of this cell creates its own weights
W = tf.Variable(tf.random_normal([2*rnn_units, 1]))
# b = tf.Variable(tf.random_normal([n_classes]))

# Bidirectional RNN
# time_major is not set, so inputs are expected batch major;
# sequence_length gives the unpadded length of every sequence
bi_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
    cell_fw = cell_fw,
    cell_bw = cell_bw,
    inputs = inputs,
    sequence_length = length,
    dtype = tf.float32)

# Join forward and backward outputs along the last (feature) axis
outputs = tf.concat(bi_outputs, -1)

# pred = tf.matmul(outputs, W)
# pred = tf.scan(lambda a, x: tf.matmul(x, W), outputs, infer_shape=False)
# One output unit per time step; the sigmoid is already applied here,
# so pred holds values in (0, 1), not raw logits
pred = tf.layers.dense(inputs=outputs, units=1, activation=tf.sigmoid)

In [10]:
# Testing - predictions and targets of one small batch must have the same shape
sess.run(tf.global_variables_initializer())
fd = test_iterator.next_feed(5)
for tensor in (pred, targets):
    print(tensor.eval(fd).shape)

(5, 67, 1)
(5, 67, 1)


## Optimizer

In [11]:
# Define loss and optimizer
# `pred` is the output of a sigmoid, i.e. a probability. Feeding it to
# sigmoid_cross_entropy_with_logits would apply the sigmoid a second time,
# so the cross entropy is computed directly from probabilities (log loss).
# Time steps behind the true sequence length are padding -> weight 0,
# they influence neither the loss nor the accuracy.
step_mask = tf.expand_dims(
    tf.sequence_mask(length, tf.shape(targets)[1], dtype=tf.float32), -1)
loss = tf.losses.log_loss(labels=targets, predictions=pred, weights=step_mask)
train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# Evaluate model - share of correctly classified, non-padded positions
correct_pred = tf.equal(tf.round(pred), targets)
accuracy = (tf.reduce_sum(tf.cast(correct_pred, tf.float32) * step_mask)
            / tf.maximum(tf.reduce_sum(step_mask), 1.0))

## Training

In [None]:
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()

# Create plot for live stats plotting
trainPlot = TrainingPlot(TRAIN_STEPS, TEST_ITER, LOSS_ITER)

try:
    for i_batch in range(TRAIN_STEPS):
        fd = train_iterator.next_feed(BATCH_SIZE)
        train_step.run(fd)

        if i_batch % LOSS_ITER == 0:
            # Plotting loss of the current training batch
            tmpLoss = loss.eval(fd)
            trainPlot.updateCost(tmpLoss, i_batch // LOSS_ITER)

        if i_batch % TEST_ITER == 0:
            # Plotting accuracy
            # Note: fd comes from the training iterator, so the train
            # accuracy is measured with dropout still active
            fd_test = test_iterator.next_feed(BATCH_SIZE)
            accTest = accuracy.eval(fd_test)
            accTrain = accuracy.eval(fd)
            trainPlot.updateAcc(accTest, accTrain, i_batch // TEST_ITER)

        if i_batch % SAVE_ITER == 0:
            saver.save(sess, 'models/gap-clas/Bi-RNN')

except KeyboardInterrupt:
    saver.save(sess, 'models/gap-clas/Bi-RNN')
    print('Training interrupted, model saved.')
else:
    # Periodic checkpoints stop at the last multiple of SAVE_ITER,
    # store the weights of the finished training as well
    saver.save(sess, 'models/gap-clas/Bi-RNN')


fd_test = test_iterator.next_feed(2*BATCH_SIZE)
accTest = accuracy.eval(fd_test)
print("Training finished with accuracy:", accTest)

<IPython.core.display.Javascript object>