In [1]:
import numpy as np
import pylab as plt

import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
from tensorflow.python.ops import ctc_ops as ctc

In [2]:
# Size of the network's output layer (last dimension of the logits). gen_data
# only draws labels from [0, num_classes - 2], so the highest index is never
# used as a label.
num_classes = 10
# Bound on the transcription length handed to gen_data; also the second
# dimension of the dense shape reported for the sparse targets in new_batch.
max_y_seq_len = 10

def gen_data(max_y_seq_len, num_classes):
    """Generate one random (sequence, transcription) pair.

    Every label is written as its binary representation (without any noise),
    one bit per time step, followed by one separator step. Channel 0 holds the
    bit, channel 1 holds its complement; both channels are 0 on a separator.

    example:
        sequence:
          [ 0.  0.  1.  0.  1.  0.  1.  1.  0.]
          [ 1.  0.  0.  1.  0.  0.  0.  0.  0.]
        transcription:
          [0 5 3]

    Args:
        max_y_seq_len: max length of the transcription (inclusive, must be >= 1)
        num_classes: number of classes; labels are drawn from
            [0, num_classes - 2], so the last class index is never emitted
            (presumably it is left free for the CTC blank label)
    Returns:
        x: float array of shape [total_bits + len(y), 2] holding the sequence
        y: integer array with between 1 and max_y_seq_len labels
    """
    # randint excludes its upper bound, so add 1 to make the documented
    # maximum reachable (and to keep max_y_seq_len == 1 from raising).
    seq_len = np.random.randint(1, max_y_seq_len + 1)
    y = np.random.randint(num_classes - 1, size=[seq_len])

    # Binary code of every label, computed once and reused below.
    codes = [bin(label)[2:] for label in y]

    # One row per bit plus one separator row after each label.
    x = np.zeros([sum(len(code) for code in codes) + seq_len, 2])
    pos = 0
    for code in codes:
        for j, c in enumerate(code):
            x[pos + j, 0] = int(c)
            x[pos + j, 1] = 1 - int(c)
        # Skip the separator row, which stays all-zero.
        pos += len(code) + 1
    return x, y

# Show one generated sample: both input channels, then the label sequence.
example = gen_data(4, num_classes)
example_x, example_y = example
print("sequence:")
for channel in (0, 1):
    print(" ", example_x[:, channel])
print("transcription:")
print(" ", example_y)

sequence:
  [ 1.  0.  0.  0.  0.  1.  1.  1.  0.]
  [ 0.  1.  1.  1.  0.  0.  0.  0.  0.]
transcription:
  [8 7]


In [3]:
# Padded input length in time steps: max_y_seq_len labels, each budgeted
# ceil(log2(num_classes)) bit steps plus one separator step (see gen_data).
max_x_len = max_y_seq_len*int(np.ceil(np.log(num_classes)/np.log(2))+1)
def new_batch(batch_size):
    """Draw `batch_size` samples from gen_data and pack them into one batch.

    Args:
        batch_size: number of samples to generate
    Returns:
        inputs: array of shape [batch_size, max_x_len, 2]; each sample is
            written from step 0 and the remaining steps stay zero
        lengths: list with the unpadded number of steps of every sample
        targets: list [indices, values, shape] describing the transcriptions
            in sparse form, where indices are [sample, position] pairs and
            shape is [batch_size, max_y_seq_len]
    """
    inputs = np.zeros([batch_size, max_x_len, 2])
    lengths = []
    sparse_indices = []
    sparse_values = []

    for row in range(batch_size):
        sample_x, sample_y = gen_data(max_y_seq_len, num_classes)
        n_steps = len(sample_x)
        inputs[row][:n_steps] = sample_x
        lengths.append(n_steps)
        # One sparse entry per label: (sample, position) -> label.
        sparse_indices.extend([row, col] for col in range(len(sample_y)))
        sparse_values.extend(sample_y)

    dense_shape = [batch_size, max_y_seq_len]
    return np.array(inputs), lengths, [sparse_indices, sparse_values, dense_shape]


In [None]:
# Number of samples per batch; baked into the placeholder shapes below, so
# every fed batch must have exactly this size.
batch_size = 50
# Number of units in each of the two LSTM cells (forward and backward).
num_hidden_neuron = 10

# Input and targets. The sparse target is fed through three separate
# placeholders (indices, values, dense shape) and reassembled into a
# SparseTensor; the training cell fills them from the list new_batch returns.
x = tf.placeholder(tf.float32, shape=[batch_size, max_x_len, 2])
# NOTE(review): `(batch_size)` is a parenthesised int, not a 1-tuple;
# presumably TF accepts it as a rank-1 shape -- confirm or use `[batch_size]`.
seq_lengths = tf.placeholder(tf.int32, shape=(batch_size))
y_truth_indices = tf.placeholder(tf.int64)
y_truth_values = tf.placeholder(tf.int32)
y_truth_shape = tf.placeholder(tf.int64)
y_truth = tf.SparseTensor(y_truth_indices, y_truth_values, y_truth_shape)
# One sparse tensor per sample, only used to print individual transcriptions.
y_truth_splitted = tf.sparse_split(0, batch_size, y_truth)

# Bidirectional LSTM: one cell per direction, both with num_hidden_neuron units.
lstm_cell_fw = rnn_cell.BasicLSTMCell(num_hidden_neuron, 
    forget_bias=1.0, state_is_tuple = True)
lstm_cell_bw = rnn_cell.BasicLSTMCell(num_hidden_neuron, 
    forget_bias=1.0, state_is_tuple = True)
# Turn the [batch_size, max_x_len, 2] input into a list of max_x_len tensors:
# move time to the front, flatten to rows of 2 features, then cut along axis 0
# into max_x_len pieces (legacy `tf.split(axis, num_splits, value)` order).
x_transposed = tf.transpose(x, [1, 0, 2])
x_reshaped = tf.reshape(x_transposed, [-1, 2])
x_list = tf.split(0, max_x_len, x_reshaped)
outputs_lstm, states_fw, states_bw = rnn.bidirectional_rnn(
    lstm_cell_fw, lstm_cell_bw, x_list, dtype=tf.float32)
# Each per-step output is given the static shape [batch_size, 2 * num_hidden_neuron]
# (presumably the forward and backward outputs concatenated -- confirm).
outputs_lstm_reshaped = [tf.reshape(t, [batch_size, 2 * num_hidden_neuron]) for t in outputs_lstm]
# Linear output layer shared by all time steps, mapping to num_classes scores.
W = tf.Variable(tf.truncated_normal([2*num_hidden_neuron,num_classes], stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[num_classes]))
# Stack the per-step logits back into one tensor, in list (time) order.
logits = tf.pack([tf.matmul(t, W) + b for t in outputs_lstm_reshaped])

# Training: mean CTC loss over the batch, minimised with Adam. Repeat-merging
# is switched off here and in the decoder below, so the two stay consistent.
loss = tf.reduce_mean(ctc.ctc_loss(logits, y_truth, seq_lengths, ctc_merge_repeated=False))
train_step = tf.train.AdamOptimizer(learning_rate=0.003).minimize(loss)

# Predictions: take element [0][0] of the beam-search result (presumably the
# best decoded path as a sparse tensor over the whole batch -- confirm).
y_predictions_unsplitted = ctc.ctc_beam_search_decoder(logits, seq_lengths, merge_repeated=False)[0][0]
# Error: summed, unnormalised edit distance between predictions and targets,
# divided by the total number of target labels in the batch.
error = tf.reduce_sum(tf.edit_distance(tf.to_int32(y_predictions_unsplitted), 
    y_truth, normalize=False)) / tf.to_float(tf.size(y_truth.values))
# One sparse tensor per sample, only used to print individual predictions.
y_predictions = tf.sparse_split(0, batch_size, y_predictions_unsplitted)


In [None]:
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    # Note: each "epoch" is a single freshly generated batch.
    for epoch in range(4000):
        x_batch, seq_lengths_batch, y_batch = new_batch(batch_size)
        y_indices_batch, y_values_batch, y_shape_batch = y_batch
        feed_dict = {
            x: x_batch,
            seq_lengths: seq_lengths_batch,
            y_truth_indices: y_indices_batch,
            y_truth_values: y_values_batch,
            y_truth_shape: y_shape_batch,
        }
        if epoch % 100 == 0:
            # Every 100th batch is only evaluated, never trained on.
            loss_batch, error_batch, y_predictions_batch, y_truth_batch = sess.run(
                [loss, error, y_predictions, y_truth_splitted],
                feed_dict=feed_dict)
            print('Epoch %i: %f (loss = %f)' % (epoch, error_batch, loss_batch))
            # Show one fixed sample of the batch (index 13) as a spot check.
            print('  ', y_truth_batch[13].values, "->")
            print('  ', y_predictions_batch[13].values)
        else:
            sess.run(train_step, feed_dict=feed_dict)

Epoch 0: 1.044898 (loss = 29.864012)
   [0 3] ->
   [7 5]
Epoch 100: 0.954545 (loss = 11.147442)
   [3 2] ->
   []
Epoch 200: 0.951111 (loss = 11.151478)
   [4 4 3 3 6] ->
   []
Epoch 300: 0.863469 (loss = 12.339660)
   [6 6 5 8 7] ->
   [6]
Epoch 400: 0.862069 (loss = 10.999942)
   [7 5 1 3 4 8 1 0] ->
   [7 8]
Epoch 500: 0.793991 (loss = 9.169186)
   [2 2 4] ->
   [4]
Epoch 600: 0.194853 (loss = 7.047174)
   [0 6 4 7] ->
   [0 6 4 7]
Epoch 700: 0.000000 (loss = 3.053547)
   [4] ->
   [4]
Epoch 800: 0.000000 (loss = 1.541650)
   [8 2 5] ->
   [8 2 5]
Epoch 900: 0.000000 (loss = 0.885364)
   [8 2] ->
   [8 2]
Epoch 1000: 0.000000 (loss = 0.516311)
   [1 1 7 6] ->
   [1 1 7 6]
Epoch 1100: 0.000000 (loss = 0.356968)
   [1 2 7 8 4 1 5 6] ->
   [1 2 7 8 4 1 5 6]
Epoch 1200: 0.000000 (loss = 0.239811)
   [7 7 3 6] ->
   [7 7 3 6]
Epoch 1300: 0.000000 (loss = 0.191230)
   [2 6 1 1 8 6 8 6] ->
   [2 6 1 1 8 6 8 6]
Epoch 1400: 0.000000 (loss = 0.161497)
   [2 8 4] ->
   [2 8 4]
Epoch 1500: 0.0