# Out of the Box LSTM with TensorFlow

## 1. Data Preparation

In [1]:
import numpy as np
from pprint import pprint
import datetime

import data_generator

sequence_length = 6

reference_input_data, reference_output_data = data_generator.getSequences(sequence_length)

# data_generator.getSequences(sequence_length) generates all possible combinations of
# the characters '+-0I', so for a sequence length of 6 characters there are a
# a total of 4^6 = 4096 possible combinations. Some Examples:
# '+-+-+-' = 0
# '------' = -6
# '0++000' = 2
# 'I++000' = -2
#
# Those sequences are encoded: Every character is representated by a vector, so the actual
# return value from data_generator.getSequences looks like this:
pprint(reference_input_data[0])

# There is a helper to decode that again:
pprint(data_generator.decodeSequence(reference_input_data[0]))

# The solution for that sequence is:
pprint(reference_output_data[0])

instruction_count = np.array(reference_input_data).shape[2]

array([[1, 0, 0],
       [0, 1, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 0],
       [1, 0, 0]])
'+-++0+'
3


In [2]:
NUM_EXAMPLES = len(reference_input_data) / 4 # we use 1/4 of the data for the training

test_input = reference_input_data[NUM_EXAMPLES:]
test_output = reference_output_data[NUM_EXAMPLES:] # everything beyond NUM_EXAMPLES

train_input = reference_input_data[:NUM_EXAMPLES]
train_output = reference_output_data[:NUM_EXAMPLES]

print("We'll train using " + str(NUM_EXAMPLES) + "/" + str(len(reference_input_data)) + " Examples")

We'll train using 1024/4096 Examples


In [3]:
import tensorflow as tf

data = tf.placeholder(tf.float32, [None, sequence_length, instruction_count], name='data')
target = tf.transpose(tf.placeholder(tf.float32, [None], name='target'))

## 2. LSTM Layer

In [4]:
LSTM_SIZE = 24

In [5]:
lstm_cell = tf.nn.rnn_cell.LSTMCell(LSTM_SIZE)

In [6]:
lstm_predictions, state = tf.nn.dynamic_rnn(lstm_cell, data, dtype=tf.float32)

lstm_predictions = tf.transpose(lstm_predictions, [1, 0, 2])
lstm_prediction = tf.gather(lstm_predictions, int(lstm_predictions.get_shape()[0]) - 1)

In [7]:
weight = tf.Variable(tf.truncated_normal([LSTM_SIZE, 1]))
bias = tf.Variable(tf.constant(0.1, shape=[1]))

prediction = tf.matmul(lstm_prediction, weight) + bias

## 3. Cost & Optimizing

In [8]:
with tf.name_scope('mean_square_error'):
    mean_square_error = tf.reduce_sum(tf.square(tf.subtract(target, tf.unstack(prediction, axis = 1))))
tf.summary.scalar('mean_square_error', mean_square_error)

<tf.Tensor 'mean_square_error_1:0' shape=() dtype=string>

In [9]:
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(mean_square_error)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [10]:
with tf.name_scope('error'):
    with tf.name_scope('mistakes'):
        mistakes = tf.not_equal(target, tf.round(tf.unstack(prediction, axis = 1)))
    with tf.name_scope('error'):
        error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
tf.summary.scalar('error', error)

<tf.Tensor 'error_1:0' shape=() dtype=string>

## 4. Training

In [11]:
sess = tf.InteractiveSession()
merged = tf.summary.merge_all()

date = str(datetime.datetime.now())
train_writer = tf.summary.FileWriter('logs/out_of_the_box_lstm/' + date + '/train', sess.graph)
test_writer = tf.summary.FileWriter('logs/sout_of_the_box_lstm/' + date + '/test', sess.graph)

init_op = tf.global_variables_initializer()
sess.run(init_op)

In [12]:
epoch = 4000

for i in range(epoch):
    if (i + 1) % 20 == 0:
        summary, incorrect, mean_squ_err = sess.run([merged, error, mean_square_error], {data: test_input, target: test_output})
        test_writer.add_summary(summary, i)
        
        print('Epoch {:4d} | incorrect {: 3.1f}% | mean squ error {: 3.1f}'.format(i + 1, incorrect * 100, mean_squ_err))
    else:
        summary, acc = sess.run([merged, error], {data: train_input, target: train_output})
        train_writer.add_summary(summary, i)
    
    sess.run(minimize,{data: train_input, target: train_output})

Epoch   20 | incorrect  77.5% | mean squ error  8737.9
Epoch   40 | incorrect  77.0% | mean squ error  8102.8
Epoch   60 | incorrect  73.7% | mean squ error  7553.9
Epoch   80 | incorrect  73.6% | mean squ error  7313.4
Epoch  100 | incorrect  74.2% | mean squ error  7095.0
Epoch  120 | incorrect  73.2% | mean squ error  6811.3
Epoch  140 | incorrect  73.0% | mean squ error  6269.7
Epoch  160 | incorrect  71.0% | mean squ error  5386.7
Epoch  180 | incorrect  66.4% | mean squ error  4642.8
Epoch  200 | incorrect  62.3% | mean squ error  3968.5
Epoch  220 | incorrect  59.7% | mean squ error  3501.6
Epoch  240 | incorrect  57.3% | mean squ error  3178.4
Epoch  260 | incorrect  54.8% | mean squ error  2947.7
Epoch  280 | incorrect  53.4% | mean squ error  2775.0
Epoch  300 | incorrect  51.7% | mean squ error  2635.8
Epoch  320 | incorrect  50.4% | mean squ error  2504.8
Epoch  340 | incorrect  49.7% | mean squ error  2376.9
Epoch  360 | incorrect  48.2% | mean squ error  2240.8
Epoch  380

Epoch 3100 | incorrect  1.3% | mean squ error  117.1
Epoch 3120 | incorrect  1.3% | mean squ error  116.4
Epoch 3140 | incorrect  1.3% | mean squ error  115.7
Epoch 3160 | incorrect  1.3% | mean squ error  115.0
Epoch 3180 | incorrect  1.3% | mean squ error  114.4
Epoch 3200 | incorrect  1.3% | mean squ error  114.6
Epoch 3220 | incorrect  1.3% | mean squ error  114.8
Epoch 3240 | incorrect  1.3% | mean squ error  112.3
Epoch 3260 | incorrect  1.3% | mean squ error  111.8
Epoch 3280 | incorrect  1.3% | mean squ error  111.2
Epoch 3300 | incorrect  1.3% | mean squ error  110.6
Epoch 3320 | incorrect  1.3% | mean squ error  110.0
Epoch 3340 | incorrect  1.3% | mean squ error  109.4
Epoch 3360 | incorrect  1.3% | mean squ error  108.8
Epoch 3380 | incorrect  1.3% | mean squ error  108.2
Epoch 3400 | incorrect  1.3% | mean squ error  107.6
Epoch 3420 | incorrect  1.3% | mean squ error  107.0
Epoch 3440 | incorrect  1.3% | mean squ error  106.4
Epoch 3460 | incorrect  1.3% | mean squ error 

In [13]:
# Test the result
sess.run(prediction, {data: [data_generator.encodeSequence("00-+++")]})

array([[ 1.99913526]], dtype=float32)

In [14]:
sess.close()
train_writer.close()
test_writer.close()
