# Understanding LSTM Networks

## 1. Datenaufbereitung

In [1]:
import numpy as np
from pprint import pprint
import datetime

import trainer
reload(trainer)

sequence_length = 6

reference_input_data, reference_output_data = trainer.getSequences(sequence_length)

# trainer.getSequences(sequence_length) generates all possible combinations of
# the characters '+-0I', so for a sequence length of 6 characters there are a
# a total of 4^6 = 4096 possible combinations. Some Examples:
# '+-+-+-' = 0
# '------' = -6
# '0++000' = 2
# 'I++000' = -2
#
# Those sequences are encoded: Every character is representated by a vector, so the actual
# return value from trainer.getSequences looks like this:
pprint(reference_input_data[0])

# There is a helper to decode that again:
pprint(trainer.decodeSequence(reference_input_data[0]))

# The solution for that sequence is:
pprint(reference_output_data[0])

instruction_count = np.array(reference_input_data).shape[2]

array([[1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 0, 0]])
'+I+--0'
2


In [2]:
NUM_EXAMPLES = len(reference_input_data) / 4 # we use 1/4 of the data for the training

test_input = reference_input_data[NUM_EXAMPLES:]
test_output = reference_output_data[NUM_EXAMPLES:] # everything beyond NUM_EXAMPLES

train_input = reference_input_data[:NUM_EXAMPLES]
train_output = reference_output_data[:NUM_EXAMPLES]

print("We'll train using " + str(NUM_EXAMPLES) + "/" + str(len(reference_input_data)) + " Examples")

We'll train using 1024/4096 Examples


In [3]:
import tensorflow as tf

data = tf.placeholder(tf.float32, [None, sequence_length, instruction_count], name='data')
target = tf.transpose(tf.placeholder(tf.float32, [None], name='target'))

## 2. Die LSTM Schicht

In [4]:
LSTM_SETTINGS = {
    'num_cells': 24,
    'feature_size': 3
}

cell = tf.nn.rnn_cell.LSTMCell(LSTM_SETTINGS['num_cells'], state_is_tuple=True)

lstm_predictions, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
lstm_predictions = tf.transpose(lstm_predictions, [1, 0, 2])

lstm_prediction = tf.gather(lstm_predictions, int(lstm_predictions.get_shape()[0]) - 1)

In [5]:
weight = tf.Variable(tf.truncated_normal([LSTM_SETTINGS['num_cells'], 1]))
bias = tf.Variable(tf.constant(0.1, shape=[1]))

prediction = tf.matmul(lstm_prediction, weight) + bias

## 3. Cost & Optimizing

In [6]:
with tf.name_scope('mean_square_error'):
    mean_square_error = tf.reduce_sum(tf.square(tf.subtract(target, tf.unstack(prediction, axis = 1))))
tf.summary.scalar('mean_square_error', mean_square_error)

<tf.Tensor 'mean_square_error_1:0' shape=() dtype=string>

In [7]:
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(mean_square_error)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [8]:
with tf.name_scope('error'):
    with tf.name_scope('mistakes'):
        mistakes = tf.not_equal(target, tf.round(tf.unstack(prediction, axis = 1)))
    with tf.name_scope('error'):
        error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
tf.summary.scalar('error', error)

<tf.Tensor 'error_1:0' shape=() dtype=string>

## 4. Training

In [None]:
sess = tf.InteractiveSession()
merged = tf.summary.merge_all()

date = str(datetime.datetime.now())
train_writer = tf.summary.FileWriter('logs/selfmade_lstm/' + date + '/train', sess.graph)
test_writer = tf.summary.FileWriter('logs/selfmade_lstm/' + date + 'test')

model_checkpoint = 'lstm_self_built.chkpt'

tf_saver = tf.train.Saver(tf.global_variables())

init_op = tf.global_variables_initializer()
sess.run(init_op)

In [None]:
epoch = 4000

for i in range(epoch):
    if (i + 1) % 20 == 0:
        summary, incorrect, mean_squ_err = sess.run([merged, error, mean_square_error], {data: test_input, target: test_output})
        test_writer.add_summary(summary, i)
        
        print('Epoch {:4d} | incorrect {: 3.1f}% | mean squ error {: 3.1f}'.format(i + 1, incorrect * 100, mean_squ_err))
    else:
        summary, acc = sess.run([merged, error], {data: train_input, target: train_output})
        train_writer.add_summary(summary, i)
    
    sess.run(minimize,{data: train_input, target: train_output})
    
    if i % 100:
        tf_saver.save(sess, model_checkpoint)

Epoch   20 | incorrect  77.8% | mean squ error  9213.7
Epoch   40 | incorrect  77.8% | mean squ error  8520.0
Epoch   60 | incorrect  75.6% | mean squ error  7836.4
Epoch   80 | incorrect  72.6% | mean squ error  7428.3
Epoch  100 | incorrect  72.4% | mean squ error  7240.1
Epoch  120 | incorrect  72.4% | mean squ error  7032.9
Epoch  140 | incorrect  71.9% | mean squ error  6738.3
Epoch  160 | incorrect  71.7% | mean squ error  6314.5
Epoch  180 | incorrect  71.4% | mean squ error  5744.2
Epoch  200 | incorrect  70.8% | mean squ error  5109.4
Epoch  220 | incorrect  67.3% | mean squ error  4520.6
Epoch  240 | incorrect  62.1% | mean squ error  3868.9
Epoch  260 | incorrect  58.3% | mean squ error  3255.9
Epoch  280 | incorrect  56.1% | mean squ error  2874.8
Epoch  300 | incorrect  54.8% | mean squ error  2674.3
Epoch  320 | incorrect  52.8% | mean squ error  2513.3
Epoch  340 | incorrect  50.5% | mean squ error  2356.5
Epoch  360 | incorrect  46.9% | mean squ error  2190.4
Epoch  380

In [None]:
# reload(trainer)
sess.run(prediction, {data: [trainer.encodeSequence("II++++")]})

In [None]:
# sess.close()
# train_writer.close()
# test_writer.close()
