# Code by Rowel Atienza
This is a tutorial we found on the internet, in which a network built from LSTM cells is trained to predict the next word of a text.

First, we declare that this IS NOT our code.

Import some things

In [21]:
'''
A Recurrent Neural Network (LSTM) implementation example using TensorFlow..
Next word prediction after n_input words learned from text file.
A story is automatically generated if the predicted word is fed back as input.
Author: Rowel Atienza
Project: https://github.com/roatienza/Deep-Learning-Experiments
'''

from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import random
import collections
import time

import os # To set environment variables

A helper function that formats an elapsed time (in seconds) as seconds, minutes or hours

In [2]:
# Wall-clock reference taken when this cell runs; the training cell subtracts
# it from the current time to report the total elapsed time.
start_time = time.time()


def elapsed(sec):
    """Render a duration given in seconds as a short human-readable string.

    Durations under one minute are reported in seconds, durations under one
    hour in minutes, and anything longer in hours.  The numeric part is the
    plain ``str()`` of the (possibly divided) value, so no rounding is done.
    """
    minute = 60
    hour = 60 * minute
    if sec < minute:
        value, unit = sec, " sec"
    elif sec < hour:
        value, unit = sec / minute, " min"
    else:
        value, unit = sec / hour, " hr"
    return str(value) + unit


We will be using TensorBoard, so we need a path for the log

In [3]:
# Directory that receives the TensorBoard event files; the same path is
# printed later as the `--logdir` argument for the tensorboard command.
logs_path = '/tmp/tensorflow/rnn_words'
# Summary writer bound to that directory; the session graph is added to it
# when the training session starts.
writer = tf.summary.FileWriter(logs_path)

We will have a .txt file from which the network will learn the connections between the words

In [4]:
# Text file whose words form the training corpus.  The path is relative, so
# it is resolved against the current working directory.
training_file = 'belling_the_cat.txt'

Function to read data from a file

In [5]:
def read_data(fname):
    """Read a text file into a flat 1-D array of whitespace-separated words.

    Args:
        fname: Path of the text file to read.

    Returns:
        A 1-D ``numpy`` array holding every whitespace-separated token of the
        file, in reading order.

    Lines may hold different numbers of words, and blank lines are allowed:
    the tokens are flattened in plain Python before the array is created,
    because an array built from rows of unequal length is not rectangular
    and cannot be reshaped into a flat list of words.
    """
    with open(fname) as f:
        # str.split() with no argument already ignores leading and trailing
        # whitespace, so no separate strip() is required.
        words = [word for line in f for word in line.split()]
    return np.array(words)


Now we read the data

In [6]:
# Load the corpus with read_data(); the result is the sequence of word tokens
# that the training loop indexes by position.
training_data = read_data(training_file)
print("Loaded training data...")

Loaded training data...


For these tasks we need a way to encode the words as numbers. In this case we'll do something that is far from optimal and doesn't make much sense, but it keeps things simple: we won't use any kind of word embedding, but just give a unique integer key to each word in the file.

In [7]:
def build_dataset(words):
    """Build the word <-> integer-id lookup tables for a sequence of words.

    Ids are handed out in the order produced by ``Counter.most_common()``,
    so the most frequent word receives id 0.

    Returns:
        A ``(dictionary, reverse_dictionary)`` pair: the first maps each
        distinct word to its id, the second maps each id back to its word.
    """
    ranked = collections.Counter(words).most_common()
    dictionary = {word: idx for idx, (word, _) in enumerate(ranked)}
    reverse_dictionary = {idx: word for word, idx in dictionary.items()}
    return dictionary, reverse_dictionary


We simply assigned a unique number to each word: words are numbered in order of decreasing frequency, so the most common word gets 0.
Now we actually compute these dictionaries:

In [8]:
# word -> id and id -> word lookup tables built from the training corpus.
dictionary, reverse_dictionary = build_dataset(training_data)
# Number of distinct words; it sizes the label placeholder and the output layer.
vocab_size = len(dictionary)

Define some parameters

In [9]:
# Parameters
# Step size handed to the RMSProp optimizer.
learning_rate = 0.001
# Number of training steps; each step feeds a single (context, target) sample.
training_iters = 50000
# Averaged loss/accuracy and a sample prediction are printed every display_step steps.
display_step = 1000
# Number of consecutive context words fed to the network to predict the next word.
n_input = 3

# number of units in RNN cell
n_hidden = 512

Now we'll actually start to create the graph, so we need placeholders for the inputs:

In [10]:
# tf Graph input
# x: batch of contexts, each made of n_input word ids (one per time step,
# trailing axis of size 1); the ids are fed as floats.
x = tf.placeholder("float", [None, n_input, 1])
# y: one target vector of length vocab_size per sample; the training loop
# feeds it as a one-hot encoding of the next word.
y = tf.placeholder("float", [None, vocab_size])

Some initialization for weights and biases:

In [11]:
# RNN output node weights and biases
# Both are initialised from tf.random_normal.  The weight matrix maps the
# n_hidden-wide LSTM output to one score per vocabulary word, and the bias
# adds one offset per vocabulary word.
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, vocab_size]))
}
biases = {
    'out': tf.Variable(tf.random_normal([vocab_size]))
}

The actual definition of the network is quite simple. Here, it's defined as a function, which is actually called only once.

In [12]:
def RNN(x, weights, biases):
    """Build the LSTM next-word graph and return its output scores.

    ``x`` is flattened to rows of ``n_input`` values and cut into ``n_input``
    single-column tensors, one per time step.  These are run through two
    stacked ``BasicLSTMCell`` layers of ``n_hidden`` units each, and the
    output of the last time step is projected with ``weights['out']`` and
    ``biases['out']``.
    """
    # Drop the trailing axis: every row now carries the n_input word ids.
    flat_inputs = tf.reshape(x, [-1, n_input])

    # One tensor per time step
    # (eg. [had] [a] [general] -> [20] [6] [33])
    time_steps = tf.split(flat_inputs, n_input, 1)

    # Two stacked LSTM layers of n_hidden units each.  The original author
    # reports an average accuracy of 95.20% at 50k iterations for this setup,
    # versus 90.60% for a single rnn.BasicLSTMCell(n_hidden) layer.
    stacked_cell = rnn.MultiRNNCell(
        [rnn.BasicLSTMCell(n_hidden) for _ in range(2)]
    )

    # Unroll the network over the time steps; the final state is not needed.
    step_outputs, _ = rnn.static_rnn(stacked_cell, time_steps, dtype=tf.float32)

    # There is one output per time step, but only the last one is used
    # to score the next word.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']


Now we need the output of the RNN, which we'll feed to the loss and, through it, to the optimizer:

In [13]:
# Build the network once.  `pred` holds one unnormalised score per vocabulary
# word; it is passed as `logits=` to the softmax cross-entropy loss.
pred = RNN(x, weights, biases)

Definition of the cost and the optimizer of such cost:

In [15]:
# Loss and optimizer
# Softmax cross-entropy between the network scores and the labels, averaged
# into a single scalar.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
# Running this op applies one RMSProp update that reduces `cost`.
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)


Here we compute the cross entropy between the distribution given by 'pred' and the one given by the labels 'y'. When we apply 'softmax_cross_entropy_with_logits' we get a tensor of shape=(?,), which means we have one loss value for each example in the batch; we then take the mean of those values, so that we get a single number per batch.

We want to minimize this cross entropy, and we use an optimizer that has proven to work well for this task: RMSProp.

Regarding the evaluation, we compare the word predicted by the network with the one that was actually the truth: we take the 'argmax' of both and check them for element-wise equality. Then we take the mean of that, which we would like to be as high as possible (only 1's!).

In [20]:
# Model evaluation
# A prediction is correct when the index of the highest score in `pred`
# equals the index of the largest entry of the label vector.
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
# Mean of the 0/1 correctness flags, i.e. the fraction of correct predictions.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))


In [22]:
# Expose only GPU 0 to CUDA.
# NOTE(review): this is set after tensorflow was imported but before the
# Session is created - presumably early enough to take effect; confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

Now the actual running of the graph:

In [23]:
# Op that initialises every variable created while building the graph.
init = tf.global_variables_initializer()

# Launch the graph: train for `training_iters` steps, then enter an
# interactive loop that extends a user-supplied n_input-word prompt by 32
# predicted words.
with tf.Session() as session:
    session.run(init)
    step = 0
    # Start from a small random position in the text.
    offset = random.randint(0, n_input + 1)
    # A training sample spans n_input context words plus the target word.
    end_offset = n_input + 1
    acc_total = 0
    loss_total = 0

    writer.add_graph(session.graph)

    while step < training_iters:
        # Generate a minibatch. Add some randomness on selection process.
        # Wrap around once the context window plus its target word would run
        # past the end of the text.
        if offset > (len(training_data) - end_offset):
            offset = random.randint(0, n_input + 1)

        # Context: the ids of the n_input words starting at `offset`.
        symbols_in_keys = [[dictionary[str(training_data[i])]]
                           for i in range(offset, offset + n_input)]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])

        # Target: one-hot encoding of the word that follows the context.
        symbols_out_onehot = np.zeros([vocab_size], dtype=float)
        symbols_out_onehot[dictionary[str(training_data[offset + n_input])]] = 1.0
        symbols_out_onehot = np.reshape(symbols_out_onehot, [1, -1])

        _, acc, loss, onehot_pred = session.run(
            [optimizer, accuracy, cost, pred],
            feed_dict={x: symbols_in_keys, y: symbols_out_onehot})
        loss_total += loss
        acc_total += acc
        if (step + 1) % display_step == 0:
            print("Iter= " + str(step + 1) + ", Average Loss= " +
                  "{:.6f}".format(loss_total / display_step) + ", Average Accuracy= " +
                  "{:.2f}%".format(100 * acc_total / display_step))
            acc_total = 0
            loss_total = 0
            symbols_in = [training_data[i] for i in range(offset, offset + n_input)]
            symbols_out = training_data[offset + n_input]
            # `onehot_pred` is already a NumPy array, so take the arg-max with
            # NumPy.  Calling tf.argmax(...).eval() here would add a new op to
            # the graph on every call and make the graph grow without bound.
            symbols_out_pred = reverse_dictionary[int(np.argmax(onehot_pred, 1)[0])]
            print("%s - [%s] vs [%s]" % (symbols_in, symbols_out, symbols_out_pred))
        step += 1
        offset += (n_input + 1)
    print("Optimization Finished!")
    print("Elapsed time: ", elapsed(time.time() - start_time))
    print("Run on command line.")
    print("\ttensorboard --logdir=%s" % (logs_path))
    print("Point your web browser to: http://localhost:6006/")
    # Interactive generation.  The loop has no exit condition of its own; it
    # ends when input() raises (e.g. Ctrl-C or end of input).
    # NOTE(review): under Python 2 the built-in `input` evaluates what is
    # typed; this code expects the Python 3 behaviour (a plain string).
    while True:
        prompt = "%s words: " % n_input
        sentence = input(prompt)
        sentence = sentence.strip()
        words = sentence.split(' ')
        if len(words) != n_input:
            continue
        # Only the vocabulary lookup is guarded: a bare `except` around the
        # whole generation would also swallow KeyboardInterrupt and report
        # unrelated failures as "Word not in dictionary".
        try:
            symbols_in_keys = [dictionary[str(word)] for word in words]
        except KeyError:
            print("Word not in dictionary")
            continue
        for _ in range(32):
            keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
            onehot_pred = session.run(pred, feed_dict={x: keys})
            # NumPy arg-max for the same reason as in the training loop.
            onehot_pred_index = int(np.argmax(onehot_pred, 1)[0])
            sentence = "%s %s" % (sentence, reverse_dictionary[onehot_pred_index])
            # Slide the context window: drop the oldest word and append the
            # prediction so it is fed back as input.
            symbols_in_keys = symbols_in_keys[1:]
            symbols_in_keys.append(onehot_pred_index)
        print(sentence)


Iter= 1000, Average Loss= 4.248973, Average Accuracy= 6.50%
[',', 'but', 'who'] - [is] vs [is]
Iter= 2000, Average Loss= 3.045276, Average Accuracy= 17.90%
['until', 'an', 'old'] - [mouse] vs [was]
Iter= 3000, Average Loss= 2.381154, Average Accuracy= 34.10%
['in', 'the', 'neighbourhood'] - [.] vs [.]
Iter= 4000, Average Loss= 2.148997, Average Accuracy= 44.00%
['.', 'by', 'this'] - [means] vs [said]
Iter= 5000, Average Loss= 1.790797, Average Accuracy= 53.90%
['be', 'procured', ','] - [and] vs [then]
Iter= 6000, Average Loss= 1.524009, Average Accuracy= 58.10%
['i', 'venture', ','] - [therefore] vs [case]
Iter= 7000, Average Loss= 1.528773, Average Accuracy= 61.90%
['of', 'her', 'approach'] - [,] vs [,]
Iter= 8000, Average Loss= 1.111616, Average Accuracy= 68.10%
['chief', 'danger', 'consists'] - [in] vs [the]
Iter= 9000, Average Loss= 1.066588, Average Accuracy= 70.50%
[',', 'which', 'he'] - [thought] vs [thought]
Iter= 10000, Average Loss= 0.931208, Average Accuracy= 75.70%
[',', 't

KeyboardInterrupt: 

In [24]:
symbols_in_keys

[111, 2, 107]