In [1]:
'''
A Recurrent Neural Network (LSTM) implementation example using TensorFlow..
Next word prediction after n_input words learned from text file.
A story is automatically generated if the predicted word is fed back as input.
Author: Rowel Atienza
Project: https://github.com/roatienza/Deep-Learning-Experiments
'''

from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import random
import collections
import time

start_time = time.time()
def elapsed(sec):
    if sec<60:
        return str(sec) + " sec"
    elif sec<(60*60):
        return str(sec/60) + " min"
    else:
        return str(sec/(60*60)) + " hr"


# Target log path
logs_path = '/tmp/tensorflow/rnn_words'
writer = tf.summary.FileWriter(logs_path)

# Text file containing words for training
training_file = 'belling_the_cat.txt'

In [2]:
def read_data(fname):
    with open(fname) as f:
        content = f.readlines()
    content = [x.strip() for x in content]
    content = [word for i in range(len(content)) for word in content[i].split()]
    content = np.array(content)
    return content

In [3]:
training_data = read_data(training_file)
print("Loaded training data...")

Loaded training data...


In [4]:
def build_dataset(words):
    count = collections.Counter(words).most_common()
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, reverse_dictionary

In [5]:
dictionary, reverse_dictionary = build_dataset(training_data)
vocab_size = len(dictionary)


In [6]:
vocab_size

112

In [7]:
# Parameters
learning_rate = 0.001
training_iters = 50000
display_step = 1000
n_input = 3

# number of units in RNN cell
n_hidden = 512

In [8]:
# tf Graph input
x = tf.placeholder("float", [None, n_input, 1])
y = tf.placeholder("float", [None, vocab_size])

# RNN output node weights and biases
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, vocab_size]))
}
biases = {
    'out': tf.Variable(tf.random_normal([vocab_size]))
}

In [9]:
def RNN(x, weights, biases):

    # reshape to [1, n_input]
    x = tf.reshape(x, [-1, n_input])

    # Generate a n_input-element sequence of inputs
    # (eg. [had] [a] [general] -> [20] [6] [33])
    x = tf.split(x,n_input,1)

    # 2-layer LSTM, each layer has n_hidden units.
    # Average Accuracy= 95.20% at 50k iter
    rnn_cell = rnn.MultiRNNCell([rnn.BasicLSTMCell(n_hidden),rnn.BasicLSTMCell(n_hidden)])

    # 1-layer LSTM with n_hidden units but with lower accuracy.
    # Average Accuracy= 90.60% 50k iter
    # Uncomment line below to test but comment out the 2-layer rnn.MultiRNNCell above
    # rnn_cell = rnn.BasicLSTMCell(n_hidden)

    # generate prediction
    outputs, states = rnn.static_rnn(rnn_cell, x, dtype=tf.float32)

    # there are n_input outputs but
    # we only want the last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

In [10]:
pred = RNN(x, weights, biases)

In [11]:
offset = random.randint(0,n_input+1)
offset

2

In [12]:
n_input

3

In [13]:
training_data

array(['long', 'ago', ',', 'the', 'mice', 'had', 'a', 'general',
       'council', 'to', 'consider', 'what', 'measures', 'they', 'could',
       'take', 'to', 'outwit', 'their', 'common', 'enemy', ',', 'the',
       'cat', '.', 'some', 'said', 'this', ',', 'and', 'some', 'said',
       'that', 'but', 'at', 'last', 'a', 'young', 'mouse', 'got', 'up',
       'and', 'said', 'he', 'had', 'a', 'proposal', 'to', 'make', ',',
       'which', 'he', 'thought', 'would', 'meet', 'the', 'case', '.',
       'you', 'will', 'all', 'agree', ',', 'said', 'he', ',', 'that',
       'our', 'chief', 'danger', 'consists', 'in', 'the', 'sly', 'and',
       'treacherous', 'manner', 'in', 'which', 'the', 'enemy',
       'approaches', 'us', '.', 'now', ',', 'if', 'we', 'could',
       'receive', 'some', 'signal', 'of', 'her', 'approach', ',', 'we',
       'could', 'easily', 'escape', 'from', 'her', '.', 'i', 'venture',
       ',', 'therefore', ',', 'to', 'propose', 'that', 'a', 'small',
       'bell', 'be', 'pr

In [14]:
dictionary

{',': 0,
 '.': 2,
 '?': 49,
 'a': 6,
 'about': 71,
 'ago': 73,
 'agree': 57,
 'all': 34,
 'always': 43,
 'an': 38,
 'and': 3,
 'another': 87,
 'applause': 69,
 'approach': 83,
 'approaches': 54,
 'at': 20,
 'attached': 95,
 'be': 109,
 'bell': 23,
 'but': 33,
 'by': 19,
 'case': 42,
 'cat': 12,
 'chief': 80,
 'common': 53,
 'consider': 48,
 'consists': 86,
 'could': 7,
 'council': 61,
 'danger': 100,
 'easily': 28,
 'easy': 46,
 'enemy': 29,
 'escape': 101,
 'from': 85,
 'general': 24,
 'got': 32,
 'had': 30,
 'he': 13,
 'her': 36,
 'i': 60,
 'if': 90,
 'impossible': 77,
 'in': 9,
 'is': 11,
 'it': 78,
 'know': 45,
 'last': 92,
 'long': 41,
 'looked': 47,
 'make': 75,
 'manner': 66,
 'means': 70,
 'measures': 104,
 'meet': 44,
 'met': 79,
 'mice': 18,
 'mouse': 16,
 'neck': 52,
 'neighbourhood': 72,
 'nobody': 62,
 'now': 76,
 'of': 35,
 'old': 17,
 'one': 98,
 'our': 67,
 'outwit': 74,
 'procured': 108,
 'proposal': 26,
 'propose': 25,
 'receive': 107,
 'remedies': 65,
 'retire': 96,


In [15]:
symbols_in_keys = [[dictionary[ str(training_data[i])]] for i in range(offset, offset+n_input) ]

In [16]:
symbols_in_keys

[[0], [1], [18]]

In [17]:
symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])

In [18]:
symbols_in_keys

array([[[ 0],
        [ 1],
        [18]]])

In [19]:
symbols_out_onehot = np.zeros([vocab_size], dtype=float)
symbols_out_onehot[dictionary[str(training_data[offset+n_input])]] = 1.0
symbols_out_onehot = np.reshape(symbols_out_onehot,[1,-1])

In [20]:
symbols_out_onehot

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [21]:
xd = symbols_in_keys

In [22]:
xd.shape

(1, 3, 1)

In [23]:
# reshape to [1, n_input]
xd = tf.reshape(xd, [-1, n_input])

In [24]:
xd

<tf.Tensor 'Reshape_1:0' shape=(1, 3) dtype=int32>

In [25]:
# Generate a n_input-element sequence of inputs
# (eg. [had] [a] [general] -> [20] [6] [33])
xd = tf.split(xd,n_input,1)

In [26]:
xd

[<tf.Tensor 'split_1:0' shape=(1, 1) dtype=int32>,
 <tf.Tensor 'split_1:1' shape=(1, 1) dtype=int32>,
 <tf.Tensor 'split_1:2' shape=(1, 1) dtype=int32>]

In [27]:
# Loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

# Model evaluation
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



In [None]:
# Launch the graph
with tf.Session() as session:
    session.run(init)
    step = 0
    offset = random.randint(0,n_input+1)
    end_offset = n_input + 1
    acc_total = 0
    loss_total = 0

    writer.add_graph(session.graph)

    while step < training_iters:
        # Generate a minibatch. Add some randomness on selection process.
        if offset > (len(training_data)-end_offset):
            offset = random.randint(0, n_input+1)

        symbols_in_keys = [ [dictionary[ str(training_data[i])]] for i in range(offset, offset+n_input) ]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])

        symbols_out_onehot = np.zeros([vocab_size], dtype=float)
        symbols_out_onehot[dictionary[str(training_data[offset+n_input])]] = 1.0
        symbols_out_onehot = np.reshape(symbols_out_onehot,[1,-1])

        _, acc, loss, onehot_pred = session.run([optimizer, accuracy, cost, pred], \
                                                feed_dict={x: symbols_in_keys, y: symbols_out_onehot})
        loss_total += loss
        acc_total += acc
        if (step+1) % display_step == 0:
            print("Iter= " + str(step+1) + ", Average Loss= " + \
                  "{:.6f}".format(loss_total/display_step) + ", Average Accuracy= " + \
                  "{:.2f}%".format(100*acc_total/display_step))
            acc_total = 0
            loss_total = 0
            symbols_in = [training_data[i] for i in range(offset, offset + n_input)]
            symbols_out = training_data[offset + n_input]
            symbols_out_pred = reverse_dictionary[int(tf.argmax(onehot_pred, 1).eval())]
            print("%s - [%s] vs [%s]" % (symbols_in,symbols_out,symbols_out_pred))
        step += 1
        offset += (n_input+1)
    print("Optimization Finished!")
    print("Elapsed time: ", elapsed(time.time() - start_time))
    print("Run on command line.")
    print("\ttensorboard --logdir=%s" % (logs_path))
    print("Point your web browser to: http://localhost:6006/")
    while True:
        prompt = "%s words: " % n_input
        sentence = input(prompt)
        sentence = sentence.strip()
        words = sentence.split(' ')
        if len(words) != n_input:
            continue
        try:
            symbols_in_keys = [dictionary[str(words[i])] for i in range(len(words))]
            for i in range(32):
                keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
                onehot_pred = session.run(pred, feed_dict={x: keys})
                onehot_pred_index = int(tf.argmax(onehot_pred, 1).eval())
                sentence = "%s %s" % (sentence,reverse_dictionary[onehot_pred_index])
                symbols_in_keys = symbols_in_keys[1:]
                symbols_in_keys.append(onehot_pred_index)
            print(sentence)
        except:
            print("Word not in dictionary")


Iter= 1000, Average Loss= 4.262005, Average Accuracy= 6.20%
['to', 'bell', 'the'] - [cat] vs [cat]
Iter= 2000, Average Loss= 2.676909, Average Accuracy= 30.80%
['in', 'the', 'neighbourhood'] - [.] vs [.]
Iter= 3000, Average Loss= 2.578902, Average Accuracy= 34.70%
['always', 'know', 'when'] - [she] vs [her]
Iter= 4000, Average Loss= 1.830217, Average Accuracy= 51.50%
['we', 'should', 'always'] - [know] vs [know]
Iter= 5000, Average Loss= 1.726553, Average Accuracy= 56.30%
['procured', ',', 'and'] - [attached] vs [could]
Iter= 6000, Average Loss= 1.649877, Average Accuracy= 61.10%
['of', 'her', 'approach'] - [,] vs [,]
Iter= 7000, Average Loss= 1.270723, Average Accuracy= 67.50%
['that', 'our', 'chief'] - [danger] vs [common]
Iter= 8000, Average Loss= 1.162567, Average Accuracy= 70.70%
['proposal', 'to', 'make'] - [,] vs [,]
Iter= 9000, Average Loss= 1.150415, Average Accuracy= 70.10%
['that', 'but', 'at'] - [last] vs [last]
Iter= 10000, Average Loss= 1.026651, Average Accuracy= 75.20%


In [None]:
prompt = "%s words: " % n_input
sentence = input(prompt)
sentence = sentence.strip()
words = sentence.split(' ')

In [None]:
words