# Import

In [1]:
'''
A Recurrent Neural Network (LSTM) implementation example using TensorFlow

Next word prediction after n_input words learned from text file

A story is automatically generated if the predicted word is fed back as input

Based on Rowel Atienza's code
'''

from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import rnn

import numpy as np
import random
import collections

import time

  from ._conv import register_converters as _register_converters


# Format Elapsed Time

In [2]:
def time_elapsed(t):
    """Render a duration given in seconds as a human-readable string.

    Durations below one minute are reported in seconds, durations below
    one hour in minutes, and everything else in hours. The numeric part
    is not rounded.
    """
    minute = 60
    hour = 60*60

    # Guard clauses, smallest unit first
    if t < minute:
        return str(t) + " second(s)"
    if t < hour:
        return str(t/minute) + " minute(s)"
    return str(t/hour) + " hour(s)"

# Build Dataset

In [3]:
# read word data from files
def read_data(fname):
    """Load a text file and return its whitespace-separated tokens.

    fname: path of the text file to read.
    Returns a 1-D numpy array holding every token in file order.
    """
    tokens = []
    with open(fname) as f:
        for line in f.readlines():
            # drop surrounding whitespace, then break the line into words
            tokens.extend(line.strip().split())
    return np.array(tokens)


#module for building dictionary and reverse dictionary.
def build_dataset(words):
    """Build word->id and id->word lookup tables from a token sequence.

    Ids follow frequency rank: the most frequent word gets 0, the next
    gets 1, and so on. Counter.most_common() with no argument returns
    every distinct element ordered from most to least common.

    Returns a (dictionary, reverse_dictionary) pair.
    """
    ranked = collections.Counter(words).most_common()
    # word -> rank in the frequency ordering
    dictionary = {word: rank for rank, (word, _) in enumerate(ranked)}
    # rank -> word, the inverse lookup
    reverse_dictionary = {rank: word for word, rank in dictionary.items()}
    return dictionary, reverse_dictionary

# Function for Building RNN Layers

In [4]:
def module_RNN(x, weights, biases):
    """Build the recurrent part of the graph and return the output-layer result.

    x: input tensor; it is reshaped to [-1, n_input] before use.
    weights, biases: dictionaries whose 'out' entry holds the matrix /
        vector applied to the final RNN output.

    Relies on the module-level n_input and n_hidden settings.
    """
    # Flatten to rows of n_input values: [-1, n_input]
    flat_x = tf.reshape(x, [-1, n_input])

    # Cut each row into n_input per-step tensors along axis 1
    # (e.g. [A] [slave] [named] -> [9] [10] [37])
    step_inputs = tf.split(flat_x, n_input, 1)

    # Cell choice: a single LSTM layer with n_hidden units.
    # Alternatives that can be swapped in here:
    #   basic RNN cell: rnn.BasicRNNCell(n_hidden)
    #   2-layer LSTM:   rnn.MultiRNNCell([rnn.BasicLSTMCell(n_hidden),rnn.BasicLSTMCell(n_hidden)])
    #   more layers:    build the list passed to rnn.MultiRNNCell with a list comprehension
    cell = rnn.BasicLSTMCell(n_hidden)

    # Create the recurrent network specified by the cell over the step inputs
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # There are n_input outputs, but only the last one feeds the output layer
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']

# Parameters and Load Data

In [5]:
# Parameters
# step size handed to the optimizer in the main cell
learning_rate = 0.001

# total number of training steps run by the main loop
training_iters = 50000

# print averaged loss/accuracy and one sample prediction every display_step steps
display_step = 100
# number of words to be put into the network
n_input = 3

# number of units in RNN cell
n_hidden = 5


# Text file containing words for training
training_file = 'simpleStory.txt'
#training_file = 'androcles.txt'

# training_data is a numpy array of the file's whitespace-separated tokens
training_data = read_data(training_file)
print("Loaded training data...")

# dictionary maps word -> id (by frequency rank); reverse_dictionary maps id -> word
dictionary, reverse_dictionary = build_dataset(training_data)
# number of distinct words; sizes the label placeholder and the output layer
vocab_size = len(dictionary)

Loaded training data...


# Main: Build Graph, Train, and Generate Text

In [None]:
# TensorFlow Graph input
# x is fed n_input word ids per example; y is fed a one-hot target over the vocabulary
x = tf.placeholder("float", [None, n_input, 1])
y = tf.placeholder("float", [None, vocab_size])


# Logs_path
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere
logs_path = 'C:/Users/Ali/logs/RNNs'
writer  = tf.summary.FileWriter(logs_path)

# RNN output node weights and biases
weights = {'out': tf.Variable(tf.random_normal([n_hidden, vocab_size]))}
biases  = {'out': tf.Variable(tf.random_normal([vocab_size]))}

# Unnormalised scores (logits) over the vocabulary from the wrapped RNN layers.
# These are not probabilities: the softmax is applied inside the loss below.
pred = module_RNN(x, weights, biases)

# Loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

# Model evaluation
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

start_time = time.time()

# Launch the graph
with tf.Session() as session:
    session.run(init)
    step = 0
    offset = random.randint(0, n_input+1)
    end_offset = n_input + 1
    acc_total = 0
    loss_total = 0

    writer.add_graph(session.graph)
    # push the graph event to disk now instead of waiting for the periodic flush
    writer.flush()

    while step < training_iters:

        # Generate a mini-batch
        # When the window would run past the end of the data, restart from a
        # small random offset so successive passes see different windows
        if offset > (len(training_data)-end_offset):
            offset = random.randint(0, n_input+1)

        # select n_input consecutive words from the training set, map them to ids and reshape
        symbols_in_keys = [ [dictionary[ str(training_data[i])]] for i in range(offset, offset+n_input) ]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])

        # one-hot encode the word that follows the window and reshape to [1, vocab_size]
        symbols_out_onehot = np.zeros([vocab_size], dtype=float)
        symbols_out_onehot[dictionary[str(training_data[offset+n_input])]] = 1.0
        symbols_out_onehot = np.reshape(symbols_out_onehot,[1,-1])

        # run optimizer, accuracy, cost and pred tensors and get the results
        _, acc, loss, onehot_pred = session.run([optimizer, accuracy, cost, pred], feed_dict={x: symbols_in_keys, y: symbols_out_onehot})

        loss_total += loss
        acc_total += acc

        # display the trend of loss and accuracy
        if (step+1) % display_step == 0:
            print("Iter = " + str(step+1) + ", average loss= " + \
                  "{:0.6f}".format(loss_total/display_step) + ", average accuracy= " + \
                  "{:0.2f}%".format(100*acc_total/display_step))
            # reset
            acc_total = 0
            loss_total = 0

            # have a brief view of the relationship of input, real value and prediction result
            symbols_in = [training_data[i] for i in range(offset, offset + n_input)]
            symbols_out = training_data[offset + n_input]
            # onehot_pred is already a numpy array, so take the argmax with numpy;
            # calling tf.argmax(...).eval() here would add a new op to the graph
            # on every report and make the graph grow for the whole run
            symbols_out_pred = reverse_dictionary[int(np.argmax(onehot_pred, axis=1)[0])]

            print("%s - [%s] vs [%s]" % (symbols_in, symbols_out, symbols_out_pred))
        step += 1
        offset += (n_input+1)
    print("Optimization Finished!")

    t_elapsed = time.time() - start_time
    print("Elapsed time: ", time_elapsed(t_elapsed))

    # Take a simple experiment using customized input
    while True:

        prompt = "insert %s words: " % n_input
        try:
            sentence = input(prompt)
        except (EOFError, KeyboardInterrupt):
            # end of input or an interrupt at the prompt ends the session cleanly
            break
        sentence = sentence.strip()
        # split on any run of whitespace so repeated spaces do not create empty words
        words = sentence.split()

        if len(words) != n_input:
            continue

        # Only the vocabulary lookup is expected to fail; any other error
        # (e.g. from session.run) is left to propagate instead of being hidden
        try:
            symbols_in_keys = [dictionary[str(word)] for word in words]
        except KeyError:
            print("cannot be found in dictionary")
            continue

        # number of words to generate
        for _ in range(10):
            keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
            onehot_pred = session.run(pred, feed_dict={x: keys})
            # numpy argmax: no new graph ops are created per generated word
            onehot_pred_index = int(np.argmax(onehot_pred, axis=1)[0])
            sentence = "%s %s" % (sentence, reverse_dictionary[onehot_pred_index])
            # slide the window: drop the oldest id and append the predicted one
            symbols_in_keys = symbols_in_keys[1:]
            symbols_in_keys.append(onehot_pred_index)

        print(sentence)

# flush pending events and release the log file
writer.close()

Iter = 100, average loss= 2.249367, average accuracy= 30.00%
['dinner', 'with', 'Steve'] - [.] vs [with]
Iter = 200, average loss= 2.266527, average accuracy= 12.00%
['Steve', 'had', 'breakfast'] - [with] vs [.]
Iter = 300, average loss= 2.249628, average accuracy= 0.00%
['with', 'Sarah', '.'] - [Sarah] vs [with]
Iter = 400, average loss= 2.157445, average accuracy= 2.00%
['Steve', 'had', 'breakfast'] - [with] vs [.]
Iter = 500, average loss= 2.127943, average accuracy= 5.00%
['had', 'lunch', 'with'] - [Sarah] vs [with]
Iter = 600, average loss= 1.985189, average accuracy= 11.00%
['Steve', '.', 'Steve'] - [had] vs [.]
Iter = 700, average loss= 2.008005, average accuracy= 17.00%
['.', 'Steve', 'had'] - [breakfast] vs [with]
Iter = 800, average loss= 1.932054, average accuracy= 13.00%
['dinner', 'with', 'Steve'] - [.] vs [with]
Iter = 900, average loss= 1.823905, average accuracy= 21.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 1000, average loss= 2.014652, average accuracy= 18.00

Iter = 7900, average loss= 0.547247, average accuracy= 80.00%
['breakfast', 'with', 'David'] - [.] vs [.]
Iter = 8000, average loss= 0.326098, average accuracy= 88.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 8100, average loss= 0.454074, average accuracy= 83.00%
['had', 'dinner', 'with'] - [Steve] vs [Sarah]
Iter = 8200, average loss= 0.575028, average accuracy= 76.00%
['had', 'breakfast', 'with'] - [David] vs [Steve]
Iter = 8300, average loss= 0.418218, average accuracy= 84.00%
['with', 'Sarah', '.'] - [Sarah] vs [Sarah]
Iter = 8400, average loss= 0.380489, average accuracy= 85.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 8500, average loss= 0.323487, average accuracy= 91.00%
['had', 'dinner', 'with'] - [Steve] vs [Steve]
Iter = 8600, average loss= 0.413336, average accuracy= 86.00%
['with', 'Sarah', '.'] - [Sarah] vs [Sarah]
Iter = 8700, average loss= 0.311604, average accuracy= 88.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 8800, average loss= 

Iter = 15600, average loss= 0.186850, average accuracy= 95.00%
['.', 'Steve', 'had'] - [breakfast] vs [breakfast]
Iter = 15700, average loss= 0.141159, average accuracy= 95.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 15800, average loss= 0.237180, average accuracy= 93.00%
['had', 'dinner', 'with'] - [Steve] vs [Steve]
Iter = 15900, average loss= 0.172379, average accuracy= 98.00%
['had', 'lunch', 'with'] - [Sarah] vs [Sarah]
Iter = 16000, average loss= 0.218673, average accuracy= 92.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 16100, average loss= 0.222834, average accuracy= 94.00%
['had', 'dinner', 'with'] - [Steve] vs [Steve]
Iter = 16200, average loss= 0.212277, average accuracy= 94.00%
['breakfast', 'with', 'David'] - [.] vs [.]
Iter = 16300, average loss= 0.165584, average accuracy= 96.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 16400, average loss= 0.262817, average accuracy= 92.00%
['Sarah', '.', 'Sarah'] - [had] vs [had]
Iter = 16500, aver

Iter = 23400, average loss= 0.106008, average accuracy= 95.00%
['Sarah', '.', 'Sarah'] - [had] vs [had]
Iter = 23500, average loss= 0.119479, average accuracy= 94.00%
['had', 'lunch', 'with'] - [Sarah] vs [Sarah]
Iter = 23600, average loss= 0.111454, average accuracy= 94.00%
['Sarah', 'had', 'dinner'] - [with] vs [with]
Iter = 23700, average loss= 0.128273, average accuracy= 99.00%
['lunch', 'with', 'Sarah'] - [.] vs [.]
Iter = 23800, average loss= 0.123826, average accuracy= 97.00%
['Sarah', 'had', 'dinner'] - [with] vs [with]
Iter = 23900, average loss= 0.085897, average accuracy= 96.00%
['.', 'Sarah', 'had'] - [dinner] vs [dinner]
Iter = 24000, average loss= 0.073454, average accuracy= 100.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 24100, average loss= 0.084158, average accuracy= 100.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 24200, average loss= 0.088799, average accuracy= 96.00%
['Sarah', '.', 'Sarah'] - [had] vs [had]
Iter = 24300, average loss= 0.101856

Iter = 31100, average loss= 0.021741, average accuracy= 100.00%
['Sarah', 'had', 'dinner'] - [with] vs [with]
Iter = 31200, average loss= 0.016274, average accuracy= 100.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 31300, average loss= 0.020716, average accuracy= 100.00%
['David', 'had', 'lunch'] - [with] vs [with]
Iter = 31400, average loss= 0.017756, average accuracy= 100.00%
['with', 'Steve', '.'] - [Steve] vs [Steve]
Iter = 31500, average loss= 0.014375, average accuracy= 100.00%
['breakfast', 'with', 'David'] - [.] vs [.]
Iter = 31600, average loss= 0.012303, average accuracy= 100.00%
['breakfast', 'with', 'David'] - [.] vs [.]
Iter = 31700, average loss= 0.009865, average accuracy= 100.00%
['Steve', '.', 'Steve'] - [had] vs [had]
Iter = 31800, average loss= 0.018356, average accuracy= 100.00%
['Steve', '.', 'Steve'] - [had] vs [had]
Iter = 31900, average loss= 0.016373, average accuracy= 100.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 32000, average

Iter = 38800, average loss= 0.001154, average accuracy= 100.00%
['.', 'Steve', 'had'] - [breakfast] vs [breakfast]
Iter = 38900, average loss= 0.001171, average accuracy= 100.00%
['had', 'lunch', 'with'] - [Sarah] vs [Sarah]
Iter = 39000, average loss= 0.001208, average accuracy= 100.00%
['with', 'Steve', '.'] - [Steve] vs [Steve]
Iter = 39100, average loss= 0.000969, average accuracy= 100.00%
['with', 'Sarah', '.'] - [Sarah] vs [Sarah]
Iter = 39200, average loss= 0.000811, average accuracy= 100.00%
['Steve', 'had', 'breakfast'] - [with] vs [with]
Iter = 39300, average loss= 0.000920, average accuracy= 100.00%
['Sarah', 'had', 'dinner'] - [with] vs [with]
Iter = 39400, average loss= 0.000549, average accuracy= 100.00%
['.', 'Steve', 'had'] - [breakfast] vs [breakfast]
Iter = 39500, average loss= 0.000827, average accuracy= 100.00%
['Steve', '.', 'Steve'] - [had] vs [had]
Iter = 39600, average loss= 0.000914, average accuracy= 100.00%
['breakfast', 'with', 'David'] - [.] vs [.]
Iter = 3

Iter = 46400, average loss= 0.000075, average accuracy= 100.00%
['.', 'Steve', 'had'] - [breakfast] vs [breakfast]
Iter = 46500, average loss= 0.000055, average accuracy= 100.00%
['had', 'dinner', 'with'] - [Steve] vs [Steve]
Iter = 46600, average loss= 0.000066, average accuracy= 100.00%
['had', 'lunch', 'with'] - [Sarah] vs [Sarah]
Iter = 46700, average loss= 0.000062, average accuracy= 100.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 46800, average loss= 0.000048, average accuracy= 100.00%
['had', 'breakfast', 'with'] - [David] vs [David]
Iter = 46900, average loss= 0.000041, average accuracy= 100.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 47000, average loss= 0.000047, average accuracy= 100.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 47100, average loss= 0.000042, average accuracy= 100.00%
['dinner', 'with', 'Steve'] - [.] vs [.]
Iter = 47200, average loss= 0.000045, average accuracy= 100.00%
['.', 'Steve', 'had'] - [breakfast] vs [breakfast]
Iter = 47300, a