# Character Level RNN for generating Shakespeare in TensorFlow

In [2]:
# example from http://r2rt.com/recurrent-neural-networks-in-tensorflow-ii.html
"""
Imports
"""
from __future__ import print_function 
import numpy as np
import tensorflow as tf
import time
import os
from tensorflow.models.rnn.ptb import reader

"""
Load and process data, utility functions
"""

file_name = 'tiny-shakespeare.txt'

# Read the whole corpus into memory as a single string.
with open(file_name, 'r') as f:
    raw_data = f.read()
    print("Data length:", len(raw_data))

# The vocabulary is the set of distinct characters that occur in the corpus.
vocab = set(raw_data)
vocab_size = len(vocab)

# Number the characters in sorted order rather than in set-iteration order:
# set order for strings can change from one interpreter run to the next, which
# would make the ids stored implicitly in a saved checkpoint meaningless when
# the text is regenerated in a later session.
idx_to_vocab = dict(enumerate(sorted(vocab)))
vocab_to_idx = {ch: idx for idx, ch in idx_to_vocab.items()}

# Encode the corpus as a list of integer character ids, then drop the raw
# string since only the encoded form is used from here on.
data = [vocab_to_idx[c] for c in raw_data]
del raw_data

def gen_epochs(n, num_steps, batch_size):
    """Generate one PTB batch iterator over ``data`` for each of ``n`` epochs.

    A new ``reader.ptb_iterator(data, batch_size, num_steps)`` is created
    lazily each time the next epoch is requested.
    """
    for _epoch in range(n):
        batches = reader.ptb_iterator(data, batch_size, num_steps)
        yield batches
        
def reset_graph():
    """Close the module-level ``sess`` if one is set, then reset the default graph."""
    # A missing or falsy global ``sess`` means there is nothing to close.
    open_session = globals().get('sess')
    if open_session:
        open_session.close()
    tf.reset_default_graph()

ImportError: No module named models.tutorials.rnn.ptb

In [None]:
# Small default hyperparameters; the trailing comments record the larger
# values that the full training run further down passes explicitly.
num_epochs = 1
num_steps = 200
batch_size = 32  # 50
state_size = 100  # 512

def train_network(g, num_epochs, num_steps=200, batch_size=32, verbose=True, save=False):
    """Train the graph described by ``g`` and return the average loss of each epoch.

    Args:
        g: dict of graph handles. The keys read here are ``'x'``, ``'y'``,
            ``'init_state'``, ``'final_state'``, ``'total_loss'``,
            ``'train_step'`` and, when saving, ``'saver'``.
        num_epochs: number of passes over the data.
        num_steps: sequence length forwarded to ``gen_epochs``.
        batch_size: batch size forwarded to ``gen_epochs``.
        verbose: when true, print the average loss after every epoch.
        save: checkpoint path; the model is saved only if this is a ``str``.

    Returns:
        A list holding one average training loss per epoch.
    """
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps, batch_size)):
            training_loss = 0
            steps = 0
            # No state is fed for the first batch of an epoch; afterwards the
            # final state of one batch becomes the initial state of the next.
            training_state = None
            for X, Y in epoch:
                steps += 1
                feed_dict = {g['x']: X, g['y']: Y}
                if training_state is not None:
                    feed_dict[g['init_state']] = training_state
                training_loss_, training_state, _ = sess.run(
                    [g['total_loss'], g['final_state'], g['train_step']],
                    feed_dict)
                training_loss += training_loss_

            # Report and record once per epoch, after all of its batches.
            if verbose:
                print("Average training loss for Epoch", idx, ":", training_loss/steps)
            training_losses.append(training_loss/steps)

        # Save while the session (and therefore the trained variables) is still open.
        if isinstance(save, str):
            g['saver'].save(sess, save)

    return training_losses

In [None]:
## Define the network (for training)
def build_multilayer_lstm_graph_with_dynamic_rnn(
        state_size=100,
        num_classes=vocab_size,
        batch_size=32,
        num_steps=200,
        num_layers=3,
        learning_rate=1e-4):
    """Build a stacked-LSTM character model and return handles to its tensors.

    ``reset_graph()`` is called first, so a previously built default graph is
    discarded.

    Args:
        state_size: width of the character embeddings and of every LSTM layer.
        num_classes: number of distinct characters; sets the number of
            embedding rows and of softmax outputs.
        batch_size: sequences per batch (fixed in the placeholder shapes).
        num_steps: characters per sequence (fixed in the placeholder shapes).
        num_layers: number of stacked LSTM layers.
        learning_rate: learning rate passed to the Adam optimizer.

    Returns:
        dict with the keys ``x``, ``y``, ``init_state``, ``final_state``,
        ``total_loss``, ``train_step``, ``preds`` and ``saver``.
    """
    reset_graph()

    x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')

    embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])
    # Note that our inputs are no longer a list, but a tensor of dims
    # batch_size x num_steps x state_size
    rnn_inputs = tf.nn.embedding_lookup(embeddings, x)

    # One LSTMCell object per layer instead of the same object repeated.
    # NOTE(review): later TensorFlow releases are understood to reject a
    # MultiRNNCell built from one repeated cell object - confirm against the
    # installed version.
    layers = [tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
              for _ in range(num_layers)]
    cell = tf.nn.rnn_cell.MultiRNNCell(layers, state_is_tuple=True)
    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)

    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))

    # Reshape rnn_outputs and y so we can get the logits in a single matmul.
    rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
    y_reshaped = tf.reshape(y, [-1])

    logits = tf.matmul(rnn_outputs, W) + b
    predictions = tf.nn.softmax(logits)

    # Keyword arguments keep the call independent of the positional order of
    # labels and logits in the installed TensorFlow version.
    total_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    return dict(
        x=x,
        y=y,
        init_state=init_state,
        final_state=final_state,
        total_loss=total_loss,
        train_step=train_step,
        preds=predictions,
        saver=tf.train.Saver()
    )


In [None]:



def generate_characters(g, checkpoint, num_chars, prompt='A', pick_top_chars=None):
    """Sample text from a trained graph one character at a time.

    Each step feeds the current character as a 1 x 1 batch, so ``g`` is
    expected to have been built with ``batch_size=1`` and ``num_steps=1``.

    Args:
        g: dict of graph handles; the keys read here are ``'x'``,
            ``'init_state'``, ``'final_state'``, ``'preds'`` and ``'saver'``.
        checkpoint: path of the checkpoint to restore the weights from.
        num_chars: number of characters to sample after the prompt.
        prompt: the starting character; it must be a key of ``vocab_to_idx``.
        pick_top_chars: if given, sample only among this many most probable
            characters (renormalized); if ``None``, sample from the full
            predicted distribution.

    Returns:
        The generated text: the prompt followed by ``num_chars`` sampled
        characters. The text is also printed.
    """
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        g['saver'].restore(sess, checkpoint)

        # No state is fed on the first step; afterwards each step continues
        # from the final state returned by the previous one.
        state = None
        current_char = vocab_to_idx[prompt]
        chars = [current_char]

        for _ in range(num_chars):
            feed_dict = {g['x']: [[current_char]]}
            if state is not None:
                feed_dict[g['init_state']] = state

            preds, state = sess.run([g['preds'], g['final_state']], feed_dict)

            p = np.squeeze(preds)
            if pick_top_chars is not None:
                # Zero every probability outside the top pick_top_chars
                # entries, then renormalize so the rest sum to 1 again.
                p[np.argsort(p)[:-pick_top_chars]] = 0
                p = p / np.sum(p)
            current_char = np.random.choice(vocab_size, 1, p=p)[0]

            chars.append(current_char)

    # Build the string once: joining a map() object twice yields an empty
    # string the second time on Python 3, because the iterator is exhausted.
    text = "".join(idx_to_vocab[idx] for idx in chars)
    print(text)
    return text

## Make the training RNN
g = build_multilayer_lstm_graph_with_dynamic_rnn(num_steps=80,
                state_size = 512,
                batch_size = 50,
                num_classes = vocab_size,
                learning_rate = 5e-4)

num_epochs = 30
checkpoint_path = "saves/LSTM_30_epochs"

# Make sure the checkpoint directory exists before training starts, so a
# missing folder cannot make the save fail after all epochs have run.
# NOTE(review): assumes tf.train.Saver.save does not create parent
# directories itself - confirm for the installed TensorFlow version.
checkpoint_dir = os.path.dirname(checkpoint_path)
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

t = time.time()

losses = train_network(g, num_epochs, num_steps=80, batch_size=50, save=checkpoint_path)
print("It took " + str(time.time() - t) + " seconds to train for " + str(num_epochs) + " epochs.")
print("The average loss on the final epoch was: " + str(losses[-1]))

## Make the generating RNN
# Rebuilt with batch_size=1 and num_steps=1 so one character is fed per step;
# the trained weights are restored from the checkpoint written above.
g = build_multilayer_lstm_graph_with_dynamic_rnn(num_steps=1, state_size=512, batch_size=1, num_classes=vocab_size)
output = generate_characters(g, checkpoint_path, 100000, prompt='T', pick_top_chars=5)

with open("shakespeare_output_text.txt", "w") as text_file:
    text_file.write(output)

print(output)