## Vanilla RNN on TensorFlow

In this notebook, we'll build a character-level RNN and train it on the text of the novel *Anna Karenina*.
This tutorial is based on Andrej Karpathy's [post on RNNs](http://karpathy.github.io/2015/05/21/rnn-effectiveness/).

<img src="https://github.com/udacity/deep-learning/raw/d94980095d1187998e2e0544966bb417f031390f/intro-to-rnns/assets/charseq.jpeg" alt="Drawing" style="width: 600px;">

In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import tensorflow as tf

%matplotlib inline

First we'll load the text file and convert it into integers for our network to use. Here I'm creating a couple of dictionaries to convert the characters to and from integers. Encoding the characters as integers makes them easier to use as input to the network.

In [2]:
# Read the whole training corpus into memory as a single string.
with open('anna.txt', 'r') as f:
    text = f.read()

# Every distinct character, in sorted order; a character's position in this
# list is its integer id.
vocab = sorted(set(text))
# Forward (char -> id) and reverse (id -> char) lookup tables.
vocab_to_int = dict((char, idx) for idx, char in enumerate(vocab))
int_to_vocab = {idx: char for idx, char in enumerate(vocab)}
# The full text as a 1-D int32 array of character ids.
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)

In [3]:
def get_batches(arr, n_seqs, n_steps):
    '''Create a generator that returns batches of size
       n_seqs x n_steps from arr.

       Each yielded pair is (x, y): x is a window of n_steps columns and y
       holds, for every position of x, the element that follows it in the
       same row. Trailing elements of arr that do not fill a whole batch
       are dropped.

       Arguments
       ---------
       arr: Array you want to make batches from
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch

       Yields
       ------
       x: features, shape (n_seqs, n_steps) (a slice of the reshaped arr)
       y: targets, same shape as x, shifted left by one step
    '''
    # Get the number of characters per batch and number of batches we can make
    characters_per_batch = n_seqs * n_steps
    n_batches = len(arr)//characters_per_batch

    # Keep only enough characters to make full batches
    arr = arr[:n_batches * characters_per_batch]

    # Reshape into n_seqs rows; each row is one contiguous stretch of arr
    arr = arr.reshape((n_seqs, -1))
    row_length = arr.shape[1]

    for n in range(0, row_length, n_steps):
        # The features
        x = arr[:, n:n+n_steps]
        # The targets, shifted by one
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]
        if n + n_steps < row_length:
            # The true next element exists in the row: use it as the last
            # target instead of wrapping around inside the window.
            y[:, -1] = arr[:, n + n_steps]
        else:
            # Final window of the row: nothing follows it, so fall back to
            # wrapping around to the first element of the window.
            y[:, -1] = x[:, 0]
        yield x, y

In [4]:
# Pull one sample batch (10 sequences x 50 steps) for inspection. x.shape[1]
# is also used further down to set the sequence length of the placeholders.
batches = get_batches(encoded, 10, 50)
x, y = next(batches)

In [5]:
# Show the top-left 10x10 corner of the features and of the targets.
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

('x\n', array([[32, 65, 58, 73, 77, 62, 75,  2, 17,  1],
       [ 2, 76, 69, 62, 62, 79, 62,  2, 72, 63],
       [62, 75, 14,  4,  1,  0,  1,  0, 48, 77],
       [76,  2, 70, 62, 58, 71, 66, 71, 64,  1],
       [65, 62, 75, 12,  2, 35, 66, 71, 72, 64],
       [61,  1,  0, 76, 72,  2, 65, 62,  2, 65],
       [66, 60, 66, 71, 64,  2, 65, 62, 75,  2],
       [62, 71,  2, 72, 63, 63, 62, 71, 61, 62],
       [76,  2, 77, 58, 69, 68, 66, 71, 64,  2],
       [58, 70, 62,  2, 60, 66, 75, 60, 69, 62]], dtype=int32))
('\ny\n', array([[65, 58, 73, 77, 62, 75,  2, 17,  1,  0],
       [76, 69, 62, 62, 79, 62,  2, 72, 63,  2],
       [75, 14,  4,  1,  0,  1,  0, 48, 77, 62],
       [ 2, 70, 62, 58, 71, 66, 71, 64,  1,  0],
       [62, 75, 12,  2, 35, 66, 71, 72, 64, 62],
       [ 1,  0, 76, 72,  2, 65, 62,  2, 65, 58],
       [60, 66, 71, 64,  2, 65, 62, 75,  2, 65],
       [71,  2, 72, 63, 63, 62, 71, 61, 62, 61],
       [ 2, 77, 58, 69, 68, 66, 71, 64,  2, 58],
       [70, 62,  2, 60, 66, 75, 60, 

In [17]:
# hyperparam
# Number of units in the RNN cell.
hidden_size = 100
# Learning rate handed to AdamOptimizer.
# NOTE(review): 1e-1 is two orders of magnitude above Adam's documented
# default of 1e-3 and may make training unstable - confirm this is intended.
learning_rate = 1e-1
# Width of each character's embedding vector.
embedding_size = 300

In [6]:
def build_inputs(batch_size, num_steps):
    """Create the int32 placeholders that feed the graph.

    Arguments
    ---------
    batch_size: First dimension of both placeholders (may be None)
    num_steps: Second dimension of both placeholders

    Returns
    -------
    (inputs, targets): two tf.int32 placeholders of shape
    [batch_size, num_steps], named "inputs" and "targets"
    """
    placeholder_shape = [batch_size, num_steps]
    feature_ph = tf.placeholder(tf.int32, placeholder_shape, name="inputs")
    label_ph = tf.placeholder(tf.int32, placeholder_shape, name="targets")
    return feature_ph, label_ph

In [18]:
def initiate_embedding_lookup(inputs, output_size, embedding_size):
    """Create the embedding matrix and look up the rows selected by inputs.

    Arguments
    ---------
    inputs: Integer ids used as row indices into the embedding matrix
    output_size: Number of rows in the embedding matrix
    embedding_size: Number of columns (the length of each embedding vector)

    Returns
    -------
    The result of tf.nn.embedding_lookup on the 'Embedding' variable.
    """
    table_shape = [output_size, embedding_size]
    embedding_table = tf.get_variable('Embedding', table_shape)
    return tf.nn.embedding_lookup(embedding_table, inputs)

In [22]:
def build_vanilla_rnn(inputs, hidden_size, embedding_lookup):
    """Run a BasicRNNCell over the embedded sequence and flatten its outputs.

    Arguments
    ---------
    inputs: Unused; kept so existing call sites keep working. The RNN is
        driven by embedding_lookup.
    hidden_size: Number of units in the BasicRNNCell
    embedding_lookup: Embedded inputs; split along axis 1 into one tensor
        per time step (assumed [batch, num_steps, embedding_size] - TODO
        confirm against the caller)

    Returns
    -------
    A 2-D tensor of shape (-1, hidden_size) whose rows are ordered
    batch-major (all steps of sequence 0, then all steps of sequence 1, ...),
    the same order build_loss produces when it flattens the targets.
    """
    step_inputs = tf.unstack(embedding_lookup, axis=1)
    cell = tf.contrib.rnn.BasicRNNCell(hidden_size)
    step_outputs, _ = tf.nn.static_rnn(cell, step_inputs, dtype=tf.float32)
    # static_rnn returns a Python list with one output tensor per time step.
    # Reshaping that list directly would pack it with time as the leading
    # axis, giving time-major rows that do not line up with the batch-major
    # flattened targets. Stack on axis 1 first so the batch axis comes first.
    batch_major = tf.stack(step_outputs, axis=1)
    return tf.reshape(batch_major, shape=(-1, hidden_size))

In [13]:
def build_output(hidden_size, output, output_size):
    """Project RNN outputs to per-class logits with a dense layer.

    Arguments
    ---------
    hidden_size: Number of columns in output (rows of the weight matrix)
    output: 2-D tensor to project
    output_size: Number of logits produced per row

    Returns
    -------
    output x weight + bias, using the 'weight' and 'bias' variables.
    """
    projection = tf.get_variable('weight', [hidden_size, output_size])
    offset = tf.get_variable('bias', [output_size])
    return tf.matmul(output, projection) + offset

In [15]:
def build_loss(targets, output_size, logits):
    """Mean softmax cross-entropy between logits and integer targets.

    Arguments
    ---------
    targets: Integer class ids; one-hot encoded and flattened to
        (-1, output_size) before being compared with logits
    output_size: Number of classes (depth of the one-hot encoding)
    logits: Unnormalized scores, one row per flattened target

    Returns
    -------
    Scalar tensor: the cross-entropy averaged over all rows.
    """
    flat_labels = tf.reshape(tf.one_hot(targets, output_size),
                             shape=(-1, output_size))
    per_position = tf.nn.softmax_cross_entropy_with_logits(labels=flat_labels,
                                                           logits=logits)
    return tf.reduce_mean(per_position)

In [16]:
def build_optimizer(learning_rate, loss):
    """Return the op that applies one Adam update minimizing loss.

    Arguments
    ---------
    learning_rate: Learning rate passed to AdamOptimizer
    loss: Tensor to minimize
    """
    adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = adam.minimize(loss)
    return train_op

In [19]:
# Batch dimension is left as None; the sequence length is taken from the
# sample batch drawn earlier (x.shape[1]).
inputs, targets = build_inputs(None, x.shape[1])

In [20]:
# One embedding row per vocabulary character, each embedding_size wide.
embedding_lookup = initiate_embedding_lookup(inputs, len(vocab ), embedding_size)

In [23]:
# RNN outputs flattened to rows of width hidden_size. build_vanilla_rnn does
# not read `inputs` itself; it consumes embedding_lookup.
output = build_vanilla_rnn(inputs, hidden_size, embedding_lookup)

In [24]:
# Project each RNN output row to one logit per vocabulary character.
logits = build_output(hidden_size, output, len(vocab))

In [25]:
# Mean cross-entropy between the logits and the one-hot encoded targets.
loss = build_loss(targets, len(vocab), logits)

In [26]:
# Training op: one Adam step on the loss.
opt = build_optimizer(learning_rate, loss)

In [None]:
# Train the model: 10 passes over the encoded text, printing the loss of
# every batch.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(10):
        # Batches of 5 sequences x 50 steps; the step count has to match the
        # second dimension the placeholders were built with.
        for _x, _y in get_batches(encoded, 5, 50):
            feed = {
                inputs: _x,
                targets: _y,
            }
            # Run one optimizer step and fetch the loss for this batch.
            batch_loss, _ = sess.run([loss, opt], feed_dict=feed)
            # Function-call form works on both Python 2 and Python 3; the
            # bare `print x` statement is a SyntaxError on Python 3.
            print(batch_loss)