In [None]:
import os
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

# Data Preparation

## Load the file

In [None]:
# Read the whole corpus into memory as one string.
with open('anna.txt', 'r') as f:
    text = f.read()

# Character-level vocabulary: every distinct character, in sorted order.
vocab = sorted(set(text))

# Lookup tables in both directions; a character's id is its position in `vocab`.
int_to_vocab = dict(enumerate(vocab))
vocab_to_int = {char: idx for idx, char in int_to_vocab.items()}

# Encode the full text as an int32 array of character ids.
encoded = np.array(list(map(vocab_to_int.__getitem__, text)), dtype=np.int32)

## Create mini-batches

In [None]:
def get_batches(arr, batch_size, n_steps):
    """Generate (x, y) mini-batches for next-character prediction.

    The data is trimmed to a whole number of batches and laid out as
    `batch_size` rows, each row being a contiguous slice of the sequence.
    Windows of `n_steps` columns are then yielded from left to right.

    Arguments:
        arr: 1-D array-like of integer ids.
        batch_size: number of rows (sequences) in every batch.
        n_steps: number of columns (time steps) in every batch.

    Yields:
        x: array of shape (batch_size, n_steps) holding the inputs.
        y: int32 array of the same shape where y[i, j] is the element that
           follows x[i, j] in `arr`. The only element without a successor is
           the very last one when len(arr) is an exact multiple of
           batch_size * n_steps; that single target is left as 0.
    """
    arr = np.asarray(arr)

    # Number of elements consumed by one mini-batch.
    chars_per_batch = batch_size * n_steps

    # Only whole batches are used; any remainder at the end is dropped.
    n_batches = len(arr) // chars_per_batch
    n_used = n_batches * chars_per_batch

    # Build the targets from the flat sequence shifted by one position. Doing
    # the shift before reshaping means the last column of a row is paired with
    # its true successor (the first element of the next row) instead of being
    # padded with 0, which is itself a valid class id.
    next_chars = arr[1:n_used + 1]
    shifted = np.zeros(n_used, dtype=np.int32)
    shifted[:len(next_chars)] = next_chars

    # One row per sequence in the batch; the column count is inferred.
    inputs = arr[:n_used].reshape((batch_size, -1))
    targets = shifted.reshape((batch_size, -1))

    # Generator: batches are produced lazily, one window at a time.
    for start in range(0, inputs.shape[1], n_steps):
        stop = start + n_steps
        yield inputs[:, start:stop], targets[:, start:stop]

### Test batch function

In [None]:
# Smoke test: pull a single mini-batch and print inputs and targets side by side.
# Each row of `y` should be the matching row of `x` shifted left by one character.
batch_size = 10
n_steps = 5
# The generator is named `get_batches`; the previous `get_batchs` raised NameError.
batchs = get_batches(encoded, batch_size, n_steps)
x, y = next(batchs)
print(x)
print(y)

# Tensorflow Initialization

## Building Inputs

In [None]:
def build_inputs(batch_size, n_steps):
    """Create the placeholders that feed the graph.

    Arguments:
        batch_size: number of sequences per mini-batch.
        n_steps: number of time steps per sequence.

    Returns:
        (inputs, outputs, keep_prob): two int32 placeholders of shape
        [batch_size, n_steps] named 'inputs' and 'outputs', and a float32
        placeholder named 'keep_prob' that is created without a shape.
    """
    batch_shape = [batch_size, n_steps]

    inputs = tf.placeholder(tf.int32, shape=batch_shape, name='inputs')
    outputs = tf.placeholder(tf.int32, shape=batch_shape, name='outputs')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    # Echo the static shapes so a wrong batch geometry is visible at build time.
    print('Shape of the input tensor: {}'.format(inputs.get_shape()))
    print('Shape of the output tensor: {}'.format(outputs.get_shape()))

    return inputs, outputs, keep_prob

## LSTM cells

In [None]:
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    """Build a stack of LSTM cells with output dropout, plus its zero state.

    Arguments:
        lstm_size: number of units in every LSTM cell.
        num_layers: how many cells are stacked.
        batch_size: batch size used to size the initial state.
        keep_prob: keep probability passed to each cell's DropoutWrapper
            as `output_keep_prob`.

    Returns:
        (cells, initial_state): the MultiRNNCell and the float32 state
        returned by its `zero_state(batch_size, ...)`.
    """
    layers = []
    for _ in range(num_layers):
        # A fresh cell object is created for every layer.
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        layers.append(
            tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob))

    cells = tf.contrib.rnn.MultiRNNCell(layers)
    print('ALL LSTM cells creation: {}'.format(cells))

    # All-zero state to start a sequence from.
    initial_state = cells.zero_state(batch_size, tf.float32)

    return cells, initial_state

## Build Output 

In [None]:
def build_output(lstm_output, lstm_size, number_of_classes):
    """Attach a softmax layer on top of the LSTM output.

    Arguments:
        lstm_output: output of the recurrent layer (a tensor or a list of
            tensors; it is concatenated along axis 1 before use).
        lstm_size: size of the last dimension of the LSTM output.
        number_of_classes: number of output classes.

    Returns:
        (predictions, logits): the softmax probabilities (op name
        'predictions') and the raw scores they are computed from. Both have
        one row per flattened output row and `number_of_classes` columns.
    """
    # Join along axis 1, then flatten so every row holds `lstm_size` values.
    # NOTE(review): presumably one row per (sequence, time step) - confirm with the caller.
    joined = tf.concat(lstm_output, axis=1)
    flat_output = tf.reshape(joined, [-1, lstm_size])

    # Keep the layer's variables under their own 'softmax' scope so their
    # names do not collide with the variables created by the RNN cells.
    with tf.variable_scope('softmax'):
        initial_weights = tf.truncated_normal(
            (lstm_size, number_of_classes), stddev=0.1)
        softmax_w = tf.Variable(initial_weights)
        softmax_b = tf.Variable(tf.zeros(number_of_classes))

    # Linear projection onto the classes, then normalise into probabilities.
    logits = tf.matmul(flat_output, softmax_w) + softmax_b
    predictions = tf.nn.softmax(logits, name='predictions')

    return predictions, logits

## Training loss

In [None]:
def build_loss(logits, targets, lstm_size, num_of_classes):
    """Mean softmax cross-entropy between the logits and the integer targets.

    Arguments:
        logits: raw class scores; its static shape is used to reshape the labels.
        targets: integer class ids.
        lstm_size: not used by this function; kept in the signature for callers.
        num_of_classes: depth of the one-hot encoding.

    Returns:
        Scalar tensor: the cross-entropy averaged over all rows.
    """
    # One-hot encode the targets and give them the same shape as the logits.
    labels = tf.reshape(tf.one_hot(targets, num_of_classes), logits.get_shape())

    per_row_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)

    return tf.reduce_mean(per_row_loss)

## Optimizer: gradient descent with gradient clipping

* `tf.trainable_variables`: returns the weights and biases that are updated during training. TensorFlow collects this list automatically.

* `tf.gradients`: constructs the symbolic partial derivatives of the sum of `ys` with respect to each `x` in `xs`. For example, $\delta w, \delta b = \text{tf.gradients}(cost, [W, b])$ returns the gradient of the cost with respect to each tensor in the second argument, as a list in the same order.

* `tf.clip_by_global_norm`: rescales the whole list of gradients so that their combined (global) norm does not exceed an upper bound, which keeps exploding gradients in check. `grad_clip` is that upper bound.

* `AdamOptimizer` is used to minimise the prediction error. It is a variation of gradient descent.
See the `tf.train` module documentation for a list of the available optimizers.

In [None]:
def build_optimizer(loss, learning_rate, grad_clip):
    """Create the training op: Adam applied to globally-clipped gradients.

    Arguments:
        loss: scalar tensor to minimise.
        learning_rate: learning rate handed to the Adam optimizer.
        grad_clip: clip norm passed to `tf.clip_by_global_norm`.

    Returns:
        The op returned by `apply_gradients`; running it performs one update.
    """
    # Every trainable variable currently registered in the graph.
    tvars = tf.trainable_variables()

    # Differentiate the loss, then clip the gradients by their global norm.
    raw_grads = tf.gradients(loss, tvars)
    clipped_grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)

    adam = tf.train.AdamOptimizer(learning_rate)
    return adam.apply_gradients(zip(clipped_grads, tvars))

# Building Network

In [None]:
class CharRNN:
    """Character-level LSTM network assembled from the `build_*` helpers.

    Constructing an instance resets the default TensorFlow graph and then
    builds the whole model in it.

    Arguments:
        num_classes: number of distinct characters (one-hot depth and output size).
        batch_size: sequences per batch.
        num_steps: time steps per sequence.
        lstm_size: units per LSTM cell.
        num_layers: number of stacked LSTM cells.
        learning_rate: learning rate for the optimizer.
        grad_clip: clip norm for the gradients.
        sampling: when true, `batch_size` and `num_steps` are both forced to 1
            so the network can be fed one character at a time.

    Attributes:
        inputs, targets, keep_prob: placeholders from `build_inputs`.
        initial_state, final_state: RNN state before and after the unrolled steps.
        prediction, logits: outputs of `build_output`.
        loss: tensor from `build_loss`.
        optimizer: training op from `build_optimizer`.
    """

    def __init__(self, num_classes, batch_size=64, num_steps=50, lstm_size=128,
                 num_layers=2, learning_rate=0.001, grad_clip=5, sampling=False):

        if sampling:
            # Both values must be set explicitly: `batch_size, num_steps = 1`
            # tries to unpack an int and raises TypeError.
            batch_size, num_steps = 1, 1

        # Start from an empty default graph so rebuilding the model does not
        # pile new ops and variables on top of a previous instance.
        tf.reset_default_graph()

        # Placeholders for inputs, targets and the dropout keep probability.
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)

        # Stacked LSTM cell and its zero state.
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

        # One-hot encode the integer character ids.
        input_one_hot = tf.one_hot(self.inputs, num_classes)

        # Run the inputs through the RNN, starting from `initial_state`.
        outputs, state = tf.nn.dynamic_rnn(cell, input_one_hot, initial_state=self.initial_state)
        self.final_state = state

        # Softmax layer on top of the RNN outputs.
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)

        # Loss and training op.
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)

In [None]:
# Batch geometry
batch_size = 100        # sequences in each mini-batch
num_steps = 100         # time steps (characters) in each sequence

# Network size
lstm_size = 512         # units in every LSTM layer
num_layers = 2          # stacked LSTM layers

# Optimisation and regularisation
learning_rate = 0.001   # step size for the optimizer
keep_prob = 0.5         # dropout keep probability

In [None]:
# Training schedule
epochs = 20            # full passes over the encoded text
print_every_n = 50     # report the loss every N iterations
save_every_n = 200     # write a checkpoint every N iterations

# Build the graph with the hyperparameters chosen above.
model = CharRNN(
    num_classes=len(vocab),
    batch_size=batch_size,
    num_steps=num_steps,
    lstm_size=lstm_size,
    num_layers=num_layers,
    learning_rate=learning_rate,
)

# Keep up to 100 checkpoints on disk.
saver = tf.train.Saver(max_to_keep=100)

In [None]:
# Checkpoints are written under ./checkpoints. Create the directory up front:
# the saver does not create a missing parent directory, so on a fresh checkout
# the first save would otherwise fail after `save_every_n` training steps.
os.makedirs('checkpoints', exist_ok=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Every epoch starts from the zero state; within the epoch the final
        # state of one batch is fed back in as the initial state of the next.
        new_state = sess.run(model.initial_state)
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss,
                                                 model.final_state,
                                                 model.optimizer],
                                                feed_dict=feed)

            if counter % print_every_n == 0:
                end = time.time()
                print('Epoch: {}/{}... '.format(e+1, epochs),
                      'Training Step: {}... '.format(counter),
                      'Training loss: {:.4f}... '.format(batch_loss),
                      '{:.4f} sec/batch'.format((end-start)))

            if counter % save_every_n == 0:
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

    # Always leave a checkpoint of the final weights, whatever the step count.
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))

# Trained models