## Text-gen RNN

### Import relevant libraries

In [1]:
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

### Load text into a one huge string (millions of chars)

In [2]:
with open('anna.txt', 'r') as f:
    text=f.read()

In [3]:
print(text[:100])
print(len(text))

Chapter 1


Happy families are all alike; every unhappy family is unhappy in its own
way.

Everythin
1966145


### Create vocabulary : a set of all chars of which 'text' consists

In [4]:
vocab = set(text)

In [5]:
print(vocab)


{'n', '.', 'I', '4', '6', 'J', 'K', 'F', ' ', 'H', 'e', 'L', 'S', '3', '0', ',', 'u', 'o', '2', 'V', '8', 'B', 'c', 'R', 'v', '!', '1', 'O', '_', 'a', 'w', ':', 'G', 'b', '7', 'P', 'h', 'k', 'q', 'A', 'm', 'y', 'z', 'X', '9', 'Y', 'C', 'r', '(', 'g', '"', ';', 't', 'i', "'", 'N', 'Q', 'T', 'l', 'p', '-', 'M', 'j', 'x', 'Z', 'U', '`', 'W', ')', '5', 'f', 'E', 's', 'D', '?', '\n', 'd'}


### Create vocab_to_int and int_to_vocab. These are dictionaries. You won't need it. Hopefully

In [6]:
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))

In [7]:
print(vocab_to_int)
print()
print(int_to_vocab)

{'n': 0, 'r': 47, '.': 1, '4': 3, '6': 4, 'J': 5, 'K': 6, 'F': 7, 'H': 9, '0': 14, 'e': 10, 'L': 11, 'S': 12, 'U': 65, ',': 15, 'u': 16, 'o': 17, '-': 60, '2': 18, 'V': 19, '8': 20, 'B': 21, 'c': 22, 'R': 23, 'v': 24, 'I': 2, '!': 25, ' ': 8, '`': 66, '1': 26, ')': 68, 'O': 27, 'N': 55, '_': 28, ':': 31, 'w': 30, 'G': 32, 'b': 33, 'M': 61, 'P': 35, 'h': 36, 'k': 37, 'q': 38, 'A': 39, 'm': 40, 'z': 42, '9': 44, 'Y': 45, 'C': 46, '(': 48, 'g': 49, '"': 50, ';': 51, 't': 52, 'i': 53, "'": 54, 'X': 43, 'Q': 56, 'T': 57, 'l': 58, 'y': 41, 'Z': 64, 'j': 62, 'p': 59, '?': 74, '7': 34, 'W': 67, '3': 13, '5': 69, 'f': 70, 'E': 71, 's': 72, 'D': 73, 'a': 29, '\n': 75, 'x': 63, 'd': 76}

{0: 'n', 1: '.', 2: 'I', 3: '4', 4: '6', 5: 'J', 6: 'K', 7: 'F', 8: ' ', 9: 'H', 10: 'e', 11: 'L', 12: 'S', 13: '3', 14: '0', 15: ',', 16: 'u', 17: 'o', 18: '2', 19: 'V', 20: '8', 21: 'B', 22: 'c', 23: 'R', 24: 'v', 25: '!', 26: '1', 27: 'O', 28: '_', 29: 'a', 30: 'w', 31: ':', 32: 'G', 33: 'b', 34: '7', 35: 'P',

### Create an iteger representation of 'text' (millions of chars as ints)

In [8]:
#chars = np.array([ord(c) for c in text], dtype=np.int32)
chars = np.array([vocab_to_int[c] for c in text], dtype=np.int32)
print(chars[:100])

[46 36 29 59 52 10 47  8 26 75 75 75  9 29 59 59 41  8 70 29 40 53 58 53 10
 72  8 29 47 10  8 29 58 58  8 29 58 53 37 10 51  8 10 24 10 47 41  8 16  0
 36 29 59 59 41  8 70 29 40 53 58 41  8 53 72  8 16  0 36 29 59 59 41  8 53
  0  8 53 52 72  8 17 30  0 75 30 29 41  1 75 75 71 24 10 47 41 52 36 53  0]


## Data split

In [9]:
def split_data(chars, batch_size, num_steps, split_frac=0.9):
    """ 
    Split character data into training and validation sets, inputs and targets for each set.
    
    Arguments
    ---------
    chars: character array
    batch_size: Size of examples in each of batch
    num_steps: Number of sequence steps to keep in the input and pass to the network
    split_frac: Fraction of batches to keep in the training set
    
    
    Returns train_x, train_y, val_x, val_y
    """
    
    
    slice_size = batch_size * num_steps
    n_batches = int(len(chars) / slice_size)
    
    # Drop the last few characters to make only full batches
    x = chars[: n_batches*slice_size]
    y = chars[1: n_batches*slice_size + 1]
    
    # Split the data into batch_size slices, then stack them into a 2D matrix 
    x = np.stack(np.split(x, batch_size))
    y = np.stack(np.split(y, batch_size))
    
    # Now x and y are arrays with dimensions batch_size x n_batches*num_steps
    
    # Split into training and validation sets, keep the virst split_frac batches for training
    split_idx = int(n_batches*split_frac)
    train_x, train_y= x[:, :split_idx*num_steps], y[:, :split_idx*num_steps]
    val_x, val_y = x[:, split_idx*num_steps:], y[:, split_idx*num_steps:]
    
    return train_x, train_y, val_x, val_y

In [10]:
train_x, train_y, val_x, val_y = split_data(chars, 10, 200)

train_x.shape

(10, 176800)

In [11]:
def get_batch(arrs, num_steps):
    batch_size, slice_size = arrs[0].shape
    
    n_batches = int(slice_size/num_steps)
    for b in range(n_batches):
        yield [x[:, b*num_steps: (b+1)*num_steps] for x in arrs]

#### Creating training and validation sets using function defined above

### Building the model

In [12]:
def build_rnn(num_classes, batch_size=50, num_steps=50, lstm_size=128, num_layers=2,
              learning_rate=0.001, grad_clip=5, sampling=False):
        
    if sampling == True:
        batch_size, num_steps = 1, 1

    tf.reset_default_graph()
    
    # Declare placeholders we'll feed into the graph
    with tf.name_scope('inputs'):
        inputs = tf.placeholder(tf.int32, [batch_size, num_steps], name='inputs')
        x_one_hot = tf.one_hot(inputs, num_classes, name='x_one_hot')
    
    with tf.name_scope('targets'):
        targets = tf.placeholder(tf.int32, [batch_size, num_steps], name='targets')
        y_one_hot = tf.one_hot(targets, num_classes, name='y_one_hot')
        y_reshaped = tf.reshape(y_one_hot, [-1, num_classes])
    
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    # Build the RNN layers
    with tf.name_scope("RNN_cells"):
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        cell = tf.contrib.rnn.MultiRNNCell([drop] * num_layers)
    
    with tf.name_scope("RNN_init_state"):
        initial_state = cell.zero_state(batch_size, tf.float32)

    # Run the data through the RNN layers
    with tf.name_scope("RNN_forward"):
        rnn_inputs = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(x_one_hot, num_steps, 1)]
        outputs, state = tf.contrib.rnn.static_rnn(cell, rnn_inputs, initial_state=initial_state)
    
    final_state = state
    
    # Reshape output so it's a bunch of rows, one row for each cell output
    with tf.name_scope('sequence_reshape'):
        seq_output = tf.concat(outputs, axis=1,name='seq_output')
        output = tf.reshape(seq_output, [-1, lstm_size], name='graph_output')
    
    # Now connect the RNN outputs to a softmax layer and calculate the cost
    with tf.name_scope('logits'):
        softmax_w = tf.Variable(tf.truncated_normal((lstm_size, num_classes), stddev=0.1),
                               name='softmax_w')
        softmax_b = tf.Variable(tf.zeros(num_classes), name='softmax_b')
        logits = tf.matmul(output, softmax_w) + softmax_b
        tf.summary.histogram('softmax_w', softmax_w)
        tf.summary.histogram('softmax_b', softmax_b)

    with tf.name_scope('predictions'):
        preds = tf.nn.softmax(logits, name='predictions')
        tf.summary.histogram('predictions', preds)
    
    with tf.name_scope('cost'):
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped, name='loss')
        cost = tf.reduce_mean(loss, name='cost')
        tf.summary.scalar('cost', cost)

    # Optimizer for training, using gradient clipping to control exploding gradients
    with tf.name_scope('train'):
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
        train_op = tf.train.AdamOptimizer(learning_rate)
        optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    merged = tf.summary.merge_all()
    
    # Export the nodes 
    export_nodes = ['inputs', 'targets', 'initial_state', 'final_state',
                    'keep_prob', 'cost', 'preds', 'optimizer', 'merged']
    Graph = namedtuple('Graph', export_nodes)
    local_dict = locals()
    graph = Graph(*[local_dict[each] for each in export_nodes])
    
    return graph

## Training

In [13]:
batch_size = 100
num_steps = 100
lstm_size = 512
num_layers = 2
learning_rate = 0.002
keep_prob = 0.5

### Write out the graph for TensorBoard

In [14]:
!mkdir -p checkpoints/anna

In [22]:
def train(model, epochs, train_writer, test_writer):
    
    saver = tf.train.Saver(max_to_keep=100)
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Use the line below to load a checkpoint and resume training
        #saver.restore(sess, 'checkpoints/anna20.ckpt')

        n_batches = int(train_x.shape[1]/num_steps)
        iterations = n_batches * epochs
        for e in range(epochs):

            # Train network
            new_state = sess.run(model.initial_state)
            loss = 0
            for b, (x, y) in enumerate(get_batch([train_x, train_y], num_steps), 1):
                iteration = e*n_batches + b
                start = time.time()
                feed = {model.inputs: x,
                        model.targets: y,
                        model.keep_prob: 0.5,
                        model.initial_state: new_state}
                summary, batch_loss, new_state, _ = sess.run([model.merged, model.cost, 
                                                              model.final_state, model.optimizer], 
                                                              feed_dict=feed)
                loss += batch_loss
                end = time.time()
                print('Epoch {}/{} '.format(e+1, epochs),
                      'Iteration {}/{}'.format(iteration, iterations),
                      'Training loss: {:.4f}'.format(loss/b),
                      '{:.4f} sec/batch'.format((end-start)))

                train_writer.add_summary(summary, iteration)
                
                if (iteration%200 == 0) or (iteration == iterations):
                    # Check performance, notice dropout has been set to 1
                    val_loss = []
                    new_state = sess.run(model.initial_state)
                    for x, y in get_batch([val_x, val_y], num_steps):
                        feed = {model.inputs: x,
                                model.targets: y,
                                model.keep_prob: 1.,
                                model.initial_state: new_state}
                        summary, batch_loss, new_state = sess.run([model.merged,
                                                                   model.cost, 
                                                                   model.final_state], 
                                                                  feed_dict=feed)
                        val_loss.append(batch_loss)

                    test_writer.add_summary(summary, iteration)

                    print('Validation loss:', np.mean(val_loss),
                          'Saving checkpoint!')
                    # Below command is commented out in Mat's version
                    saver.save(sess, 
                               "checkpoints/anna/i{}_l{}_{:.3f}.ckpt".format(iteration, 
                                                                             lstm_size, 
                                                                             np.mean(val_loss)))

In [23]:
epochs = 20
batch_size = 100
num_steps = 100
train_x, train_y, val_x, val_y = split_data(chars, batch_size, num_steps)

for lstm_size in [256,512]:
    for num_layers in [1, 2]:
        for learning_rate in [0.002]:
            
            log_string_train = 'logs/anna/train/lr={},rl={},ru={}'.format(learning_rate, num_layers, lstm_size)
            train_writer = tf.summary.FileWriter(log_string_train)
            
            log_string_test = 'logs/anna/test/lr={},rl={},ru={}'.format(learning_rate, num_layers, lstm_size)
            test_writer = tf.summary.FileWriter(log_string_test)
            
            model = build_rnn(len(vocab), 
                    batch_size=batch_size,
                    num_steps=num_steps,
                    learning_rate=learning_rate,
                    lstm_size=lstm_size,
                    num_layers=num_layers)
            
            train(model, epochs, train_writer, test_writer)

Epoch 1/20  Iteration 1/3520 Training loss: 4.3420 0.6371 sec/batch
Epoch 1/20  Iteration 2/3520 Training loss: 4.3169 0.1283 sec/batch
Epoch 1/20  Iteration 3/3520 Training loss: 4.2543 0.1137 sec/batch
Epoch 1/20  Iteration 4/3520 Training loss: 4.1053 0.1096 sec/batch
Epoch 1/20  Iteration 5/3520 Training loss: 3.9753 0.1055 sec/batch
Epoch 1/20  Iteration 6/3520 Training loss: 3.8783 0.0985 sec/batch
Epoch 1/20  Iteration 7/3520 Training loss: 3.8034 0.0944 sec/batch
Epoch 1/20  Iteration 8/3520 Training loss: 3.7427 0.0929 sec/batch
Epoch 1/20  Iteration 9/3520 Training loss: 3.6939 0.0887 sec/batch
Epoch 1/20  Iteration 10/3520 Training loss: 3.6537 0.0883 sec/batch
Epoch 1/20  Iteration 11/3520 Training loss: 3.6174 0.0857 sec/batch
Epoch 1/20  Iteration 12/3520 Training loss: 3.5862 0.0887 sec/batch
Epoch 1/20  Iteration 13/3520 Training loss: 3.5597 0.0878 sec/batch
Epoch 1/20  Iteration 14/3520 Training loss: 3.5345 0.0875 sec/batch
Epoch 1/20  Iteration 15/3520 Training loss

In [24]:
tf.train.get_checkpoint_state('checkpoints/anna')

model_checkpoint_path: "checkpoints/anna/i3520_l512_1.090.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i200_l512_2.295.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i400_l512_1.791.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i600_l512_1.547.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i800_l512_1.416.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i1000_l512_1.336.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i1200_l512_1.284.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i1400_l512_1.246.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i1600_l512_1.218.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i1800_l512_1.189.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i2000_l512_1.155.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i2200_l512_1.143.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i2400_l512_1.129.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i2600_l512_1.117.ckpt"
all_model_checkpoint_paths: "checkpoints/an

## Sampling

In [25]:
def pick_top_n(preds, vocab_size, top_n=5):
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [26]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    samples = [c for c in prime]
    model = build_rnn(vocab_size, lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1, 1))
            #x[0,0] = ord(c)# vocab_to_int[c]
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.preds, model.final_state], 
                                         feed_dict=feed)

        c = pick_top_n(preds, len(vocab), 1)
        #samples.append(chr(c))#(int_to_vocab[c])
        samples.append(int_to_vocab[c])

        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.preds, model.final_state], 
                                         feed_dict=feed)

            c = pick_top_n(preds, len(vocab))
            #samples.append(chr(c))# (int_to_vocab[c])
            samples.append(int_to_vocab[c])
        
    return ''.join(samples)

In [31]:
checkpoint = "checkpoints/anna/i3520_l512_1.090.ckpt"
samp = sample(checkpoint, 2000, lstm_size, len(vocab), prime="Ivan")
print(samp)

Ivanovitch., and
this with their sounds with him, but that he had said, and his wife
was already book, but she felt so the province and the telling him of
anything but a pearant and to compose. He would have turned a long white
below with his comprehension. He was not the stear on the tall, they
all the steps of his cape of his hand to the position to this signs of
this fields about this. And all this had been, and she came into this
throat of the strange telring the poor of the sort, and his cross on
a service to set the feeling, and he could not the society, and that
he was all at once, and she will be a spirit at her. The mistress of
his own whole both open the childrons, all the place which was near
the step.

The chief walked a low words on the part of his stray, a long while with
a finger of the sumstle, this feeling of the massionary and work
and to blind the carriage at the prince, while a solition absorbed the
state of her sought to a little tatter. She was there at once of
wh