## Text-gen RNN

### Import relevant libraries

In [17]:
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

### Load text into a one huge string (millions of chars)

In [18]:
with open('anna.txt', 'r') as f:
    text=f.read()

In [19]:
print(text[:100])
print(len(text))

Chapter 1


Happy families are all alike; every unhappy family is unhappy in its own
way.

Everythin
1966145


### Create vocabulary : a set of all chars of which 'text' consists

In [20]:
vocab = set(text)

In [21]:
print(vocab)


{'v', 'D', '7', 'Z', '.', 'L', 'U', 's', 'E', ',', '2', 'n', ')', 'T', 'H', 'p', 'w', "'", ':', 'M', '6', 'G', '8', 'J', 'q', 'I', 'C', 'X', ' ', '5', 'g', '`', 'A', 'B', 'O', 'j', 'i', 'h', 'F', 'x', 'm', 'o', 'N', '-', ';', 'P', 'S', 'R', '_', 't', 'l', '(', 'e', 'y', '1', 'a', 'r', '?', 'z', 'b', '!', 'Y', 'k', 'f', '"', 'K', '4', 'c', 'd', 'u', 'V', '3', '9', '0', '\n', 'Q', 'W'}


### Create vocab_to_int and int_to_vocab. These are dictionaries. You won't need it. Hopefully

In [22]:
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))

In [23]:
print(vocab_to_int)
print()
print(int_to_vocab)

{'v': 0, 'D': 1, 'Z': 3, '.': 4, 'L': 5, 'U': 6, ',': 9, 'E': 8, '7': 2, '"': 64, 'n': 11, ')': 12, 'T': 13, 'H': 14, 'w': 16, "'": 17, ':': 18, '6': 20, 'J': 23, 'G': 21, '?': 57, 'I': 25, 'C': 26, 'X': 27, ' ': 28, '5': 29, 'K': 65, 'g': 30, '`': 31, 'A': 32, 'B': 33, 'O': 34, 'j': 35, 'i': 36, 'h': 37, 'F': 38, 'x': 39, '9': 72, 'M': 19, 'Q': 75, 'm': 40, 's': 7, '8': 22, 'N': 42, '-': 43, ';': 44, '2': 10, 'o': 41, 'P': 45, 'S': 46, 'R': 47, '_': 48, 't': 49, '(': 51, 'e': 52, 'q': 24, 'y': 53, '1': 54, 'a': 55, 'r': 56, 'p': 15, 'W': 76, '4': 66, 'z': 58, 'b': 59, '!': 60, 'Y': 61, 'k': 62, 'f': 63, 'l': 50, 'c': 67, 'd': 68, 'u': 69, '0': 73, '3': 71, '\n': 74, 'V': 70}

{0: 'v', 1: 'D', 2: '7', 3: 'Z', 4: '.', 5: 'L', 6: 'U', 7: 's', 8: 'E', 9: ',', 10: '2', 11: 'n', 12: ')', 13: 'T', 14: 'H', 15: 'p', 16: 'w', 17: "'", 18: ':', 19: 'M', 20: '6', 21: 'G', 22: '8', 23: 'J', 24: 'q', 25: 'I', 26: 'C', 27: 'X', 28: ' ', 29: '5', 30: 'g', 31: '`', 32: 'A', 33: 'B', 34: 'O', 35: 'j',

### Create an iteger representation of 'text' (millions of chars as ints)

In [26]:
#chars = np.array([ord(c) for c in text], dtype=np.int32)
chars = np.array([vocab_to_int[c] for c in text], dtype=np.int32)
print(chars[:100])

[26 37 55 15 49 52 56 28 54 74 74 74 14 55 15 15 53 28 63 55 40 36 50 36 52
  7 28 55 56 52 28 55 50 50 28 55 50 36 62 52 44 28 52  0 52 56 53 28 69 11
 37 55 15 15 53 28 63 55 40 36 50 53 28 36  7 28 69 11 37 55 15 15 53 28 36
 11 28 36 49  7 28 41 16 11 74 16 55 53  4 74 74  8  0 52 56 53 49 37 36 11]


## Data split

In [27]:
def split_data(chars, batch_size, num_steps, split_frac=0.9):
    """ 
    Split character data into training and validation sets, inputs and targets for each set.
    
    Arguments
    ---------
    chars: character array
    batch_size: Size of examples in each of batch
    num_steps: Number of sequence steps to keep in the input and pass to the network
    split_frac: Fraction of batches to keep in the training set
    
    
    Returns train_x, train_y, val_x, val_y
    """
    
    
    slice_size = batch_size * num_steps
    n_batches = int(len(chars) / slice_size)
    
    # Drop the last few characters to make only full batches
    x = chars[: n_batches*slice_size]
    y = chars[1: n_batches*slice_size + 1]
    
    # Split the data into batch_size slices, then stack them into a 2D matrix 
    x = np.stack(np.split(x, batch_size))
    y = np.stack(np.split(y, batch_size))
    
    # Now x and y are arrays with dimensions batch_size x n_batches*num_steps
    
    # Split into training and validation sets, keep the virst split_frac batches for training
    split_idx = int(n_batches*split_frac)
    train_x, train_y= x[:, :split_idx*num_steps], y[:, :split_idx*num_steps]
    val_x, val_y = x[:, split_idx*num_steps:], y[:, split_idx*num_steps:]
    
    return train_x, train_y, val_x, val_y

In [28]:
train_x, train_y, val_x, val_y = split_data(chars, 10, 200)

train_x.shape

(10, 176800)

In [29]:
def get_batch(arrs, num_steps):
    batch_size, slice_size = arrs[0].shape
    
    n_batches = int(slice_size/num_steps)
    for b in range(n_batches):
        yield [x[:, b*num_steps: (b+1)*num_steps] for x in arrs]

#### Creating training and validation sets using function defined above

### Building the model

In [30]:
def build_rnn(num_classes, batch_size=50, num_steps=50, lstm_size=128, num_layers=2,
              learning_rate=0.001, grad_clip=5, sampling=False):
        
    if sampling == True:
        batch_size, num_steps = 1, 1

    tf.reset_default_graph()
    
    # Declare placeholders we'll feed into the graph
    with tf.name_scope('inputs'):
        inputs = tf.placeholder(tf.int32, [batch_size, num_steps], name='inputs')
        x_one_hot = tf.one_hot(inputs, num_classes, name='x_one_hot')
    
    with tf.name_scope('targets'):
        targets = tf.placeholder(tf.int32, [batch_size, num_steps], name='targets')
        y_one_hot = tf.one_hot(targets, num_classes, name='y_one_hot')
        y_reshaped = tf.reshape(y_one_hot, [-1, num_classes])
    
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    # Build the RNN layers
    with tf.name_scope("RNN_cells"):
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        cell = tf.contrib.rnn.MultiRNNCell([drop] * num_layers)
    
    with tf.name_scope("RNN_init_state"):
        initial_state = cell.zero_state(batch_size, tf.float32)

    # Run the data through the RNN layers
    with tf.name_scope("RNN_forward"):
        rnn_inputs = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(x_one_hot, num_steps, 1)]
        outputs, state = tf.contrib.rnn.static_rnn(cell, rnn_inputs, initial_state=initial_state)
    
    final_state = state
    
    # Reshape output so it's a bunch of rows, one row for each cell output
    with tf.name_scope('sequence_reshape'):
        seq_output = tf.concat(outputs, axis=1,name='seq_output')
        output = tf.reshape(seq_output, [-1, lstm_size], name='graph_output')
    
    # Now connect the RNN outputs to a softmax layer and calculate the cost
    with tf.name_scope('logits'):
        softmax_w = tf.Variable(tf.truncated_normal((lstm_size, num_classes), stddev=0.1),
                               name='softmax_w')
        softmax_b = tf.Variable(tf.zeros(num_classes), name='softmax_b')
        logits = tf.matmul(output, softmax_w) + softmax_b
        tf.summary.histogram('softmax_w', softmax_w)
        tf.summary.histogram('softmax_b', softmax_b)

    with tf.name_scope('predictions'):
        preds = tf.nn.softmax(logits, name='predictions')
        tf.summary.histogram('predictions', preds)
    
    with tf.name_scope('cost'):
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped, name='loss')
        cost = tf.reduce_mean(loss, name='cost')
        tf.summary.scalar('cost', cost)

    # Optimizer for training, using gradient clipping to control exploding gradients
    with tf.name_scope('train'):
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
        train_op = tf.train.AdamOptimizer(learning_rate)
        optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    merged = tf.summary.merge_all()
    
    # Export the nodes 
    export_nodes = ['inputs', 'targets', 'initial_state', 'final_state',
                    'keep_prob', 'cost', 'preds', 'optimizer', 'merged']
    Graph = namedtuple('Graph', export_nodes)
    local_dict = locals()
    graph = Graph(*[local_dict[each] for each in export_nodes])
    
    return graph

## Training

In [31]:
batch_size = 100
num_steps = 100
lstm_size = 512
num_layers = 2
learning_rate = 0.002
keep_prob = 0.5

### Write out the graph for TensorBoard

In [32]:
!mkdir -p checkpoints/anna

In [33]:
def train(model, epochs, train_writer, test_writer):
    
    saver = tf.train.Saver(max_to_keep=100)
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Use the line below to load a checkpoint and resume training
        #saver.restore(sess, 'checkpoints/anna20.ckpt')

        n_batches = int(train_x.shape[1]/num_steps)
        iterations = n_batches * epochs
        for e in range(epochs):

            # Train network
            new_state = sess.run(model.initial_state)
            loss = 0
            for b, (x, y) in enumerate(get_batch([train_x, train_y], num_steps), 1):
                iteration = e*n_batches + b
                start = time.time()
                feed = {model.inputs: x,
                        model.targets: y,
                        model.keep_prob: 0.5,
                        model.initial_state: new_state}
                summary, batch_loss, new_state, _ = sess.run([model.merged, model.cost, 
                                                              model.final_state, model.optimizer], 
                                                              feed_dict=feed)
                loss += batch_loss
                end = time.time()
                print('Epoch {}/{} '.format(e+1, epochs),
                      'Iteration {}/{}'.format(iteration, iterations),
                      'Training loss: {:.4f}'.format(loss/b),
                      '{:.4f} sec/batch'.format((end-start)))

                train_writer.add_summary(summary, iteration)
                
                if (iteration%200 == 0) or (iteration == iterations):
                    # Check performance, notice dropout has been set to 1
                    val_loss = []
                    new_state = sess.run(model.initial_state)
                    for x, y in get_batch([val_x, val_y], num_steps):
                        feed = {model.inputs: x,
                                model.targets: y,
                                model.keep_prob: 1.,
                                model.initial_state: new_state}
                        summary, batch_loss, new_state = sess.run([model.merged,
                                                                   model.cost, 
                                                                   model.final_state], 
                                                                  feed_dict=feed)
                        val_loss.append(batch_loss)

                    test_writer.add_summary(summary, iteration)

                    print('Validation loss:', np.mean(val_loss),
                          'Saving checkpoint!')
                    # Below command is commented out in Mat's version
                    saver.save(sess, 
                               "checkpoints/anna/i{}_l{}_{:.3f}.ckpt".format(iteration, 
                                                                             lstm_size, 
                                                                             np.mean(val_loss)))

In [34]:
epochs = 20
batch_size = 100
num_steps = 100
train_x, train_y, val_x, val_y = split_data(chars, batch_size, num_steps)

for lstm_size in [512]:
    for num_layers in [2]:
        for learning_rate in [0.001, 0.002]:
            
            log_string_train = 'logs/anna/train/lr={},rl={},ru={}'.format(learning_rate, num_layers, lstm_size)
            train_writer = tf.summary.FileWriter(log_string_train)
            
            log_string_test = 'logs/anna/test/lr={},rl={},ru={}'.format(learning_rate, num_layers, lstm_size)
            test_writer = tf.summary.FileWriter(log_string_test)
            
            model = build_rnn(len(vocab), 
                    batch_size=batch_size,
                    num_steps=num_steps,
                    learning_rate=learning_rate,
                    lstm_size=lstm_size,
                    num_layers=num_layers)
            
            train(model, epochs, train_writer, test_writer)

Epoch 1/20  Iteration 1/3520 Training loss: 4.3434 1.5680 sec/batch
Epoch 1/20  Iteration 2/3520 Training loss: 4.2989 0.3407 sec/batch
Epoch 1/20  Iteration 3/3520 Training loss: 4.1235 0.2970 sec/batch
Epoch 1/20  Iteration 4/3520 Training loss: 4.2707 0.2919 sec/batch
Epoch 1/20  Iteration 5/3520 Training loss: 4.2358 0.2917 sec/batch
Epoch 1/20  Iteration 6/3520 Training loss: 4.1662 0.3007 sec/batch
Epoch 1/20  Iteration 7/3520 Training loss: 4.0919 0.2922 sec/batch
Epoch 1/20  Iteration 8/3520 Training loss: 4.0167 0.2967 sec/batch
Epoch 1/20  Iteration 9/3520 Training loss: 3.9485 0.2925 sec/batch
Epoch 1/20  Iteration 10/3520 Training loss: 3.8913 0.2929 sec/batch
Epoch 1/20  Iteration 11/3520 Training loss: 3.8440 0.2924 sec/batch
Epoch 1/20  Iteration 12/3520 Training loss: 3.8041 0.2917 sec/batch
Epoch 1/20  Iteration 13/3520 Training loss: 3.7694 0.2917 sec/batch
Epoch 1/20  Iteration 14/3520 Training loss: 3.7372 0.2924 sec/batch
Epoch 1/20  Iteration 15/3520 Training loss

In [36]:
tf.train.get_checkpoint_state('checkpoints/anna')

model_checkpoint_path: "checkpoints/anna/i3520_l512_1.082.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i200_l512_2.081.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i400_l512_1.638.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i600_l512_1.441.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i800_l512_1.339.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i1000_l512_1.275.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i1200_l512_1.218.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i1400_l512_1.193.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i1600_l512_1.167.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i1800_l512_1.146.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i2000_l512_1.130.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i2200_l512_1.121.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i2400_l512_1.113.ckpt"
all_model_checkpoint_paths: "checkpoints/anna/i2600_l512_1.104.ckpt"
all_model_checkpoint_paths: "checkpoints/an

## Sampling

In [37]:
def pick_top_n(preds, vocab_size, top_n=5):
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [38]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    samples = [c for c in prime]
    model = build_rnn(vocab_size, lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1, 1))
#             x[0,0] = ord(c)# vocab_to_int[c]
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.preds, model.final_state], 
                                         feed_dict=feed)

        c = pick_top_n(preds, len(vocab), 5)
#         samples.append(chr(c))#(int_to_vocab[c])
        samples.append(int_to_vocab[c])

        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.preds, model.final_state], 
                                         feed_dict=feed)

            c = pick_top_n(preds, len(vocab))
            #samples.append(chr(c))# (int_to_vocab[c])
            samples.append(int_to_vocab[c])
        
    return ''.join(samples)

In [42]:
batch_size = 100
num_steps = 100
lstm_size = 512
num_layers = 2
learning_rate = 0.002
keep_prob = 0.5

checkpoint = "checkpoints/anna/i3520_l512_1.082.ckpt"
samp = sample(checkpoint, 2000, lstm_size, len(vocab), prime="pleasant")
print(samp)

pleasant, and the
maid stood, and will hear that I was the clerk with his state. I don't
understand, but I," she said, standing at the trap, and with a smile, which
the meaning of some woman she was the consequence that he could come of
the subject, and he was a left alone, threw him to his wife's second
that to be saying to him.

Then when an even sent to him when a bit was not married in a sort
with her. He, at once there was so attendon to see her faces. The second
spirits of the more composed with her hands, was an instant what he saw with
her husband, was thinking of through a porter, and the meaning of
his hands, though he had no merely compose there would be three man. He
was aware, which, he was the sense of his side with his house; as she was so as
soon as she felt to say she had not always did...

He had broken or talking of him and the possession of the most, but
he could not give herself into that moment of this. The prince sat down into his
close with self shouting and dru