## Text-gen RNN

### Import relevant libraries

In [1]:
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

### Load text into a one huge string (millions of chars)

In [2]:
with open('anna.txt', 'r') as f:
    text=f.read()

In [3]:
print(text[:100])
print(len(text))

Chapter 1


Happy families are all alike; every unhappy family is unhappy in its own
way.

Everythin
1985223


### Create vocabulary : a set of all chars of which 'text' consists

In [4]:
vocab = set(text)

In [5]:
print(vocab)


{'X', 'k', '\n', 'q', 'p', '(', 'b', 'r', 'j', ':', 'U', '*', 'P', '/', 'l', 'M', '!', 'i', 'B', 'K', '9', 'A', 'c', 's', 'F', 'n', '-', 'S', 'f', 'O', 'u', '"', '_', '7', 'Q', 'I', ',', '2', 'G', '5', '$', 'E', 'y', 'H', 'R', 'm', ')', 'L', '8', 'Z', '`', 't', '3', '@', 'V', 'o', '0', 'a', '1', 'T', 'D', 'h', ';', '6', 'J', '&', 'g', '?', 'N', '4', 'W', 'e', '.', 'w', 'z', 'd', '%', 'C', ' ', 'x', "'", 'Y', 'v'}


### Create vocab_to_int and int_to_vocab. These are dictionaries. You won't need it. Hopefully

In [6]:
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))

In [7]:
print(vocab_to_int)
print()
print(int_to_vocab)

{'X': 0, 'k': 1, '\n': 2, 'q': 3, 'p': 4, 'O': 29, '(': 5, '8': 48, '%': 76, 'b': 6, 'j': 8, '*': 11, 'U': 10, 'P': 12, '/': 13, 'l': 14, 'M': 15, '!': 16, 'Z': 49, 'B': 18, '9': 20, '`': 50, 'A': 21, '_': 32, 's': 23, 'F': 24, 'n': 25, '-': 26, 'f': 28, '7': 33, 'u': 30, 'Q': 34, '5': 39, ',': 36, '2': 37, '$': 40, '3': 52, 'E': 41, '&': 65, 'y': 42, 'H': 43, 'I': 35, 'w': 73, 'm': 45, 'i': 17, ')': 46, 'L': 47, 'G': 38, 't': 51, '@': 53, 'V': 54, 'o': 55, '0': 56, 'a': 57, 'T': 59, 'D': 60, 'h': 61, ';': 62, '6': 63, 'J': 64, 'c': 22, 'S': 27, 'g': 66, ':': 9, '?': 67, 'Y': 81, '"': 31, 'N': 68, 'r': 7, '4': 69, 'v': 82, 'W': 70, 'K': 19, '1': 58, '.': 72, 'R': 44, 'z': 74, 'd': 75, 'C': 77, ' ': 78, 'x': 79, 'e': 71, "'": 80}

{0: 'X', 1: 'k', 2: '\n', 3: 'q', 4: 'p', 5: '(', 6: 'b', 7: 'r', 8: 'j', 9: ':', 10: 'U', 11: '*', 12: 'P', 13: '/', 14: 'l', 15: 'M', 16: '!', 17: 'i', 18: 'B', 19: 'K', 20: '9', 21: 'A', 22: 'c', 23: 's', 24: 'F', 25: 'n', 26: '-', 27: 'S', 28: 'f', 29: 'O'

### Create an iteger representation of 'text' (millions of chars as ints)

unichr(x) - char from unicode int
ord(x) - byte or in from chr() or unichar() respectively

In [8]:
vocab_int = np.array([ord(c) for c in vocab], dtype=np.int32)
print(vocab_int)

print(len(vocab))
print(len(vocab_int))

[ 88 107  10 113 112  40  98 114 106  58  85  42  80  47 108  77  33 105
  66  75  57  65  99 115  70 110  45  83 102  79 117  34  95  55  81  73
  44  50  71  53  36  69 121  72  82 109  41  76  56  90  96 116  51  64
  86 111  48  97  49  84  68 104  59  54  74  38 103  63  78  52  87 101
  46 119 122 100  37  67  32 120  39  89 118]
83
83


In [9]:
#chars = np.array([ord(c) for c in text], dtype=np.int32)
chars = np.array([vocab_to_int[c] for c in text], dtype=np.int32)
print(chars[:100])

[77 61 57  4 51 71  7 78 58  2  2  2 43 57  4  4 42 78 28 57 45 17 14 17 71
 23 78 57  7 71 78 57 14 14 78 57 14 17  1 71 62 78 71 82 71  7 42 78 30 25
 61 57  4  4 42 78 28 57 45 17 14 42 78 17 23 78 30 25 61 57  4  4 42 78 17
 25 78 17 51 23 78 55 73 25  2 73 57 42 72  2  2 41 82 71  7 42 51 61 17 25]


## Data split

In [10]:
def split_data(chars_vector,
               samples_per_batch,
               sample_length,
               split_frac=0.9):

    x = chars_vector[:-1]
    y = chars_vector[1:]

    sample_count = len(x)-sample_length+1
    
    x_batches = []
    y_batches = []
    
    start_range = range(0, sample_count, sample_length)

    x_samples = np.array([x[start:start+sample_length] for start in start_range])
    y_samples = np.array([y[start:start+sample_length] for start in start_range])

    if sample_count > samples_per_batch:

        batch_count = len(x_samples) // samples_per_batch
        new_length = batch_count * samples_per_batch
        end_crop_count = len(x_samples)-new_length

        if end_crop_count != 0:
            x_samples = x_samples[:-end_crop_count]
            y_samples = y_samples[:-end_crop_count]

        x_batches = np.array(np.split(x_samples, batch_count))
        y_batches = np.array(np.split(y_samples, batch_count))
        
    else:
        
        x_batches = x_samples
        y_batches = y_samples
        
    
    split_idx = int(len(x_batches)*split_frac)

    train_x, train_y = x_batches[:split_idx], y_batches[:split_idx]
    val_x, val_y = x_batches[split_idx:], y_batches[split_idx:]
    
    return train_x, train_y, val_x, val_y

In [11]:
vector = np.arange(42)
samples_in_batch = 5
sample_length = 3
split_frac = 1.

tx, ty, _, _ = split_data(vector, samples_in_batch, sample_length, split_frac)

print('\ntx:')
print(tx)
print('\nty:')
print(ty)

print(tx.shape)


tx:
[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]
  [ 9 10 11]
  [12 13 14]]

 [[15 16 17]
  [18 19 20]
  [21 22 23]
  [24 25 26]
  [27 28 29]]]

ty:
[[[ 1  2  3]
  [ 4  5  6]
  [ 7  8  9]
  [10 11 12]
  [13 14 15]]

 [[16 17 18]
  [19 20 21]
  [22 23 24]
  [25 26 27]
  [28 29 30]]]
(2, 5, 3)


In [12]:
def get_batch(tx, ty):
    for x, y in zip(tx, ty):
        yield x, y

#### Creating training and validation sets using function defined above

In [13]:
train_x, train_y, val_x, val_y = split_data(chars_vector=chars, 
                                            samples_per_batch=100, 
                                            sample_length=100)

In [14]:
print('train_x.shape: {}'.format(train_x.shape))
print('train_y.shape: {}'.format(train_y.shape))



train_x.shape: (178, 100, 100)
train_y.shape: (178, 100, 100)


In [15]:
train_x[0,:2,:]

array([[77, 61, 57,  4, 51, 71,  7, 78, 58,  2,  2,  2, 43, 57,  4,  4, 42,
        78, 28, 57, 45, 17, 14, 17, 71, 23, 78, 57,  7, 71, 78, 57, 14, 14,
        78, 57, 14, 17,  1, 71, 62, 78, 71, 82, 71,  7, 42, 78, 30, 25, 61,
        57,  4,  4, 42, 78, 28, 57, 45, 17, 14, 42, 78, 17, 23, 78, 30, 25,
        61, 57,  4,  4, 42, 78, 17, 25, 78, 17, 51, 23, 78, 55, 73, 25,  2,
        73, 57, 42, 72,  2,  2, 41, 82, 71,  7, 42, 51, 61, 17, 25],
       [66, 78, 73, 57, 23, 78, 17, 25, 78, 22, 55, 25, 28, 30, 23, 17, 55,
        25, 78, 17, 25, 78, 51, 61, 71, 78, 29,  6, 14, 55, 25, 23,  1, 42,
        23, 80, 78, 61, 55, 30, 23, 71, 72, 78, 59, 61, 71, 78, 73, 17, 28,
        71, 78, 61, 57, 75,  2, 75, 17, 23, 22, 55, 82, 71,  7, 71, 75, 78,
        51, 61, 57, 51, 78, 51, 61, 71, 78, 61, 30, 23,  6, 57, 25, 75, 78,
        73, 57, 23, 78, 22, 57,  7,  7, 42, 17, 25, 66, 78, 55, 25]], dtype=int32)

In [16]:
train_y[0, :2, :]

array([[61, 57,  4, 51, 71,  7, 78, 58,  2,  2,  2, 43, 57,  4,  4, 42, 78,
        28, 57, 45, 17, 14, 17, 71, 23, 78, 57,  7, 71, 78, 57, 14, 14, 78,
        57, 14, 17,  1, 71, 62, 78, 71, 82, 71,  7, 42, 78, 30, 25, 61, 57,
         4,  4, 42, 78, 28, 57, 45, 17, 14, 42, 78, 17, 23, 78, 30, 25, 61,
        57,  4,  4, 42, 78, 17, 25, 78, 17, 51, 23, 78, 55, 73, 25,  2, 73,
        57, 42, 72,  2,  2, 41, 82, 71,  7, 42, 51, 61, 17, 25, 66],
       [78, 73, 57, 23, 78, 17, 25, 78, 22, 55, 25, 28, 30, 23, 17, 55, 25,
        78, 17, 25, 78, 51, 61, 71, 78, 29,  6, 14, 55, 25, 23,  1, 42, 23,
        80, 78, 61, 55, 30, 23, 71, 72, 78, 59, 61, 71, 78, 73, 17, 28, 71,
        78, 61, 57, 75,  2, 75, 17, 23, 22, 55, 82, 71,  7, 71, 75, 78, 51,
        61, 57, 51, 78, 51, 61, 71, 78, 61, 30, 23,  6, 57, 25, 75, 78, 73,
        57, 23, 78, 22, 57,  7,  7, 42, 17, 25, 66, 78, 55, 25, 78]], dtype=int32)

### Building the model

In [17]:
def build_rnn(num_classes,
              samples_per_batch=50,
              sample_length=50,
              lstm_size=128, 
              num_layers=2,
              learning_rate=0.001, 
              grad_clip=5, 
              sampling=False):
    
    # When we're using this network for sampling later, we'll be passing in
    # one character at a time, so providing an option for that
    if sampling == True:
        samples_per_batch, sample_length = 1, 1

    tf.reset_default_graph()
    
    # Declare placeholders we'll feed into the graph
    inputs = tf.placeholder(tf.int32, [samples_per_batch, sample_length], name='inputs')
    targets = tf.placeholder(tf.int32, [samples_per_batch, sample_length], name='targets')
    
    # Keep probability placeholder for drop out layers
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    # One-hot encoding the input and target characters
    x_one_hot = tf.one_hot(inputs, num_classes)
    y_one_hot = tf.one_hot(targets, num_classes)

    ### Build the RNN layers
    # Use a basic LSTM cell
    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    
    # Add dropout to the cell
    drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
    
    # Stack up multiple LSTM layers, for deep learning
    cell = tf.contrib.rnn.MultiRNNCell([drop] * num_layers)
    initial_state = cell.zero_state(samples_per_batch, tf.float32)

    ### Run the data through the RNN layers
    # This makes a list where each element is on step in the sequence
    rnn_inputs = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(x_one_hot, sample_length, 1)]
    
    # Run each sequence step through the RNN and collect the outputs
    outputs, state = tf.contrib.rnn.static_rnn(cell, rnn_inputs, initial_state=initial_state)
    final_state = state
    
    # Reshape output so it's a bunch of rows, one output row for each step for each batch
    seq_output = tf.concat(outputs, axis=1)
    output = tf.reshape(seq_output, [-1, lstm_size])
    
    # Now connect the RNN outputs to a softmax layer
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((lstm_size, num_classes), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(num_classes))
    
    # Since output is a bunch of rows of RNN cell outputs, logits will be a bunch
    # of rows of logit outputs, one for each step and batch
    logits = tf.matmul(output, softmax_w) + softmax_b
    
    # Use softmax to get the probabilities for predicted characters
    preds = tf.nn.softmax(logits, name='predictions')
    
    # Reshape the targets to match the logits
    y_reshaped = tf.reshape(y_one_hot, [-1, num_classes])
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
    cost = tf.reduce_mean(loss)

    # Optimizer for training, using gradient clipping to control exploding gradients
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    # Export the nodes
    # NOTE: I'm using a namedtuple here because I think they are cool
    export_nodes = ['inputs', 'targets', 'initial_state', 'final_state',
                    'keep_prob', 'cost', 'preds', 'optimizer']
    Graph = namedtuple('Graph', export_nodes)
    local_dict = locals()
    graph = Graph(*[local_dict[each] for each in export_nodes])
    
    return graph

## Training

In [18]:
samples_per_batch = 100
sample_length = 100 
lstm_size = 512
num_layers = 2
learning_rate = 0.002
keep_prob = 0.5

In [19]:
epoch_count = 20
# Save every N iterations
save_every_n = 200

print('Generating training and validation data...')
train_x, train_y, val_x, val_y = split_data(chars, samples_per_batch, sample_length)
print('train_x len: {}'.format(len(train_x)))
print('train_y len: {}'.format(len(train_y)))
print('val_x len: {}'.format(len(val_x)))
print('val_y len: {}'.format(len(val_y)))
print('Data generation complete.')

print('Building model...')
model = build_rnn(len(vocab_int), 
                  samples_per_batch=samples_per_batch,
                  sample_length=sample_length,
                  learning_rate=learning_rate,
                  lstm_size=lstm_size,
                  num_layers=num_layers)
print('Model built')

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    n_batches = len(train_x)
    print('total batches: {}'.format(n_batches))
    
    iteration_count = n_batches * epoch_count
    print('total iteration count: {}'.format(iteration_count))
    
    for epoch in range(epoch_count):
        
        new_state = sess.run(model.initial_state)
        loss = 0
        
        for i, (x, y) in enumerate(get_batch(train_x, train_y)):
#             print(train_x[i])
#             print(x.shape)
            iteration = epoch*n_batches + i + 1
            start = time.time()
            
            feed = {model.inputs: x, 
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            
            batch_loss, new_state, _ = sess.run([model.cost, model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            loss += batch_loss
            end = time.time()
            print('Epoch {}/{} '.format(epoch, epoch_count-1),
                  'Iteration {}/{}'.format(iteration, iteration_count),
                  'Training loss: {:.4f}'.format(loss/(i+1)),
                  '{:.4f} sec/batch'.format((end-start)))
            
            if (iteration%save_every_n == 0) or (iteration == iteration_count):
                # Check performance, notice dropout has been set to 1
                val_loss = []
                new_state = sess.run(model.initial_state)
                for x, y in get_batch(val_x, val_y):
                    feed = {model.inputs: x,
                            model.targets: y,
                            model.keep_prob: 1.,
                            model.initial_state: new_state}
                    batch_loss, new_state = sess.run([model.cost, model.final_state], feed_dict=feed)
                    val_loss.append(batch_loss)

                print('Validation loss:', np.mean(val_loss),
                      'Saving checkpoint!')
                saver.save(sess, "checkpoints/i{}_l{}_v{:.3f}.ckpt".format(iteration, lstm_size, np.mean(val_loss)))
        

Generating training and validation data...
train_x len: 178
train_y len: 178
val_x len: 20
val_y len: 20
Data generation complete.
Building model...
Model built
total batches: 178
total iteration count: 3560
Epoch 0/19  Iteration 1/3560 Training loss: 4.4158 1.7391 sec/batch
Epoch 0/19  Iteration 2/3560 Training loss: 4.3063 0.3272 sec/batch
Epoch 0/19  Iteration 3/3560 Training loss: 4.7519 0.2953 sec/batch
Epoch 0/19  Iteration 4/3560 Training loss: 4.6665 0.2873 sec/batch
Epoch 0/19  Iteration 5/3560 Training loss: 4.5267 0.2887 sec/batch
Epoch 0/19  Iteration 6/3560 Training loss: 4.3733 0.2880 sec/batch
Epoch 0/19  Iteration 7/3560 Training loss: 4.2463 0.2875 sec/batch
Epoch 0/19  Iteration 8/3560 Training loss: 4.1422 0.2877 sec/batch
Epoch 0/19  Iteration 9/3560 Training loss: 4.0653 0.2875 sec/batch
Epoch 0/19  Iteration 10/3560 Training loss: 4.0024 0.2869 sec/batch
Epoch 0/19  Iteration 11/3560 Training loss: 3.9392 0.2861 sec/batch
Epoch 0/19  Iteration 12/3560 Training los

In [21]:
tf.train.get_checkpoint_state('checkpoints')

model_checkpoint_path: "checkpoints/i3560_l512_v1.330.ckpt"
all_model_checkpoint_paths: "checkpoints/i200_l512_v2.251.ckpt"
all_model_checkpoint_paths: "checkpoints/i400_l512_v1.872.ckpt"
all_model_checkpoint_paths: "checkpoints/i600_l512_v1.697.ckpt"
all_model_checkpoint_paths: "checkpoints/i800_l512_v1.579.ckpt"
all_model_checkpoint_paths: "checkpoints/i1000_l512_v1.509.ckpt"
all_model_checkpoint_paths: "checkpoints/i1200_l512_v1.472.ckpt"
all_model_checkpoint_paths: "checkpoints/i1400_l512_v1.430.ckpt"
all_model_checkpoint_paths: "checkpoints/i1600_l512_v1.400.ckpt"
all_model_checkpoint_paths: "checkpoints/i1800_l512_v1.390.ckpt"
all_model_checkpoint_paths: "checkpoints/i2000_l512_v1.373.ckpt"
all_model_checkpoint_paths: "checkpoints/i2200_l512_v1.368.ckpt"
all_model_checkpoint_paths: "checkpoints/i2400_l512_v1.350.ckpt"
all_model_checkpoint_paths: "checkpoints/i2600_l512_v1.346.ckpt"
all_model_checkpoint_paths: "checkpoints/i2800_l512_v1.346.ckpt"
all_model_checkpoint_paths: "check

## Sampling

In [22]:
def pick_top_n(preds, vocab_size, top_n=5):
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [23]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    samples = [c for c in prime]
    model = build_rnn(vocab_size, lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1, 1))
            #x[0,0] = ord(c)# vocab_to_int[c]
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.preds, model.final_state], 
                                         feed_dict=feed)

        c = pick_top_n(preds, len(vocab), 1)
        #samples.append(chr(c))#(int_to_vocab[c])
        samples.append(int_to_vocab[c])

        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.preds, model.final_state], 
                                         feed_dict=feed)

            c = pick_top_n(preds, len(vocab))
            #samples.append(chr(c))# (int_to_vocab[c])
            samples.append(int_to_vocab[c])
        
    return ''.join(samples)

In [25]:
checkpoint = "checkpoints/i3560_l512_v1.330.ckpt"
samp = sample(checkpoint, 2000, lstm_size, len(vocab), prime="Fra")
print(samp)

Frank had so that she had been talking of him. He had to spok
and the sound of his word.

She had not been so much as always in a life of him at the sight to be sometimes to
be all the sides of the colonel of Levin's sincere, who came into the country at the
same time that which had not the sound of her for a support of the position, he was talking at
the conversation of to the cross to the same.

"I wanted," said the little smallen, wanted an account."

"I don't look for her to
see him. This is a little son, and he's nothing, but the subject of to her head;. He was not
all the more still more from that she had never seen her
and her mustaches. "What do you say? Happen with him?" she said, taking his bad working
into the door of the disagreeable feeling of herself, to to
be some town in the princess, had breaked off at the moment as seeking, as he felt
that he had not seen him as he had been being being away from the service.

"And with him to speak of them. He has not
he dressing at t