## Text-gen RNN

### Import relevant libraries

In [1]:
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

### Load text into a one huge string (millions of chars)

In [2]:
with open('anna.txt', 'r') as f:
    text=f.read()

In [3]:
print(text[:100])
print(len(text))

Chapter 1


Happy families are all alike; every unhappy family is unhappy in its own
way.

Everythin
1985223


### Create vocabulary : a set of all chars of which 'text' consists

In [4]:
vocab = set(text)

In [5]:
print(vocab)


{'F', '9', 'S', '5', "'", ' ', '&', 'A', 'm', 'i', 'a', 'r', '@', 'o', 'K', 'Z', ')', '*', 'b', 'I', ';', '3', '`', ',', 'R', 'z', 'W', 'c', '!', '7', 'X', 'u', 'd', 'Y', 'e', 'L', 'n', '8', '-', '?', '/', '1', 'q', '$', 'v', '4', '.', 't', 'g', 'N', 'P', 'j', '"', 'B', 'G', 'J', 'Q', 'k', 'p', 'f', 'O', 'U', '(', 'H', '\n', 'T', 'C', 's', ':', 'E', 'D', 'w', '0', 'y', 'l', 'x', '2', '%', '_', 'V', 'M', 'h', '6'}


### Create vocab_to_int and int_to_vocab. These are dictionaries. You won't need it. Hopefully

In [6]:
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))

In [7]:
print(vocab_to_int)
print()
print(int_to_vocab)

{'F': 0, '9': 1, '5': 3, 'l': 74, 'J': 55, "'": 4, 'x': 75, ' ': 5, '&': 6, 'A': 7, 'm': 8, 'i': 9, 'a': 10, 'r': 11, 'o': 13, 'K': 14, ')': 16, 'Z': 15, '*': 17, 'S': 2, 'I': 19, '3': 21, ',': 23, 'C': 66, 'R': 24, 'z': 25, 'G': 54, 'g': 48, 'W': 26, 'c': 27, 'X': 30, '7': 29, 'u': 31, 'd': 32, 'Y': 33, 'e': 34, '8': 37, 'n': 36, 'b': 18, '\n': 64, 'Q': 56, '1': 41, 'q': 42, '$': 43, 'v': 44, '`': 22, '.': 46, 't': 47, ';': 20, 'N': 49, 'P': 50, 'j': 51, '"': 52, 'B': 53, 'O': 60, '@': 12, 's': 67, 'p': 58, '!': 28, '4': 45, 'U': 61, '(': 62, 'H': 63, 'T': 65, 'E': 69, '/': 40, '-': 38, ':': 68, '?': 39, 'w': 71, '0': 72, 'D': 70, 'L': 35, 'k': 57, '_': 78, 'V': 79, '2': 76, 'M': 80, '%': 77, 'f': 59, 'h': 81, 'y': 73, '6': 82}

{0: 'F', 1: '9', 2: 'S', 3: '5', 4: "'", 5: ' ', 6: '&', 7: 'A', 8: 'm', 9: 'i', 10: 'a', 11: 'r', 12: '@', 13: 'o', 14: 'K', 15: 'Z', 16: ')', 17: '*', 18: 'b', 19: 'I', 20: ';', 21: '3', 22: '`', 23: ',', 24: 'R', 25: 'z', 26: 'W', 27: 'c', 28: '!', 29: '7',

### Create an iteger representation of 'text' (millions of chars as ints)

unichr(x) - char from unicode int
ord(x) - byte or in from chr() or unichar() respectively

In [8]:
vocab_int = np.array([ord(c) for c in vocab], dtype=np.int32)
print(vocab_int)

print(len(vocab))
print(len(vocab_int))

[ 70  57  83  53  39  32  38  65 109 105  97 114  64 111  75  90  41  42
  98  73  59  51  96  44  82 122  87  99  33  55  88 117 100  89 101  76
 110  56  45  63  47  49 113  36 118  52  46 116 103  78  80 106  34  66
  71  74  81 107 112 102  79  85  40  72  10  84  67 115  58  69  68 119
  48 121 108 120  50  37  95  86  77 104  54]
83
83


In [9]:
#chars = np.array([ord(c) for c in text], dtype=np.int32)
chars = np.array([vocab_to_int[c] for c in text], dtype=np.int32)
print(chars[:100])

[66 81 10 58 47 34 11  5 41 64 64 64 63 10 58 58 73  5 59 10  8  9 74  9 34
 67  5 10 11 34  5 10 74 74  5 10 74  9 57 34 20  5 34 44 34 11 73  5 31 36
 81 10 58 58 73  5 59 10  8  9 74 73  5  9 67  5 31 36 81 10 58 58 73  5  9
 36  5  9 47 67  5 13 71 36 64 71 10 73 46 64 64 69 44 34 11 73 47 81  9 36]


## Data split

In [12]:
def split_data(chars_vector,
               samples_per_batch,
               sample_length,
               split_frac=0.9):

    x = chars_vector[:-1]
    y = chars_vector[1:]

    sample_count = len(x)-sample_length+1
    
    x_batches = []
    y_batches = []
    
    start_range = range(0, sample_count, sample_length)

    x_samples = np.array([x[start:start+sample_length] for start in start_range])
    y_samples = np.array([y[start:start+sample_length] for start in start_range])

    if sample_count > samples_per_batch:

        batch_count = len(x_samples) // samples_per_batch
        new_length = batch_count * samples_per_batch
        end_crop_count = len(x_samples)-new_length

        if end_crop_count != 0:
            x_samples = x_samples[:-end_crop_count]
            y_samples = y_samples[:-end_crop_count]

        x_batches = np.array(np.split(x_samples, batch_count))
        y_batches = np.array(np.split(y_samples, batch_count))
        
    else:
        
        x_batches = x_samples
        y_batches = y_samples
        
    
    split_idx = int(len(x_batches)*split_frac)

    train_x, train_y = x_batches[:split_idx], y_batches[:split_idx]
    val_x, val_y = x_batches[split_idx:], y_batches[split_idx:]
    
    return train_x, train_y, val_x, val_y

In [13]:
vector = np.arange(42)
samples_in_batch = 5
sample_length = 3
split_frac = 1.

tx, ty, _, _ = split_data(vector, samples_in_batch, sample_length, split_frac)

print('\ntx:')
print(tx)
print('\nty:')
print(ty)

print(tx.shape)


tx:
[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]
  [ 9 10 11]
  [12 13 14]]

 [[15 16 17]
  [18 19 20]
  [21 22 23]
  [24 25 26]
  [27 28 29]]]

ty:
[[[ 1  2  3]
  [ 4  5  6]
  [ 7  8  9]
  [10 11 12]
  [13 14 15]]

 [[16 17 18]
  [19 20 21]
  [22 23 24]
  [25 26 27]
  [28 29 30]]]
(2, 5, 3)


In [14]:
def get_batch(tx, ty):
    for x, y in zip(tx, ty):
        yield x, y

#### Creating training and validation sets using function defined above

In [15]:
train_x, train_y, val_x, val_y = split_data(chars_vector=chars, 
                                            samples_per_batch=100, 
                                            sample_length=100)

In [16]:
print('train_x.shape: {}'.format(train_x.shape))
print('train_y.shape: {}'.format(train_y.shape))



train_x.shape: (178, 100, 100)
train_y.shape: (178, 100, 100)


In [17]:
train_x[0,:2,:]

array([[66, 81, 10, 58, 47, 34, 11,  5, 41, 64, 64, 64, 63, 10, 58, 58, 73,
         5, 59, 10,  8,  9, 74,  9, 34, 67,  5, 10, 11, 34,  5, 10, 74, 74,
         5, 10, 74,  9, 57, 34, 20,  5, 34, 44, 34, 11, 73,  5, 31, 36, 81,
        10, 58, 58, 73,  5, 59, 10,  8,  9, 74, 73,  5,  9, 67,  5, 31, 36,
        81, 10, 58, 58, 73,  5,  9, 36,  5,  9, 47, 67,  5, 13, 71, 36, 64,
        71, 10, 73, 46, 64, 64, 69, 44, 34, 11, 73, 47, 81,  9, 36],
       [48,  5, 71, 10, 67,  5,  9, 36,  5, 27, 13, 36, 59, 31, 67,  9, 13,
        36,  5,  9, 36,  5, 47, 81, 34,  5, 60, 18, 74, 13, 36, 67, 57, 73,
        67,  4,  5, 81, 13, 31, 67, 34, 46,  5, 65, 81, 34,  5, 71,  9, 59,
        34,  5, 81, 10, 32, 64, 32,  9, 67, 27, 13, 44, 34, 11, 34, 32,  5,
        47, 81, 10, 47,  5, 47, 81, 34,  5, 81, 31, 67, 18, 10, 36, 32,  5,
        71, 10, 67,  5, 27, 10, 11, 11, 73,  9, 36, 48,  5, 13, 36]], dtype=int32)

In [18]:
train_y[0, :2, :]

array([[81, 10, 58, 47, 34, 11,  5, 41, 64, 64, 64, 63, 10, 58, 58, 73,  5,
        59, 10,  8,  9, 74,  9, 34, 67,  5, 10, 11, 34,  5, 10, 74, 74,  5,
        10, 74,  9, 57, 34, 20,  5, 34, 44, 34, 11, 73,  5, 31, 36, 81, 10,
        58, 58, 73,  5, 59, 10,  8,  9, 74, 73,  5,  9, 67,  5, 31, 36, 81,
        10, 58, 58, 73,  5,  9, 36,  5,  9, 47, 67,  5, 13, 71, 36, 64, 71,
        10, 73, 46, 64, 64, 69, 44, 34, 11, 73, 47, 81,  9, 36, 48],
       [ 5, 71, 10, 67,  5,  9, 36,  5, 27, 13, 36, 59, 31, 67,  9, 13, 36,
         5,  9, 36,  5, 47, 81, 34,  5, 60, 18, 74, 13, 36, 67, 57, 73, 67,
         4,  5, 81, 13, 31, 67, 34, 46,  5, 65, 81, 34,  5, 71,  9, 59, 34,
         5, 81, 10, 32, 64, 32,  9, 67, 27, 13, 44, 34, 11, 34, 32,  5, 47,
        81, 10, 47,  5, 47, 81, 34,  5, 81, 31, 67, 18, 10, 36, 32,  5, 71,
        10, 67,  5, 27, 10, 11, 11, 73,  9, 36, 48,  5, 13, 36,  5]], dtype=int32)

### Building the model

In [19]:
def build_rnn(num_classes,
              samples_per_batch=50,
              sample_length=50,
              lstm_size=128, 
              num_layers=2,
              learning_rate=0.001, 
              grad_clip=5, 
              sampling=False):
    
    # When we're using this network for sampling later, we'll be passing in
    # one character at a time, so providing an option for that
    if sampling == True:
        samples_per_batch, sample_length = 1, 1

    tf.reset_default_graph()
    
    # Declare placeholders we'll feed into the graph
    inputs = tf.placeholder(tf.int32, [samples_per_batch, sample_length], name='inputs')
    targets = tf.placeholder(tf.int32, [samples_per_batch, sample_length], name='targets')
    
    # Keep probability placeholder for drop out layers
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    # One-hot encoding the input and target characters
    x_one_hot = tf.one_hot(inputs, num_classes)
    y_one_hot = tf.one_hot(targets, num_classes)

    ### Build the RNN layers
    # Use a basic LSTM cell
    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    
    # Add dropout to the cell
    drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
    
    # Stack up multiple LSTM layers, for deep learning
    cell = tf.contrib.rnn.MultiRNNCell([drop] * num_layers)
    initial_state = cell.zero_state(samples_per_batch, tf.float32)

    ### Run the data through the RNN layers
    # This makes a list where each element is on step in the sequence
    rnn_inputs = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(x_one_hot, sample_length, 1)]
    
    # Run each sequence step through the RNN and collect the outputs
    outputs, state = tf.contrib.rnn.static_rnn(cell, rnn_inputs, initial_state=initial_state)
    final_state = state
    
    # Reshape output so it's a bunch of rows, one output row for each step for each batch
    seq_output = tf.concat(outputs, axis=1)
    output = tf.reshape(seq_output, [-1, lstm_size])
    
    # Now connect the RNN outputs to a softmax layer
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((lstm_size, num_classes), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(num_classes))
    
    # Since output is a bunch of rows of RNN cell outputs, logits will be a bunch
    # of rows of logit outputs, one for each step and batch
    logits = tf.matmul(output, softmax_w) + softmax_b
    
    # Use softmax to get the probabilities for predicted characters
    preds = tf.nn.softmax(logits, name='predictions')
    
    # Reshape the targets to match the logits
    y_reshaped = tf.reshape(y_one_hot, [-1, num_classes])
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
    cost = tf.reduce_mean(loss)

    # Optimizer for training, using gradient clipping to control exploding gradients
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    # Export the nodes
    # NOTE: I'm using a namedtuple here because I think they are cool
    export_nodes = ['inputs', 'targets', 'initial_state', 'final_state',
                    'keep_prob', 'cost', 'preds', 'optimizer']
    Graph = namedtuple('Graph', export_nodes)
    local_dict = locals()
    graph = Graph(*[local_dict[each] for each in export_nodes])
    
    return graph

## Training

In [20]:
samples_per_batch = 100
sample_length = 100 
lstm_size = 512
num_layers = 2
learning_rate = 0.002
keep_prob = 0.5

In [21]:
epoch_count = 20
# Save every N iterations
save_every_n = 200

print('Generating training and validation data...')
train_x, train_y, val_x, val_y = split_data(chars, samples_per_batch, sample_length)
print('train_x len: {}'.format(len(train_x)))
print('train_y len: {}'.format(len(train_y)))
print('val_x len: {}'.format(len(val_x)))
print('val_y len: {}'.format(len(val_y)))
print('Data generation complete.')

print('Building model...')
model = build_rnn(len(vocab_int), 
                  samples_per_batch=samples_per_batch,
                  sample_length=sample_length,
                  learning_rate=learning_rate,
                  lstm_size=lstm_size,
                  num_layers=num_layers)
print('Model built')

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    n_batches = len(train_x)
    print('total batches: {}'.format(n_batches))
    
    iteration_count = n_batches * epoch_count
    print('total iteration count: {}'.format(iteration_count))
    
    for epoch in range(epoch_count):
        
        new_state = sess.run(model.initial_state)
        loss = 0
        
        for i, (x, y) in enumerate(get_batch(train_x, train_y)):
#             print(train_x[i])
#             print(x.shape)
            iteration = epoch*n_batches + i + 1
            start = time.time()
            
            feed = {model.inputs: x, 
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            
            batch_loss, new_state, _ = sess.run([model.cost, model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            loss += batch_loss
            end = time.time()
            print('Epoch {}/{} '.format(epoch, epoch_count-1),
                  'Iteration {}/{}'.format(iteration, iteration_count),
                  'Training loss: {:.4f}'.format(loss/(i+1)),
                  '{:.4f} sec/batch'.format((end-start)))
            
            if (iteration%save_every_n == 0) or (iteration == iteration_count):
                # Check performance, notice dropout has been set to 1
                val_loss = []
                new_state = sess.run(model.initial_state)
                for x, y in get_batch(val_x, val_y):
                    feed = {model.inputs: x,
                            model.targets: y,
                            model.keep_prob: 1.,
                            model.initial_state: new_state}
                    batch_loss, new_state = sess.run([model.cost, model.final_state], feed_dict=feed)
                    val_loss.append(batch_loss)

                print('Validation loss:', np.mean(val_loss),
                      'Saving checkpoint!')
                saver.save(sess, "checkpoints/i{}_l{}_v{:.3f}.ckpt".format(iteration, lstm_size, np.mean(val_loss)))
        

Generating training and validation data...
train_x len: 178
train_y len: 178
val_x len: 20
val_y len: 20
Data generation complete.
Building model...
Model built
total batches: 178
total iteration count: 3560
Epoch 0/19  Iteration 1/3560 Training loss: 4.4193 1.7882 sec/batch
Epoch 0/19  Iteration 2/3560 Training loss: 4.3430 0.3352 sec/batch
Epoch 0/19  Iteration 3/3560 Training loss: 4.8349 0.2974 sec/batch
Epoch 0/19  Iteration 4/3560 Training loss: 4.6370 0.2898 sec/batch
Epoch 0/19  Iteration 5/3560 Training loss: 4.4517 0.2921 sec/batch
Epoch 0/19  Iteration 6/3560 Training loss: 4.2918 0.2916 sec/batch
Epoch 0/19  Iteration 7/3560 Training loss: 4.1685 0.2904 sec/batch
Epoch 0/19  Iteration 8/3560 Training loss: 4.0756 0.2885 sec/batch
Epoch 0/19  Iteration 9/3560 Training loss: 4.0052 0.2865 sec/batch
Epoch 0/19  Iteration 10/3560 Training loss: 3.9409 0.2857 sec/batch
Epoch 0/19  Iteration 11/3560 Training loss: 3.8773 0.2865 sec/batch
Epoch 0/19  Iteration 12/3560 Training los

In [23]:
tf.train.get_checkpoint_state('checkpoints')

model_checkpoint_path: "checkpoints/i3560_l512_v1.319.ckpt"
all_model_checkpoint_paths: "checkpoints/i200_l512_v2.253.ckpt"
all_model_checkpoint_paths: "checkpoints/i400_l512_v1.873.ckpt"
all_model_checkpoint_paths: "checkpoints/i600_l512_v1.692.ckpt"
all_model_checkpoint_paths: "checkpoints/i800_l512_v1.577.ckpt"
all_model_checkpoint_paths: "checkpoints/i1000_l512_v1.504.ckpt"
all_model_checkpoint_paths: "checkpoints/i1200_l512_v1.469.ckpt"
all_model_checkpoint_paths: "checkpoints/i1400_l512_v1.428.ckpt"
all_model_checkpoint_paths: "checkpoints/i1600_l512_v1.401.ckpt"
all_model_checkpoint_paths: "checkpoints/i1800_l512_v1.387.ckpt"
all_model_checkpoint_paths: "checkpoints/i2000_l512_v1.370.ckpt"
all_model_checkpoint_paths: "checkpoints/i2200_l512_v1.369.ckpt"
all_model_checkpoint_paths: "checkpoints/i2400_l512_v1.349.ckpt"
all_model_checkpoint_paths: "checkpoints/i2600_l512_v1.345.ckpt"
all_model_checkpoint_paths: "checkpoints/i2800_l512_v1.342.ckpt"
all_model_checkpoint_paths: "check

## Sampling

In [24]:
def pick_top_n(preds, vocab_size, top_n=5):
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c

In [25]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    samples = [c for c in prime]
    model = build_rnn(vocab_size, lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1, 1))
            #x[0,0] = ord(c)# vocab_to_int[c]
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.preds, model.final_state], 
                                         feed_dict=feed)

        c = pick_top_n(preds, len(vocab), 1)
        #samples.append(chr(c))#(int_to_vocab[c])
        samples.append(int_to_vocab[c])

        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.preds, model.final_state], 
                                         feed_dict=feed)

            c = pick_top_n(preds, len(vocab))
            #samples.append(chr(c))# (int_to_vocab[c])
            samples.append(int_to_vocab[c])
        
    return ''.join(samples)

In [27]:
checkpoint = "checkpoints/i3560_l512_v1.319.ckpt"
samp = sample(checkpoint, 2000, lstm_size, len(vocab), prime="Obl")
print(samp)

Oblonsky was. Anna had not seen him as though she were
all stepping in the sick man with hurriedly with the corridor; he saw
this soft into the carriage that at the children was not as he had taken her, and asked
all his wife to speak about a chelp, and he heard her head and heard his whole personal portrait, but
still the church of success. And how was that, if he had been stopping at her suddenly out of time, and the
most pleasure to be completely feeling of a prince, and ashamed about the same for more
answer to himself a chair, a calmer talking to the
decision of his starm. The marshal of some waiter or had talked about a clutch a doctor, as though to see her at
home and to give a letting
the person he had not seen all to a corness,
turning to the same arm and said the same soft fellow, and
she saw the simple of the same tried with her and her face with a
fear of her, and he smiled so much that in a feeling of thoughts, too, as he was so much and at a
summer and missed in the same 