# RNN

### Character based recurrent neural network

In [225]:
import tensorflow as tf
import numpy as np

import os

In [4]:
file_url = 'https://www.gutenberg.org/files/1400/1400-0.txt'
file_path = tf.keras.utils.get_file('1400-0.txt', file_url)

Downloading data from https://www.gutenberg.org/files/1400/1400-0.txt


In [324]:
text = open(file_path).read()
# Strip off instruction text
text = text[824:18781]

In [325]:
# get unique characters
vocab = sorted(set(text))
print(f'Unique characters: {len(vocab)}\n {vocab}')

Unique characters: 61
 ['\n', ' ', '!', "'", '(', ')', ',', '-', '.', ':', ';', '?', 'A', 'B', 'C', 'D', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'W', 'Y', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '“', '”']


In [326]:
# Assign index values to each character
char_to_index = {char: idx for idx, char in enumerate(vocab)}
idx_to_char = np.array(vocab)

In [327]:
# translate the text to array of integers

text_as_int = np.asarray([char_to_index[char] for char in text])

In [328]:
text_as_int.shape

(17957,)

In [329]:
# show character mapping

start, stop = 30, 50

print(text[start:stop])
for i in range(start, stop):
    print(f'{text[i]}:{text_as_int[i]}', end=' ')

name being Pirrip, a
n:46 a:33 m:45 e:37  :1 b:34 e:37 i:41 n:46 g:39  :1 P:26 i:41 r:50 r:50 i:41 p:48 ,:6  :1 a:33 

#### Prepare an input dataset from the text

In [330]:
# Create input Dataset

char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

for idx in char_dataset.take(70):
    print(idx_to_char[idx], end= '')

Chapter I

My father's family name being Pirrip, and my Christian name

In [331]:
# Batch data for training

sequence_len = 80
examples_per_epoch = len(text) / sequence_len

sequences = char_dataset.batch(batch_size=sequence_len + 1, drop_remainder=True)

In [332]:
def split_input_target(text_chunk):
    """
        Creates the input and target data by shifting one
        character to the right
        
        Example: Outside -> utsider
    """
    input_txt = text_chunk[:-1]
    target_txt = text_chunk[1:]
    
    return input_txt, target_txt

In [333]:
dataset = sequences.map(split_input_target)

### Visualization of the data

###### Display the input and target data

- `dataset.take(n)`returns `n` batches.
- The batch size = `sequence_len` characters

In [334]:
# display for 2 batches
input_example, target_example = [], []

for input, target in dataset.take(1):
    print(r'Input: ', ''.join(idx_to_char[input.numpy()]))
    print(r'Target: ', ''.join(idx_to_char[target.numpy()]))
    print('----------------')
    
    input_example.append(input.numpy())
    target_example.append(target.numpy())

Input:  Chapter I

My father's family name being Pirrip, and my Christian name Philip, m
Target:  hapter I

My father's family name being Pirrip, and my Christian name Philip, my
----------------


##### Display input and the expected output

In [335]:

for i, (input_idx, target_idx) in enumerate(zip(input_example[0][:5], target_example[0][:5])):
    print(f'step: {i}\n')
    print(f'input: {idx_to_char[input_idx]} ({input_idx})')
    print(f'target:  {idx_to_char[target_idx]} ({target_idx})')
    

step: 0

input: C (14)
target:  h (40)
step: 1

input: h (40)
target:  a (33)
step: 2

input: a (33)
target:  p (48)
step: 3

input: p (48)
target:  t (52)
step: 4

input: t (52)
target:  e (37)


#### Preprocess the data for training

In [336]:
batch_size = 64 # chars in batch
steps_per_epoch = examples_per_epoch // batch_size
buffer_size = text_as_int.size

dataset = dataset.shuffle(buffer_size).batch(batch_size, drop_remainder=True)

# re-feed data to the model from the beginning
dataset = dataset.repeat()

### Build model

- The model has the layers:

    a) Embedding layers - Lookup table of vectors
    
    b) Gate Recurrent Unit
    
    c) Dense Layer

In [337]:
vocab_len = len(vocab)
embedding_dimension = 256
recurrent_nn_units = 1024

In [338]:
if tf.test.is_gpu_available():
    recurrent_nn = tf.keras.layers.CuDNNGRU
    print('Using GPU')
else:
    from functools import partial
    recurrent_nn = partial(tf.keras.layers.GRU, recurrent_activation='sigmoid')
    print('Using CPU')

Using CPU


In [339]:
def build_model(batch_size=64):
    layers = [tf.keras.layers.Embedding(input_dim=vocab_len,
                                        output_dim=embedding_dimension,
                                        batch_input_shape=[batch_size, None]
                                       ),
              recurrent_nn(units=recurrent_nn_units,
                          return_sequences=True,
                          stateful=True),
              tf.keras.layers.Dense(vocab_len)
             ]
    model = tf.keras.Sequential(layers)
    
    return model

##### instantiate model

In [340]:
model = build_model()

In [341]:
model

<tensorflow.python.keras.engine.sequential.Sequential at 0x7efe0032b6d0>

In [342]:
# Check output shape of model
batch_input_ex, batch_target_ex = [], []

for input_batch, target_batch in dataset.take(1):
    batch_pred = model(input_batch)
    batch_input_ex.append(input_batch)
    batch_target_ex.append(target_batch)
    
    print(f'Output shape: {batch_pred.shape}   # [Batch, sequence_len, vocab_len]')

Output shape: (64, 80, 61)   # [Batch, sequence_len, vocab_len]


In [343]:
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_9 (Embedding)      (64, None, 256)           15616     
_________________________________________________________________
gru_8 (GRU)                  (64, None, 1024)          3938304   
_________________________________________________________________
dense_8 (Dense)              (64, None, 61)            62525     
Total params: 4,016,445
Trainable params: 4,016,445
Non-trainable params: 0
_________________________________________________________________


#### Get model predictions
- This is achieved by **sampling** the output distributions

In [344]:
sampled_indices = tf.random.categorical(logits=batch_pred[0], num_samples=1)

In [345]:
sampled_indices.shape

TensorShape([80, 1])

In [346]:
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()
sampled_indices.shape

(80,)

In [347]:
# input and output before training

input_ = ''.join(idx_to_char[batch_input_ex[0][0].numpy()])
output_ = ''.join(idx_to_char[sampled_indices])

print('Input:  ', input_)
print()
print('Output: ', output_)

Input:   ay of his fast
diminishing slice, to enter upon our usual friendly competition; 

Output:  Dnde“dTacOgWMo
ikKxP“
jYMjOgT.yei;QpobF
MnqKzKhxO”wNJDx!cIe-k(H)Yx;uQG?.;vr“C BT


#### Loss

In [348]:
def loss_f(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

In [349]:
# Loss before training

batch_loss = tf.compat.v1.losses.sparse_softmax_cross_entropy(batch_target_ex[0].numpy(), batch_pred)
print(f'Scalar loss: {batch_loss.numpy()}')

Scalar loss: 4.109776496887207


##### Compile and train the model

In [365]:
# ** Train for longer epochs

In [352]:
model_path = '.model.rnn'
epochs = 100

model.compile(optimizer=tf.keras.optimizers.Adam(), loss=loss_f)

# save checkpoints
file_prefix = os.path.join(model_path, 'ckpt_{epoch}')
callback = tf.keras.callbacks.ModelCheckpoint(filepath=file_prefix, save_weights_only=True)

# Train
model.fit(dataset, epochs=epochs, steps_per_epoch=steps_per_epoch, callbacks=[callback])


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7efdf1ca9550>

In [353]:
# Resolve checkpoint for re-training

ckpt = tf.train.latest_checkpoint(model_path)
print('latest checkpoint: ', ckpt)

model = build_model(batch_size=1)
model.load_weights(ckpt)
model.build(tf.TensorShape([1, None]))
model.summary()

latest checkpoint:  .model.rnn/ckpt_5


W0901 23:21:44.214857 139632326494016 util.py:244] Unresolved object in checkpoint: (root).optimizer
W0901 23:21:44.215723 139632326494016 util.py:244] Unresolved object in checkpoint: (root).optimizer.iter
W0901 23:21:44.216896 139632326494016 util.py:244] Unresolved object in checkpoint: (root).optimizer.beta_1
W0901 23:21:44.217730 139632326494016 util.py:244] Unresolved object in checkpoint: (root).optimizer.beta_2
W0901 23:21:44.219081 139632326494016 util.py:244] Unresolved object in checkpoint: (root).optimizer.decay
W0901 23:21:44.221703 139632326494016 util.py:244] Unresolved object in checkpoint: (root).optimizer.learning_rate
W0901 23:21:44.223446 139632326494016 util.py:244] Unresolved object in checkpoint: (root).optimizer's state 'm' for (root).layer_with_weights-0.embeddings
W0901 23:21:44.224956 139632326494016 util.py:244] Unresolved object in checkpoint: (root).optimizer's state 'm' for (root).layer_with_weights-2.kernel
W0901 23:21:44.226416 139632326494016 util.py:2

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_10 (Embedding)     (1, None, 256)            15616     
_________________________________________________________________
gru_9 (GRU)                  (1, None, 1024)           3938304   
_________________________________________________________________
dense_9 (Dense)              (1, None, 61)             62525     
Total params: 4,016,445
Trainable params: 4,016,445
Non-trainable params: 0
_________________________________________________________________


##### Generate Text

In [354]:
def generate_text(model, start_string, temperature, n_chars=2000):
    # Convert start string to integer vector
    input_string = [char_to_index[char] for char in start_string]
    input_string = tf.expand_dims(input_string, axis=0)
    
    generated_txt = []
    model.reset_states()
    
    for i in range(n_chars):
        predictions = model(input_string)
        
        # Remove the batch dimension
        predictions = tf.squeeze(predictions, axis=0)
        
        # Categorical distribution to predict the word returned by the model
        predictions = predictions / temperature
        pred_id = tf.random.categorical(logits=predictions, num_samples=1)[-1, 0].numpy()
        
        # Pass predicted word as input to the model + hidden state
        input_str = tf.expand_dims([pred_id], 0)
        
        generated_txt.append(idx_to_char[pred_id])
        
    return start_string + ''.join(generated_txt)

In [361]:
generated_txt = generate_text(model, start_string='pop', temperature=.1, n_chars=300)

In [363]:
# Output at loss: 2.6327

generated_txt

'pop                   e     e e     e  e      e        e  e e  e                       e e  e       e     e    e             ee      e   e         ee    e    e                                    e           e  e e    e                    e e          ee                             e     e            e '