In [1]:
import codecs
import numpy as np
import os
import time

import tensorflow as tf
tf.enable_eager_execution()

## Get the Shakespeare dataset

In [2]:
# Fetch the corpus; get_file hands back the local path that is opened below.
data_fpath = tf.keras.utils.get_file(
    'shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

# Read the whole file as UTF-8. The context manager closes the handle as soon
# as the read finishes instead of leaving it open until garbage collection.
with codecs.open(data_fpath, 'r', encoding='utf8') as corpus_file:
    text = corpus_file.read()

print('Length of the text: {} characters'.format(len(text)))
print('=====================================')
print(text[:250])

Length of the text: 1115394 characters
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



## Extract the vocabulary

In [3]:
# The vocabulary is every distinct character of the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()
VOCAB_SIZE = len(vocab)

print('{} unique characters'.format(VOCAB_SIZE))

65 unique characters


## Create the char2idx / idx2char lookup tables and convert the text to integer ids

In [4]:
# Forward lookup: character -> integer id (its position in `vocab`).
char2idx = {char: idx for idx, char in enumerate(vocab)}
# Reverse lookup: integer id -> character. An array (not a dict) so that it can
# be indexed with a whole array of ids at once.
idx2char = np.array(vocab)

# Encode the entire corpus as integer ids.
text_as_int = np.array([char2idx[c] for c in text])

# Preview the first 20 mappings. Walking `vocab` keeps the preview in id order
# without relying on dict iteration order, and avoids rebinding `_`, which
# IPython uses for the last cell output.
for char in vocab[:20]:
    print('{:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('...')
print('=====================================')
print('Example of the encoded text: {}'.format(text_as_int[:13]))

'\n':   0,
' ' :   1,
'!' :   2,
'$' :   3,
'&' :   4,
"'" :   5,
',' :   6,
'-' :   7,
'.' :   8,
'3' :   9,
':' :  10,
';' :  11,
'?' :  12,
'A' :  13,
'B' :  14,
'C' :  15,
'D' :  16,
'E' :  17,
'F' :  18,
'G' :  19,
...
Example of the encoded text: [18 47 56 57 58  1 15 47 58 47 64 43 52]


## Prepare TF data pipeline

In [5]:
SEQ_LEN = 100
BATCH_SIZE = 64

# Every example is cut from a chunk of SEQ_LEN + 1 characters (the input and
# the target are that chunk shifted by one), so the number of examples in one
# pass over the corpus is len(text) // (SEQ_LEN + 1), not len(text) // SEQ_LEN.
# Dividing by SEQ_LEN overestimates steps_per_epoch and makes each "epoch"
# run slightly past a single pass of the (repeated) dataset.
examples_per_epoch = len(text) // (SEQ_LEN + 1)
steps_per_epoch = examples_per_epoch // BATCH_SIZE

def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is the chunk without its last element and the target is the
    chunk without its first element, so ``target[i]`` is the element that
    follows ``input[i]`` in the original chunk.
    """
    return chunk[:-1], chunk[1:]

# Pipeline: individual ids -> chunks of SEQ_LEN + 1 ids -> (input, target)
# pairs -> shuffled -> batches of BATCH_SIZE pairs. Incomplete chunks and
# incomplete batches are dropped.
dataset = (
    tf.data.Dataset.from_tensor_slices(text_as_int)
    .batch(SEQ_LEN + 1, drop_remainder=True)
    .map(split_input_target)
    .shuffle(buffer_size=10000)
    .batch(BATCH_SIZE, drop_remainder=True)
)

## Take an example from the dataset

In [6]:
# Decode the first row of one batch back to text to eyeball the input/target pair.
for input_example, target_example in dataset.take(1):
    first_input_ids = input_example.numpy()[0]
    first_target_ids = target_example.numpy()[0]
    print('=====================================')
    print('Input data: ', repr(''.join(idx2char[first_input_ids])))
    print('Target data:', repr(''.join(idx2char[first_target_ids])))

Instructions for updating:
Colocations handled automatically by placer.
Input data:  "f of France, but worse than wolves of France,\nWhose tongue more poisons than the adder's tooth!\nHow "
Target data: " of France, but worse than wolves of France,\nWhose tongue more poisons than the adder's tooth!\nHow i"


## Build the model

In [7]:
def build_model(batch_size, embedding_dim=256, rnn_units=1024, vocab_size=None):
    """Build the character-level model: Embedding -> stateful GRU -> Dense.

    Args:
        batch_size: fixed batch dimension baked into the model's input shape
            (the recurrent layer is stateful, so it cannot vary per call).
        embedding_dim: size of the character embedding vectors.
        rnn_units: number of units in the recurrent layer.
        vocab_size: number of distinct characters; defaults to the notebook's
            ``VOCAB_SIZE`` (looked up at call time).

    Returns:
        An uncompiled ``tf.keras.Sequential`` whose output has one logit per
        vocabulary entry at every time step.
    """
    if vocab_size is None:
        vocab_size = VOCAB_SIZE

    # CuDNNGRU only runs on a CUDA GPU. Fall back to the generic GRU elsewhere
    # so the notebook still runs (more slowly) on CPU-only machines.
    if tf.test.is_gpu_available(cuda_only=True):
        recurrent_layer = tf.keras.layers.CuDNNGRU(
            rnn_units,
            return_sequences=True,
            recurrent_initializer='glorot_uniform',
            stateful=True)
    else:
        recurrent_layer = tf.keras.layers.GRU(
            rnn_units,
            return_sequences=True,
            # sigmoid is the recurrent activation the cuDNN kernel uses
            recurrent_activation='sigmoid',
            recurrent_initializer='glorot_uniform',
            stateful=True)

    return tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim,
                                  batch_input_shape=[batch_size, None]),
        recurrent_layer,
        tf.keras.layers.Dense(vocab_size),
    ])

# Training model: its fixed batch dimension matches the dataset's batch size.
model = build_model(BATCH_SIZE)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
cu_dnngru (CuDNNGRU)         (64, None, 1024)          3938304   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


![A drawing of the data passing through the model](https://tensorflow.org/tutorials/sequences/images/text_generation_training.png)

## Try the model before training

In [8]:
# Run one batch through the untrained model to check the output shape.
input_example_batch, target_example_batch = list(dataset.take(1))[0]
example_batch_predictions = model(input_example_batch)

# Sample one character id per time step from the first sequence's logits.
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

print("(batch_size, sequence_length, vocab_size) : {}".format(example_batch_predictions.shape))
print('=====================================')
# Convert the tensor to a NumPy array before indexing idx2char, as the
# dataset-example cell does, instead of indexing with an EagerTensor directly.
print("Input: \n", repr("".join(idx2char[input_example_batch[0].numpy()])))
print('=====================================')
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices])))

(batch_size, sequence_length, vocab_size) : (64, 100, 65)
Input: 
 "As is a dead man's nose: but I do see't and feel't\nAs you feel doing thus; and see withal\nThe instru"
Next Char Predictions: 
 "$TdnEeg&y3LJ-XTXyQIM,j'yStlK'KwMtzP&xfVfVuBaL3-MQdKn;3luHZ\n!DKWSL;htFS\nsjX&VZvjy?GfhmrTGshltnhqMdvCL"


## Define loss and prepare for the training

In [9]:
def loss(labels, logits):
    """Sparse categorical cross-entropy computed on raw logits.

    ``from_logits=True`` is required because the model's final Dense layer has
    no softmax activation.
    """
    cross_entropy = tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True)
    return cross_entropy

# Checkpoint files go to ./training_checkpoints; "{epoch}" in the prefix is a
# placeholder left for the callback to fill in.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
)

model.compile(
    optimizer=tf.train.AdamOptimizer(),
    loss=loss,
)

## Run the training procedure

In [10]:
EPOCHS = 10

# The dataset is repeated without end, so steps_per_epoch is what decides
# where one epoch stops and the next begins.
history = model.fit(
    dataset.repeat(),
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    callbacks=[checkpoint_callback],
)

Epoch 1/10
Instructions for updating:
Use tf.train.CheckpointManager to manage checkpoints rather than manually editing the Checkpoint proto.
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Restore the latest checkpoint and rebuild the model

In [11]:
# latest_checkpoint returns None when the directory holds no checkpoint; fail
# with a clear message instead of an obscure error from load_weights(None).
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise RuntimeError(
        'No checkpoint found in {!r}; run the training cell first.'.format(checkpoint_dir))

# Rebuild with batch_size=1: the stateful recurrent layer fixes the batch
# dimension, and generation feeds one sequence at a time.
model = build_model(batch_size=1)
model.load_weights(latest_checkpoint)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            16640     
_________________________________________________________________
cu_dnngru_1 (CuDNNGRU)       (1, None, 1024)           3938304   
_________________________________________________________________
dense_1 (Dense)              (1, None, 65)             66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


## Define a function for the text generation

In [12]:
def generate_text(model, start_string, num_generate, temperature=1.0):
    """Generate text one character at a time, seeded with ``start_string``.

    Args:
        model: stateful model built for a batch size of 1; it is called with a
            ``(1, length)`` tensor of character ids and its states are reset
            before generation starts.
        start_string: non-empty seed text. Every character must be a key of
            ``char2idx`` (an unknown character raises ``KeyError``).
        num_generate: number of characters to sample after the seed.
        temperature: positive divisor applied to the logits before sampling.
            Lower values give more predictable text, higher values more
            surprising text.

    Returns:
        ``start_string`` followed by the ``num_generate`` sampled characters.

    Raises:
        ValueError: if ``start_string`` is empty or ``temperature`` is not
            strictly positive.
    """
    # Validate up front: an empty seed would feed the model an empty tensor,
    # and a non-positive temperature would turn the logits into nonsense.
    if not start_string:
        raise ValueError('start_string must contain at least one character')
    if temperature <= 0:
        raise ValueError('temperature must be positive, got {}'.format(temperature))

    # Encode the seed and add the batch dimension: shape (1, len(start_string)).
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

    text_generated = []

    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # Drop the batch dimension: (sequence_length, vocab_size).
        predictions = tf.squeeze(predictions, 0)

        # Sample the next character from the categorical distribution defined
        # by the temperature-scaled logits; only the last time step is kept.
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Feed only the sampled character back in; the preceding context is
        # carried by the model's recurrent state.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

## Generate the text

In [13]:
# Sample 1000 characters from the restored model, seeded with a speaker tag.
generated_text = generate_text(model, start_string=u"ROMEO:", num_generate=1000)
print(generated_text)

ROMEO:
Beseech you! stand not both his against him.

BAMNAND:
Give you thus. Lay of never hie how he let
Less straight.

KING RICHARD III:
Why, how long you debarr'd; love now it destroy
Of Buchol, how loves.

JULIET:
Another, Pompey; she hath arms, cerhaps breath thy part,
Letters for zealou some angled queen
Of you, in my fortune llance he would est.
Deceited not the news herefore die you give?
Then, is true boy? where to your garlen's viewin!
To sin me with you!

YORK:
'byou grant were thing that get you may profandy for you,
And what the compositur shall be thyself:
So much a milamity.

BUSHY:
'What! will his mout'st thine eaten, we are but heirro's face.
That it didst blead me, thust thou livest thee so of his usfort
Unjurged your presence.
Thou blasts, power, my lord.

KING LEWIS XI:
For the drought is twirt ere not in leave?

KING RICHARD III:
Many fleetings, Doose men, for this good villains.
This sighs be wings with warm and weep thee.

CAMILLO:
Most impatient, sir.

ESCALUS:
