##### Copyright 2019 The TensorFlow Authors.

In [21]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Text generation with an RNN

This tutorial demonstrates how to generate text using a character-based RNN. You will work with a dataset of code's writing from Andrej Karpathy's [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/). Given a sequence of characters from this data ("Shakespear"), train a model to predict the next character in the sequence ("e"). Longer sequences of text can be generated by calling the model repeatedly.

Note: Enable GPU acceleration to execute this notebook faster. In Colab: *Runtime > Change runtime type > Hardware accelerator > GPU*.

This tutorial includes runnable code implemented using [tf.keras](https://www.tensorflow.org/guide/keras/sequential_model) and [eager execution](https://www.tensorflow.org/guide/eager). The following is the sample output when the model in this tutorial trained for 30 epochs, and started with the prompt "Q":

## Setup

### Import TensorFlow and other libraries

In [22]:
import tensorflow as tf

import numpy as np
import os
import time
import re

### Download the code dataset

Change the following line to run this code on your own data.

In [23]:
# Specify the local path to the file
file_path = './code.txt'

# Open and read the file
with open(file_path, 'r') as file:
    text = file.read()

# Print the content or use it for training
print(text)
print(f'Length of text: {len(text)} characters')

if x > 10:
    print("x is greater than 10")
else:
    print("x is 10 or less")

x = 12
x is greater than 10

x = 8
x is 10 or less

while x < 5:
    print(f"x is {x}")
    x += 1

x = 2
x is 2
x is 3
x is 4

if y == 0:
    print("y is zero")
elif y < 0:
    print("y is negative")
else:
    print("y is positive")

y = 0
y is zero

y = -3
y is negative

y = 7
y is positive

if z == 5:
    print("z is equal to 5")
else:
    print("z is not 5")

z = 5
z is equal to 5

z = 3
z is not 5

for i in range(3):
    print(f"i is {i}")

i = 0
i is 0
i is 1
i is 2

x = 15
if x % 2 == 0:
    print("x is even")
else:
    print("x is odd")

x = 15
x is odd

x = 14
x is even

while x > 0:
    print(f"x is {x}")
    x -= 2

x = 12
x is 12
x is 10
x is 8
x is 6
x is 4
x is 2

if y > 0:
    print("y is positive")
elif y == 0:
    print("y is zero")
else:
    print("y is negative")

y = 4
y is positive

y = 0
y is zero

y = -1
y is negative

for j in range(1, 6):
    print(f"j is {j}")

j = 1
j is 1
j is 2

## Process the text

### Vectorize the text

Before training, you need to convert the strings to a numerical representation. 

The `tf.keras.layers.StringLookup` layer can convert each character into a numeric ID. It just needs the text to be split into tokens first.

In [24]:
# Step 2: Preprocess the code (lowercase and remove extra spaces)
def preprocess_code(code):
    # Convert text to lowercase
    code = code.lower()
    # Optionally, remove extra spaces or special symbols
    code = re.sub(r'\s+', ' ', code)  # Replace multiple spaces with a single space
    code = re.sub(r'[^\x00-\x7F]+', '', code)  # Remove non-ASCII characters (optional)
    return code

# Preprocess the text from the file
text = preprocess_code(text)

# Step 3: Vectorize the text at the character level using TensorFlow
# Create a TextVectorization layer for character-level tokenization
vectorizer = tf.keras.layers.TextVectorization(
    standardize=None,  # Don't standardize (we've already processed)
    output_mode='int',  # Return indices of characters
    output_sequence_length=100  # Set a sequence length limit
)

# Adapt the vectorizer to the text (fit on the text)
vectorizer.adapt([text])

# Step 4: Vectorize the text
vectorized_text = vectorizer(text)
print(f"Vectorized text (first 100 characters): {vectorized_text[:100]}")

# Step 5: Ensure that the [UNK] token is at the start of the vocabulary
vocabulary = vectorizer.get_vocabulary()

# If [UNK] is not the first token, move it to the first position
if vocabulary[0] != '[UNK]':
    vocabulary = ['[UNK]'] + [token for token in vocabulary if token != '[UNK]']

# Step 6: Create StringLookup layer with the corrected vocabulary
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=vocabulary, invert=True, mask_token=None
)

# Decode the vectorized text back into characters
decoded_text = chars_from_ids(vectorized_text)
print(f"Decoded text (first 100 characters): {decoded_text.numpy()[:100]}")


Vectorized text (first 100 characters): [11  3 16 75 10  2 43 32 76 12 10  2  8 38 63  3  4 25  3  2 43 32  8  3
  4 48  3  2  8 38 64 29  3 23 49 33  2 26  3 80 20  3  4  7  3  2  7  3
  2 24  3  2 19 11  5 17 13  9  2 27 46  5 23 13  9  2 40 12  9  2 35  5
  4 21  5  2 28  5  4 78  5  2 41  5  4 67  5  2 36 11 14 17 49 34  2 45
 30 50 12 34]
Decoded text (first 100 characters): [b'if' b'x' b'>' b'10:' b'print("x' b'is' b'greater' b'than' b'10")'
 b'else:' b'print("x' b'is' b'10' b'or' b'less")' b'x' b'=' b'12' b'x'
 b'is' b'greater' b'than' b'10' b'x' b'=' b'8' b'x' b'is' b'10' b'or'
 b'less' b'while' b'x' b'<' b'5:' b'print(f"x' b'is' b'{x}")' b'x' b'+='
 b'1' b'x' b'=' b'2' b'x' b'is' b'2' b'x' b'is' b'3' b'x' b'is' b'4' b'if'
 b'y' b'==' b'0:' b'print("y' b'is' b'zero")' b'elif' b'y' b'<' b'0:'
 b'print("y' b'is' b'negative")' b'else:' b'print("y' b'is' b'positive")'
 b'y' b'=' b'0' b'y' b'is' b'zero' b'y' b'=' b'-3' b'y' b'is' b'negative'
 b'y' b'=' b'7' b'y' b'is' b'positive' b'

### The prediction task

Given a character, or a sequence of characters, what is the most probable next character? This is the task you're training the model to perform. The input to the model will be a sequence of characters, and you train the model to predict the output—the following character at each time step.

Since RNNs maintain an internal state that depends on the previously seen elements, given all the characters computed until this moment, what is the next character?


### Create training examples and targets

Next divide the text into example sequences. Each input sequence will contain `seq_length` characters from the text.

For each input sequence, the corresponding targets contain the same length of text, except shifted one character to the right.

So break the text into chunks of `seq_length+1`. For example, say `seq_length` is 4 and our text is "Hello". The input sequence would be "Hell", and the target sequence "ello".

To do this first use the `tf.data.Dataset.from_tensor_slices` function to convert the text vector into a stream of character indices.

In [25]:
ids_from_chars = tf.keras.layers.StringLookup(vocabulary=vocabulary, mask_token=None)

all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))

# Display the result
print(f"All character indices: {all_ids.numpy()}")

All character indices: [15  0  0 ...  0  0  0]


In [26]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [27]:
for ids in ids_dataset.take(10):
    print(chars_from_ids(ids).numpy().decode('utf-8'))

i
[UNK]
[UNK]
x
[UNK]
>
[UNK]
1
0
[UNK]


2024-12-05 14:40:50.467777: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [28]:
seq_length = 100


The `batch` method lets you easily convert these individual characters to sequences of the desired size.

In [29]:
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

for seq in sequences.take(1):
  print(chars_from_ids(seq))

tf.Tensor(
[b'i' b'[UNK]' b'[UNK]' b'x' b'[UNK]' b'>' b'[UNK]' b'1' b'0' b'[UNK]'
 b'[UNK]' b'[UNK]' b'[UNK]' b'i' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'x'
 b'[UNK]' b'i' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]'
 b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]'
 b'[UNK]' b'1' b'0' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]'
 b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'i' b'[UNK]' b'[UNK]'
 b'[UNK]' b'[UNK]' b'x' b'[UNK]' b'i' b'[UNK]' b'[UNK]' b'1' b'0' b'[UNK]'
 b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]'
 b'[UNK]' b'[UNK]' b'x' b'[UNK]' b'=' b'[UNK]' b'1' b'2' b'[UNK]' b'x'
 b'[UNK]' b'i' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]'
 b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]' b'[UNK]'
 b'[UNK]' b'1' b'0' b'[UNK]' b'x'], shape=(101,), dtype=string)


2024-12-05 14:40:50.557400: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


It's easier to see what this is doing if you join the tokens back into strings:

For training you'll need a dataset of `(input, label)` pairs. Where `input` and 
`label` are sequences. At each time step the input is the current character and the label is the next character. 

Here's a function that takes a sequence as input, duplicates, and shifts it to align the input and label for each timestep:

In [30]:
def split_input_target(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

In [31]:
split_input_target(list("Tensorflow"))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [32]:
dataset = sequences.map(split_input_target)

In [33]:
# Define the reverse StringLookup layer for decoding the indices back to characters
chars_from_ids = tf.keras.layers.StringLookup(vocabulary=vocabulary, invert=True, mask_token=None)

# Step 1: Example dataset with input and target sequences
for input_example, target_example in dataset.take(1):
    # Decode the input and target sequences using chars_from_ids

    # Convert tensors to numpy arrays of integers
    input_text = input_example.numpy()  # input_example is already a tensor of integers
    target_text = target_example.numpy()  # target_example is already a tensor of integers

    # Decode the numpy array of indices to characters
    input_text_decoded = ''.join([vocabulary[i] if i != 0 else '' for i in input_text])  # Ignore padding (0)
    target_text_decoded = ''.join([vocabulary[i] if i != 0 else '' for i in target_text])  # Ignore padding (0)

    # Print the result
    print("Input  :", input_text_decoded)
    print("Target :", target_text_decoded)

Input  : ix>10ixi10ixi10x=12xi10
Target : x>10ixi10ixi10x=12xi10x


2024-12-05 14:40:51.687191: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


### Create training batches

You used `tf.data` to split the text into manageable sequences. But before feeding this data into the model, you need to shuffle the data and pack it into batches.

In [34]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))

dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

## Build The Model

This section defines the model as a `keras.Model` subclass (For details see [Making new Layers and Models via subclassing](https://www.tensorflow.org/guide/keras/custom_layers_and_models)). 

This model has three layers:

* `tf.keras.layers.Embedding`: The input layer. A trainable lookup table that will map each character-ID to a vector with `embedding_dim` dimensions;
* `tf.keras.layers.GRU`: A type of RNN with size `units=rnn_units` (You can also use an LSTM layer here.)
* `tf.keras.layers.Dense`: The output layer, with `vocab_size` outputs. It outputs one logit for each character in the vocabulary. These are the log-likelihood of each character according to the model.

In [35]:
# Length of the vocabulary in StringLookup Layer
tokenizer = tf.keras.layers.TextVectorization(standardize='lower_and_strip_punctuation')
tokenizer.adapt(text)

vocab_size = len(tokenizer.get_vocabulary())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [36]:
class MyModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units, return_sequences=True, return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        y = inputs
        x = self.embedding(y, training=training)
        print(f"Embedding output shape: {x.shape}")

        if states is None:
            batch_size = tf.shape(inputs)[0]
            states = [tf.zeros((batch_size, self.gru.units))]
            print(f"Initialized states shape: {states[0].shape}")

        x, states = self.gru(x, initial_state=states, training=training)
        print(f"GRU output shape: {x.shape}")
        print(f"GRU states shape: {states.shape}")

        x = self.dense(x, training=training)
        print(f"Dense output shape: {x.shape}")

        if return_state:
            return x, states
        return x


# Create the model
model = MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units
)

# Create a dummy input with batch_size=1 and sequence_length=10
dummy_input = tf.random.uniform(shape=(1, 10), minval=0, maxval=vocab_size, dtype=tf.int32)

# Perform a forward pass to initialize the model
model(dummy_input)

# Now the model should be built and you can check the summary
model.summary()

Embedding output shape: (1, 10, 256)
Initialized states shape: (1, 1024)
GRU output shape: (1, 10, 1024)
GRU states shape: (1, 1024)
Dense output shape: (1, 10, 55)
Embedding output shape: (1, 10, 256)
Initialized states shape: (1, 1024)
GRU output shape: (1, 10, 1024)
GRU states shape: (1, 1024)
Dense output shape: (1, 10, 55)


For each character the model looks up the embedding, runs the GRU one timestep with the embedding as input, and applies the dense layer to generate logits predicting the log-likelihood of the next character:

![A drawing of the data passing through the model](images/text_generation_training.png)

Note: For training you could use a `keras.Sequential` model here. To  generate text later you'll need to manage the RNN's internal state. It's simpler to include the state input and output options upfront, than it is to rearrange the model architecture later. For more details see the [Keras RNN guide](https://www.tensorflow.org/guide/keras/rnn#rnn_state_reuse).

## Try the model

Now run the model to see that it behaves as expected.

First check the shape of the output:

In [42]:

# Define loss function
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Shuffle and batch the dataset (without repeat)
dataset = dataset.shuffle(10).batch(10)

# Define number of steps to iterate over the dataset (e.g., 100 steps)
num_steps = 100
step_counter = 0

# Manually control the number of steps
for input_example_batch, target_example_batch in dataset.take(num_steps):
    print("hello")
    # Make predictions with the model
    example_batch_predictions = model(input_example_batch, training=False)
    
    # Print the shape of the predictions
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

    # Calculate the loss for this batch
    example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
    print("Mean loss:        ", example_batch_mean_loss.numpy())

    # Sample indices from the predictions (probabilities)
    sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
    sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

    # Convert input batch and sampled predictions to text
    print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
    print()
    print("Next Char Predictions:\n", text_from_ids(sampled_indices))

    # Increment step counter
    step_counter += 1

    # If you reach the desired number of steps, break the loop
    if step_counter >= num_steps:
        break



<_TakeDataset element_spec=(TensorSpec(shape=(None, None, None, 64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(None, None, None, 64, 100), dtype=tf.int64, name=None))>


2024-12-05 15:15:10.899011: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Decode these to see the text predicted by this untrained model:

## Train the model

At this point the problem can be treated as a standard classification problem. Given the previous RNN state, and the input this time step, predict the class of the next character.

### Attach an optimizer, and a loss function

The standard `tf.keras.losses.sparse_categorical_crossentropy` loss function works in this case because it is applied across the last dimension of the predictions.

Because your model returns logits, you need to set the `from_logits` flag.


In [38]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [39]:
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

NameError: name 'target_example_batch' is not defined

A newly initialized model shouldn't be too sure of itself, the output logits should all have similar magnitudes. To confirm this you can check that the exponential of the mean loss is approximately equal to the vocabulary size. A much higher loss means the model is sure of its wrong answers, and is badly initialized:

In [155]:
tf.exp(example_batch_mean_loss).numpy()

65.93071

Configure the training procedure using the `tf.keras.Model.compile` method. Use `tf.keras.optimizers.Adam` with default arguments and the loss function.

In [41]:
model.compile(optimizer='adam', loss=loss)

### Configure checkpoints

Use a `tf.keras.callbacks.ModelCheckpoint` to ensure that checkpoints are saved during training:

In [43]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'

# Name of the checkpoint files (with proper extension)
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")

# Define the ModelCheckpoint callback
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True
)

print(f"Checkpoints will be saved at: {checkpoint_prefix}")

Checkpoints will be saved at: ./training_checkpoints/ckpt_{epoch}.weights.h5


### Execute the training

To keep training time reasonable, use 10 epochs to train the model. In Colab, set the runtime to GPU for faster training.

In [39]:
EPOCHS = 5

In [40]:
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback], steps_per_epoch=None)

NameError: name 'checkpoint_callback' is not defined

## Generate text

The simplest way to generate text with this model is to run it in a loop, and keep track of the model's internal state as you execute it.

![To generate text the model's output is fed back to the input](images/text_generation_sampling.png)

Each time you call the model you pass in some text and an internal state. The model returns a prediction for the next character and its new state. Pass the prediction and state back in to continue generating text.


The following makes a single step prediction:

In [160]:
class OneStep(tf.keras.Model):
  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Create a mask to prevent "[UNK]" from being generated.
    skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')]*len(skip_ids),
        indices=skip_ids,
        # Match the shape to the vocabulary
        dense_shape=[len(ids_from_chars.get_vocabulary())])
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature
    # Apply the prediction mask: prevent "[UNK]" from being generated.
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.chars_from_ids(predicted_ids)

    # Return the characters and model state.
    return predicted_chars, states

In [161]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

Run it in a loop to generate some text. Looking at the generated text, you'll see the model knows when to capitalize, make paragraphs and imitates a code-like writing vocabulary. With the small number of training epochs, it has not yet learned to form coherent sentences.

In [162]:
start = time.time()
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

Embedding output shape: (1, None, 256)
Initialized states shape: (1, 1024)
GRU output shape: (1, None, 1024)
GRU states shape: (1, 1024)
Dense output shape: (1, None, 66)
Embedding output shape: (1, None, 256)
GRU output shape: (1, None, 1024)
GRU states shape: (1, 1024)
Dense output shape: (1, None, 66)
ROMEO: Came.

CLARENCE:
Wherewo have a mother dies to slave, do
not how the law oth the faces as
up, I have lend between air.
O pity, I have power to town it
Tust Slabext, tarry in this trie;;
And I, who stopless. Is it is my lord
Inceest thou intencements, ne'er ane of mine?

DUKE OF AUMELLE:
I cannot made, am I see forthy desires
Promised war and life, brought to old confer's me.

Betteres: they not longo me with what he wings?

AUTONCLARD:
I know deniment leakness, for his lime were I
And follow nuts, being good malicate.

FRIAR:
Hast I a leave a comblacomrous, and I moot
And help me with give against said tha.
Farely said, full of some gutyen'd:
My life, for soon brack sheWhing thy

The easiest thing you can do to improve the results is to train it for longer (try `EPOCHS = 30`).

You can also experiment with a different start string, try adding another RNN layer to improve the model's accuracy, or adjust the temperature parameter to generate more or less random predictions.

If you want the model to generate text *faster* the easiest thing you can do is batch the text generation. In the example below the model generates 5 outputs in about the same time it took to generate 1 above. 

In [163]:
start = time.time()
states = None
next_char = tf.constant(['ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + '_'*80)
print('\nRun time:', end - start)

Embedding output shape: (5, None, 256)
Initialized states shape: (5, 1024)
GRU output shape: (5, None, 1024)
GRU states shape: (5, 1024)
Dense output shape: (5, None, 66)
Embedding output shape: (5, None, 256)
GRU output shape: (5, None, 1024)
GRU states shape: (5, 1024)
Dense output shape: (5, None, 66)
tf.Tensor(
[b"ROMEO:\nWhat is't not, by been?\n\nCATESBY:\nYou were redance, my lord: a mocking his pardly or you.\n\nDUKE OF AUMERLE:\nEven fibther, my lord: with him, and say'st\nWarwick, yent; For fled, now, pleased,\nHis shade here plospess'd Throughted, though any need.\nHath hence a man belefit it.\nLook, it is a cheer. Why, you spoke im\nFirst unkingness cares not sleeping; nowly!\n\nMOFF EDWARD:\nThat is this? hence? Say no; I come to med, bit thou cive?\n\nPRocHOR OF GLURET:\nWere on't you are comes come again:\nWith my partine Canison, I had recevert!\n\nFurst:\nHe's nor not our aspair.\n\nGONZALO:\nTake my lord, I soom more to mird Citizen:\nI'll hand resemble with be thy ch