In [1]:
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing

import numpy as np
import os
import time

## Download the Shakespeare dataset

In [2]:
path = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


## Read the data

In [3]:
# Read, then decode 
text = open(path, 'rb').read().decode(encoding='utf-8')
print('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


In [4]:
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [5]:
vocab = sorted(set(text))
print('{} unique characters'.format(len(vocab)))

65 unique characters


## Process the Text

In [6]:
example_texts = ['abcdefg', 'xyz']

chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')

In [7]:
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [8]:
# Converts form tokens to character IDs, padding with 0
ids_from_chars = preprocessing.StringLookup(vocabulary=list(vocab))
ids = ids_from_chars(chars)
print(ids)

<tf.RaggedTensor [[41, 42, 43, 44, 45, 46, 47], [64, 65, 66]]>


In [9]:
chars_from_ids = tf.keras.layers.experimental.preprocessing.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True
)
chars = chars_from_ids(ids)
print(chars)

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>


In [10]:
# join the characters back into strings
tf.strings.reduce_join(chars, axis=-1).numpy()

array([b'abcdefg', b'xyz'], dtype=object)

In [11]:
def text_from_ids(ids):
  return tf.strings.reduce_join(chars_from_ids(ids), axis=-1)

##The prediction task
Given a character, or a sequence of characters, what is the most probable next character? This is the task you're training the model to perform. The input to the model will be a sequence of characters, and you train the model to predict the output—the following character at each time step.

In [12]:
# Convert the text vector into a stream of character indices
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
print(all_ids)

tf.Tensor([20 49 58 ... 47 10  2], shape=(1115394,), dtype=int64)


In [13]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [14]:
for ids in ids_dataset.take(10):
  print(chars_from_ids(ids).numpy().decode('utf-8'))

F
i
r
s
t
 
C
i
t
i


In [15]:
seq_length = 100
examples_per_epochs = len(text)//(seq_length+1)

In [16]:
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

for seq in sequences.take(1):
  print(chars_from_ids(seq))


tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)


In [17]:
for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [18]:
def split_input_target(sequence):
  input_text = sequence[:-1]
  target_text = sequence[1:]
  return input_text, target_text

In [19]:
split_input_target(list('Tensorflow'))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [20]:
dataset = sequences.map(split_input_target)

In [21]:
for input_example, target_example in dataset.take(1):
  print("Input :", text_from_ids(input_example).numpy())
  print("Target:", text_from_ids(target_example).numpy())

Input : b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target: b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


## Create training batches

In [22]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# do it doen´t attemp to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE)
)
print(dataset)

<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>


## Build the model

In [23]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [24]:
class MyModel(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, rnn_units):
    super().__init__(self)
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x


In [25]:
model = MyModel(vocab_size=len(ids_from_chars.get_vocabulary()),
                embedding_dim=embedding_dim,
                rnn_units=rnn_units)

In [26]:
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape) # (batch_size, sequence_length, vocab_size)

(64, 100, 67)


In [27]:
model.summary()

Model: "my_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  17152     
_________________________________________________________________
gru (GRU)                    multiple                  3938304   
_________________________________________________________________
dense (Dense)                multiple                  68675     
Total params: 4,024,131
Trainable params: 4,024,131
Non-trainable params: 0
_________________________________________________________________


In [28]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

In [29]:
sampled_indices

array([14, 24, 45, 21, 23, 15, 44, 34, 12, 27, 28, 50, 14, 42,  6, 60, 24,
       25, 13, 22, 37,  1, 65, 53, 35,  0,  0, 32, 26,  8, 46, 37, 48, 49,
       25, 54, 57,  7, 11, 49, 12, 51, 53, 34, 28, 40, 65,  1, 21, 34,  5,
       43, 34, 35, 19, 65, 31, 57, 27, 19, 51, 30, 39, 20, 57, 33, 19, 48,
        0, 20, 41, 38, 48, 23, 17, 65, 55, 19,  8, 54, 54, 42,  0,  8,  3,
        3, 35, 13, 21, 22, 59, 18,  7, 59,  2, 43, 43, 31, 37, 45])

In [30]:
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("New Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b'ce;\nOr worthily, as a good subject should,\nOn some known ground of treachery in him?\n\nJOHN OF GAUNT:'

New Char Predictions:
 b"?JeGIAdT:MNj?b&tJK;HW[UNK]ymURL,fWhiKnq'3i:kmTNZy[UNK]GT$cTUEyQqMEkPYFqSEhFaXhICyoE,nnb,  U;GHsD's\nccQWe"


## Train the model

In [31]:
# This loss function works in this case because it is applied across the last dimension of the predictions.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [32]:
example_batch_loss = loss(target_example_batch, example_batch_predictions)
mean_loss = example_batch_loss.numpy().mean()
print("Prediction shape: ", example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")
print("Mean loss:       ", mean_loss)

Prediction shape:  (64, 100, 67) # (batch_size, sequence_length, vocab_size)
Mean loss:        4.2044897


In [33]:
tf.exp(mean_loss).numpy()

66.986404

In [34]:
model.compile(optimizer='adam', loss=loss)

## Configure checkpoints

In [35]:
# Directory where the checkpoints will be saved
checkpoint_dir = '.training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix,
                                                         save_weights_only=True)

## Execute training

In [43]:
EPOCHS = 30

history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## Generate Text

Each time you call the model you pass in some text and an internal state. The model returns a prediction for the next character and its new state. Pass the prediction and state back in to continue generating text.

In [44]:
class OneStep(tf.keras.Model):
  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    self.temperature=temperature
    self.model = model 
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Create a mask to prevent "" or "[UNK]" from being generated.
    skip_ids = self.ids_from_chars(['', '[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')]*len(skip_ids),
        indices = skip_ids,
        # Match the shape to the vocabulary
        dense_shape = [len(ids_from_chars.get_vocabulary())]
    ) 
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature
    # Apply the prediction mask: prevent "", or "[UNK]" from being generated.
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.chars_from_ids(predicted_ids)

    # Return the characters and model state
    return predicted_chars, states

In [45]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [46]:
start = time.time()
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()

print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)

print(f"\nRun time: {end - start}")

ROMEO:
The smiles, we have heard long.

BRAKENBURY:
Awaked you not what I say?

BIONDELLO:
I toward the battles whereof, one Paris,
Let us and inform myself. What valiant cried
But that her pure unawaged men are
alive,'tis visage, and what end you hear,
Advanced his ravery; I will beseech you
Where the nobility early done.

Children?
Or Cladeners and Durbow?

DUKE OF AUMERLE:
Most It,
As swarn and hopes to be thus fearful'd wit,
and give it off again. Well, knock your grace,
Being press'd on thy fine, but in their offices
As Palia to have the hearts of night
Thus have found easy fines: let them govern heaven
Both ridelicate town yourself.

MARIAN:
How now, my hardy, stand on's too well:
Thou please to fight Warwick and the son, he makes.

RICHARD:
Here comes Both to buy what within me.

Nurse:
Then here I come to know your forward heir.
You heart him sit, left to find it out,--
To do you promise, I am thereaft.

RICHARD:
Nor I; my son,
For this Ancient such weak here.

PETER:
In sality

In [49]:
start = time.time()
states = None
next_char = tf.constant(['ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()

print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)

print(f"\nRun time: {end - start}")

ROMEO:
Good day, my lord, this day or Corioli?

PETRUCHIO:
Well, sit, leave this, but at love's right,
Unless about the very noise of battle;
We did, we bid we here brother, with our themen
Most absent meanly in thy breath not. How out of this lay,
I shall remain a change; 'signifies, why he pit
my hand and slance can to wear a little good
But by myself and banquet to my fault:
And what to have done by the shame us else
But what our sort's dear fellow was as these
such cape is not. Take him up his nate in hand.

BENVOLIO:
It may here did we perceive my doom.
For number whended brother shall he pray, lest to-morrow.
An once? why, thou knownst in this land
Jest on thee; therefore prepare as chaplain
Whom I from Rome and Lancaster?

GLOUCESTER:
This is that fault! hear they shall feel the dove;
For nothing stand for vengeance for a twoftly mean.

HASTINGS:
Mistress Ovey heavens!

Third Servingman:
Why, his ormerli, loves me? forgive me, King Leonse,
And then do most guilty dill to him tha

## Export the generator

In [51]:
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')





INFO:tensorflow:Assets written to: one_step/assets


INFO:tensorflow:Assets written to: one_step/assets


In [52]:
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(100):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))









ROMEO:
When the sun sets, the angry Margery, and how came the creature
Wherewome in earth from sleep unto 


## Advanced: Customized Training

The basic procedure is:

1.   Execute the model and calculate the loss under a tf.GradientTape.
2.   Calculate the updates and apply them to the model using the optimizer.



In [58]:
class CustomTraining(MyModel):
  @tf.function
  def train_step(self, inputs):
    inputs, label = inputs
    with tf.GradientTape() as tape:
      predictions = self(inputs, training=True)
      loss = self.loss(label, predictions)
    grads = tape.gradient(loss, model.trainable_variables)
    self.optimizer.apply_gradients(zip(grads, model.trainable_variables))

    return {'loss': loss}

In [59]:
model = CustomTraining(vocab_size=len(ids_from_chars.get_vocabulary()),
                       embedding_dim=embedding_dim,
                       rnn_units=rnn_units)

In [60]:
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

In [61]:
model.fit(dataset, epochs=1)



<tensorflow.python.keras.callbacks.History at 0x7f1e303ae588>

In [64]:
EPOCHS = 10

mean = tf.metrics.Mean()

for epoch in range(EPOCHS):
  start = time.time()

  mean.reset_states()
  for(batch_n, (inp, target)) in enumerate(dataset):
    logs = model.train_step([inp, target])
    mean.update_state(logs['loss'])

    if batch_n % 50 == 0:
      template = "Epoch {} Batch {} Loss {}"
      print(template.format(epoch + 1, batch_n, logs['loss']))

    # saving (checkpoint) the model every 5 epochs
    if (epoch + 1) % 5 == 0:
      model.save_weights(checkpoint_prefix.format(epoch=epoch))
    
    print()
    print('Epoch {} Loss: {:.4f}'.format(epoch + 1, mean.result().numpy()))
    print('Time taken for 1 epoch {} sec'.format(time.time() - start))
    print("_"*80)

model.save_weights(checkpoint_prefix.format(epoch=epoch))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 3 Loss: 1.3997
Time taken for 1 epoch 15.898061752319336 sec
________________________________________________________________________________

Epoch 3 Loss: 1.3997
Time taken for 1 epoch 16.006205320358276 sec
________________________________________________________________________________

Epoch 3 Loss: 1.3991
Time taken for 1 epoch 16.116060495376587 sec
________________________________________________________________________________

Epoch 3 Loss: 1.3989
Time taken for 1 epoch 16.227539539337158 sec
________________________________________________________________________________

Epoch 3 Loss: 1.3987
Time taken for 1 epoch 16.341952085494995 sec
________________________________________________________________________________

Epoch 3 Loss: 1.3988
Time taken for 1 epoch 16.44980478286743 sec
________________________________________________________________________________

Epoch 3 Loss: 1.3988
Time taken for 1 epoc