In [1]:
import tensorflow as tf

import numpy as np
import os
import time

In [3]:
# Read the data.
# The file is read as raw bytes and decoded explicitly as UTF-8. The context
# manager closes the file handle as soon as the bytes are read.
# NOTE(review): plain 'utf-8' keeps a leading byte-order mark if the file has
# one (it shows up as the first token, b'\xef\xbb\xbf', in later cells);
# 'utf-8-sig' would strip it, but would also change the vocabulary size.
with open('1661-0.txt', 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
print(f'Length of text: {len(text)} characters')

Length of text: 594198 characters


In [4]:
# Preview the first 250 characters of the corpus.
print(text[:250])


Project Gutenberg's The Adventures of Sherlock Holmes, by Arthur Conan Doyle

This eBook is for the use of anyone anywhere at no cost and with
almost no restrictions whatsoever.  You may copy it, give it away or
re-use it under the terms of th


In [5]:
# The vocabulary is the sorted list of distinct characters found in the corpus.
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

100 unique characters


In [6]:
# Process the text.
# Toy example: split each string into its individual UTF-8 characters.
# The strings differ in length, so the result is a ragged tensor.
example_texts = ['abcdefg', 'xyz']

chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [7]:
# Lookup layer that maps each character to an integer ID.
# mask_token=None: no mask token is configured for this layer.
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)

In [8]:
# Convert the example characters to their integer IDs.
ids = ids_from_chars(chars)
ids

<tf.RaggedTensor [[61, 62, 63, 64, 65, 66, 67], [84, 85, 86]]>

In [9]:
# Inverse lookup layer (integer IDs -> characters).
# It is built from the forward layer's own vocabulary (get_vocabulary())
# rather than from `vocab`, so both directions use the same ID assignment.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)

In [10]:
# Round trip: map the IDs back to characters (rebinds `chars`).
chars = chars_from_ids(ids)
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [11]:
# Join the characters of each row back into a single byte string.
tf.strings.reduce_join(chars, axis=-1).numpy()

array([b'abcdefg', b'xyz'], dtype=object)

In [12]:
def text_from_ids(ids):
  """Turn integer character IDs back into text.

  The IDs are looked up with the notebook-level `chars_from_ids` layer and the
  resulting characters are joined along the last axis.
  """
  looked_up_chars = chars_from_ids(ids)
  return tf.strings.reduce_join(looked_up_chars, axis=-1)

In [13]:
# Prediction task.
# Encode the entire corpus as one flat vector of character IDs.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(594198,), dtype=int64, numpy=array([100,   2,   1, ...,   1,   2,   1], dtype=int64)>

In [14]:
# Dataset that yields the corpus one character ID at a time.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [15]:
# Print the first 10 characters of the stream, one per line.
# NOTE: the loop variable rebinds the notebook-level `ids` defined earlier.
for ids in ids_dataset.take(10):
    print(chars_from_ids(ids).numpy().decode('utf-8'))





P
r
o
j
e
c
t


In [16]:
# Number of input characters per training example.
seq_length = 100
# Each chunk holds seq_length + 1 characters: the extra one lets the target be
# the input shifted by one position (see split_input_target).
examples_per_epoch = len(text)//(seq_length+1)

In [17]:
# Group the character stream into chunks of seq_length + 1 IDs; a trailing
# chunk shorter than that is dropped (drop_remainder=True).
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

# Show the first chunk as individual characters.
for seq in sequences.take(1):
  print(chars_from_ids(seq))

tf.Tensor(
[b'\xef\xbb\xbf' b'\r' b'\n' b'P' b'r' b'o' b'j' b'e' b'c' b't' b' ' b'G'
 b'u' b't' b'e' b'n' b'b' b'e' b'r' b'g' b"'" b's' b' ' b'T' b'h' b'e'
 b' ' b'A' b'd' b'v' b'e' b'n' b't' b'u' b'r' b'e' b's' b' ' b'o' b'f'
 b' ' b'S' b'h' b'e' b'r' b'l' b'o' b'c' b'k' b' ' b'H' b'o' b'l' b'm'
 b'e' b's' b',' b' ' b'b' b'y' b' ' b'A' b'r' b't' b'h' b'u' b'r' b' '
 b'C' b'o' b'n' b'a' b'n' b' ' b'D' b'o' b'y' b'l' b'e' b'\r' b'\n' b'\r'
 b'\n' b'T' b'h' b'i' b's' b' ' b'e' b'B' b'o' b'o' b'k' b' ' b'i' b's'
 b' ' b'f' b'o' b'r' b' '], shape=(101,), dtype=string)


In [18]:
# Show the first five chunks joined back into byte strings.
for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

b"\xef\xbb\xbf\r\nProject Gutenberg's The Adventures of Sherlock Holmes, by Arthur Conan Doyle\r\n\r\nThis eBook is for "
b'the use of anyone anywhere at no cost and with\r\nalmost no restrictions whatsoever.  You may copy it, '
b'give it away or\r\nre-use it under the terms of the Project Gutenberg License included\r\nwith this eBook'
b' or online at www.gutenberg.net\r\n\r\n\r\nTitle: The Adventures of Sherlock Holmes\r\n\r\nAuthor: Arthur Conan'
b' Doyle\r\n\r\nRelease Date: November 29, 2002 [EBook #1661]\r\nLast Updated: May 20, 2019\r\n\r\nLanguage: Engl'


In [19]:
def split_input_target(sequence):
    """Build an (input, target) pair from one chunk.

    The input is every element except the last; the target is every element
    except the first, i.e. the same sequence shifted left by one position.
    Works on anything that supports slicing.
    """
    return sequence[:-1], sequence[1:]

In [20]:
# Sanity check on a plain Python list: the target is the input shifted by one.
split_input_target(list("Tensorflow"))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [21]:
# Turn every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [22]:
# Show the first (input, target) pair as text.
for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b"\xef\xbb\xbf\r\nProject Gutenberg's The Adventures of Sherlock Holmes, by Arthur Conan Doyle\r\n\r\nThis eBook is for"
Target: b"\r\nProject Gutenberg's The Adventures of Sherlock Holmes, by Arthur Conan Doyle\r\n\r\nThis eBook is for "


In [23]:
# Create training batches.
BATCH_SIZE = 64

# Size of the buffer used by `shuffle`: elements are drawn at random from a
# buffer of this many examples rather than from the whole dataset at once.
BUFFER_SIZE = 10000

# NOTE: this rebinds `dataset`, so the cell is not idempotent. Re-running it
# without re-running the cell that maps `split_input_target` would batch the
# already-batched data again.
dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    # `tf.data.AUTOTUNE` is the stable name for the former
    # `tf.data.experimental.AUTOTUNE` constant.
    .prefetch(tf.data.AUTOTUNE))

dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [24]:
# Build the model.
# Size of the vocabulary as seen by the `StringLookup` layer. This is the
# number of distinct IDs the model must handle; it is one larger than
# len(vocab) because the lookup layer also contains the "[UNK]" token.
vocab_size = len(ids_from_chars.get_vocabulary())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [27]:
# Same value as already assigned alongside the other hyperparameters.
rnn_units = 1024

class MyModel(tf.keras.Model):
  """Character-level language model: Embedding -> GRU -> Dense.

  The final Dense layer has `vocab_size` units and no activation, so the model
  outputs unnormalised scores (logits) for the next character at every step.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    """Create the layers.

    Args:
      vocab_size: number of distinct token IDs (embedding rows / output units).
      embedding_dim: size of each character embedding vector.
      rnn_units: number of units in the GRU layer.
    """
    # The base initializer takes no positional model argument; passing `self`
    # explicitly (as `super().__init__(self)`) hands it a stray argument.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # return_sequences: emit an output for every time step.
    # return_state: also return the final hidden state so generation can
    # carry it over between calls.
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run a forward pass.

    Args:
      inputs: integer token IDs.
      states: optional initial GRU state; when None the layer's default
        initial state is used.
      return_state: when True, return `(logits, states)` instead of `logits`.
      training: forwarded to every layer.

    Returns:
      The per-step logits, or a `(logits, final_gru_state)` tuple when
      `return_state` is True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [28]:
# Instantiate the model with the hyperparameters chosen above.
model = MyModel(
    # Be sure the vocabulary size matches the `StringLookup` layers.
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [29]:
# Try the (still untrained) model on one batch and check the output shape.
# The batch and its predictions are kept as notebook-level names because the
# following cells reuse them.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")


(64, 100, 101) # (batch_size, sequence_length, vocab_size)


In [30]:
# Layer-by-layer overview with parameter counts.
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  25856     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  103525    
                                                                 
Total params: 4,067,685
Trainable params: 4,067,685
Non-trainable params: 0
_________________________________________________________________


In [31]:
# Try it for the first example in the batch.
# Sample one ID per time step from the distribution defined by the logits
# (sampling rather than taking the argmax).
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
# Drop the trailing num_samples axis and convert to a NumPy array.
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

In [32]:
# Display the sampled character IDs (one per time step).
sampled_indices

array([64, 93, 18, 27, 92, 40, 47, 41,  4, 87, 15, 27, 53, 50, 96, 23, 33,
       64, 25, 39, 97,  4, 47, 13, 18, 23, 10, 16, 53, 17, 15, 96,  9, 14,
        4,  7, 99, 58, 25, 37, 38, 38, 35, 26, 60, 88, 39, 97, 35, 15, 12,
       30, 73, 62, 34, 78, 67, 45, 37, 14, 31, 37, 20, 17, 81, 29, 99, 69,
       34, 55, 35, 86, 11, 17,  0, 80, 83, 31,  5,  4, 82, 83, 42, 95, 17,
       68, 92,  4, 74, 49, 55, 42, 72, 31, 78, 26,  3, 41, 50, 53],
      dtype=int64)

In [33]:
# Decode the input and the sampled IDs back to text. The model is untrained at
# this point, so the sampled text is expected to be noise.
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b'furnished little chamber, with\r\na grey carpet, a large bureau, and a long mirror. Holmes went to the'

Next Char Predictions:
 b'd\xc3\xa909\xc3\xa8IPJ!\xc2\xa3-9VS\xe2\x80\x985Bd7H\xe2\x80\x99!P*05\'.V/-\xe2\x80\x98&,!$\xe2\x80\x9d[7FGGD8_\xc2\xbdH\xe2\x80\x99D-)?mbCrgNF,@F2/u;\xe2\x80\x9diCXDz(/[UNK]tw@"!vwK\xe2\x80\x94/h\xc3\xa8!nRXKl@r8 JSV'


In [34]:
# Train the model.
# from_logits=True because the model's Dense output layer has no activation.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [35]:
# Loss of the untrained model on the example batch.
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (64, 100, 101)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.6146817, shape=(), dtype=float32)


In [36]:
# exp(mean loss). For a freshly initialised model this should be close to the
# vocabulary size (101 here).
tf.exp(example_batch_mean_loss).numpy()

100.95569

In [37]:
# Configure training: Adam optimizer with the loss defined above.
model.compile(optimizer='adam', loss=loss)

In [38]:
# Configure checkpoints.

# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; "{epoch}" is a placeholder filled in per epoch.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Save only the weights (not the full model).
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [39]:
# Run the training.
# Number of passes over the dataset.
EPOCHS = 20

In [40]:
# Train with the checkpoint callback attached; `history` holds the History
# object returned by fit().
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [41]:
#Generate the TEXT
class OneStep(tf.keras.Model):
  """Wraps a trained model to generate text one character at a time.

  Holds the model, both lookup layers, a sampling temperature and a logit mask
  that makes the "[UNK]" token impossible to sample.
  """

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    """Store the collaborators and precompute the "[UNK]" mask.

    Args:
      model: model called as `model(inputs=..., states=..., return_state=True)`.
      chars_from_ids: layer mapping token IDs to characters.
      ids_from_chars: layer mapping characters to token IDs.
      temperature: divisor applied to the logits before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Build a dense vector, one entry per vocabulary item, that is -inf at the
    # "[UNK]" position and 0 everywhere else. Adding it to the logits rules
    # "[UNK]" out of sampling.
    unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
    vocab_len = len(ids_from_chars.get_vocabulary())
    neg_inf = -float('inf')
    unk_penalty = tf.SparseTensor(
        indices=unk_ids,
        values=[neg_inf] * len(unk_ids),
        dense_shape=[vocab_len])
    self.prediction_mask = tf.sparse.to_dense(unk_penalty)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: batch of strings to continue.
      states: model state from the previous step, or None on the first step.

    Returns:
      `(predicted_chars, states)`: the sampled next character for every input
      string and the model state to feed into the next call.
    """
    # Strings -> characters -> token IDs (ragged rows padded into a tensor).
    split_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    token_ids = self.ids_from_chars(split_chars).to_tensor()

    # logits has shape [batch, char, next_char_logits].
    logits, states = self.model(inputs=token_ids, states=states,
                                return_state=True)

    # Keep only the last time step, apply the temperature, then mask "[UNK]".
    last_step_logits = logits[:, -1, :] / self.temperature
    masked_logits = last_step_logits + self.prediction_mask

    # Sample one token ID per batch row and drop the num_samples axis.
    sampled = tf.random.categorical(masked_logits, num_samples=1)
    next_ids = tf.squeeze(sampled, axis=-1)

    # Token IDs -> characters, returned together with the model state.
    return self.chars_from_ids(next_ids), states

In [42]:
# Wrap the trained model for generation (default temperature 1.0).
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [43]:
# Generate 1000 characters from a single seed string and time the run.
start = time.time()
states = None
# NOTE(review): the seed 'ROMEO:' does not look like it belongs to this corpus
# (the header printed earlier is from "The Adventures of Sherlock Holmes");
# presumably carried over from another example — confirm it is intended.
next_char = tf.constant(['ROMEO:'])
result = [next_char]

# Feed each sampled character back in together with the returned state.
for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the seed and all generated characters into one string per row.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

ROMEO:

“Boscombe Pool, then?”

“Tell me that I had shown you but at a usually interested in A have
better for you. You have fastened sungly cold towards a girl,
for he looked outside cold well with the tasket-chain, as he came
in a disqualing of his share wife as every friends, they are, as her commissions we had
advertured with my face topoded a while we had then.

“‘Well?’ he consedient to close circle turned to the
records of his congrattle ting of the strange and struck as if a hopely shutters up to
ten minday, each Maustered away by easy other late and stronger rambalsquair, the
flowing was of ‘Fore allow, I am sorry tell,” said he. “But away we
have come with—you in the Pasistance of the house?”

“I told you all about it you would not talk all Gord. Two years ago, his wife’s
cried swiftly, and, faintly he turned out of the window rose up
his evid head, shall observed Holmes, spring in his chair and eyes to the
slip of the fity and little from the woods of Bohemia in 

__________

In [44]:
# Same generation loop, but with a batch of five identical seed strings so
# five samples are produced in one run.
start = time.time()
states = None
next_char = tf.constant(['ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# One joined string per seed; printed as a raw tensor of byte strings.
result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + '_'*80)
print('\nRun time:', end - start)

tf.Tensor(
[b'ROMEO: I must put the facts at the\r\ncase, do?\xe2\x80\x99 I\r\nwould have done now to put his land, though we heard\r\nfrom the first at the old day fellow there may be mneeting in for the\r\ndeserts. This thought we heard some glimpses bying a look at\r\nwon\xe2\x80\x99t in the Gerlyma, not only one, which sat standing in the\r\nmarriage had last, have given me on my upprovinged, and they were\r\nto its returned and extraordinary cloids. Of these had find a\r\nlittle newspaper shoel away the date of the prace which I have a slush of bantar with my boy and\r\nsitting-statement which in many lay happened, but I always believed his\r\neyes. But the goose was all drawn turner. As so open his face, which seemed to give us\r\nall over into my eightfully. \xe2\x80\x9cI am sure!\xe2\x80\x99\r\n\r\n\xe2\x80\x9c\xe2\x80\x98Oh,\xe2\x80\x99 says she, was in Jalanate\xe2\x80\x94\xe2\x80\x99s pledgear. \xe2\x80\x9cThey appeared to be kind to\r\nyet in its guzzen hat authorita_ly\r\n

In [45]:
# Export the generator as a SavedModel in the 'one_step' directory and load it
# back.
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')





INFO:tensorflow:Assets written to: one_step\assets


INFO:tensorflow:Assets written to: one_step\assets


In [46]:
# Generate 100 characters with the reloaded SavedModel to check that the
# exported `generate_one_step` works.
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(100):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

ROMEO:

Lord St. Simon breakfast and fuller rushed backward, and to the Alphar
Wansons I. I have heard 


In [47]:
#Customized Training 
class CustomTraining(MyModel):
  """MyModel with a hand-written `train_step`.

  Because `train_step` is overridden, `fit()` uses this logic for every batch;
  the method can also be called directly from a custom loop.
  """

  @tf.function
  def train_step(self, inputs):
      """Run one optimisation step on a single batch.

      Args:
        inputs: an `(inputs, labels)` pair.

      Returns:
        A dict with the batch loss under the key 'loss'.
      """
      inputs, labels = inputs
      with tf.GradientTape() as tape:
          predictions = self(inputs, training=True)
          loss = self.loss(labels, predictions)
      # Differentiate with respect to, and update, THIS instance's variables.
      # Referring to the notebook-global `model` here only works when that
      # name happens to be bound to the same object.
      variables = self.trainable_variables
      grads = tape.gradient(loss, variables)
      self.optimizer.apply_gradients(zip(grads, variables))

      return {'loss': loss}

In [48]:
# Create a fresh, untrained model with the custom train_step.
# NOTE: this rebinds `model`; `one_step_model` still wraps the previously
# trained instance.
model = CustomTraining(
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [49]:
# Attach the optimizer and loss that train_step reads via self.optimizer and
# self.loss.
model.compile(optimizer = tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

In [50]:
# One epoch through fit(), which runs the overridden train_step.
model.fit(dataset, epochs=1)



<keras.callbacks.History at 0x22b7622f610>

In [52]:
# Custom training loop: call train_step directly instead of going through fit().
EPOCHS = 2

# Running mean of the batch losses within one epoch.
mean = tf.metrics.Mean()

for epoch in range(EPOCHS):
    start = time.time()

    # Start every epoch with an empty accumulator. `reset_state()` is the
    # current name of this method; `reset_states()` is deprecated.
    mean.reset_state()
    for (batch_n, (inp, target)) in enumerate(dataset):
        logs = model.train_step([inp, target])
        mean.update_state(logs['loss'])

        # Report progress every 50 batches.
        if batch_n % 50 == 0:
            template = f"Epoch {epoch+1} Batch {batch_n} Loss {logs['loss']:.4f}"
            print(template)

    # Save a checkpoint every 5 epochs (never triggers while EPOCHS < 5).
    # The file name uses the 0-based `epoch` index.
    if (epoch + 1) % 5 == 0:
        model.save_weights(checkpoint_prefix.format(epoch=epoch))

    print()
    print(f'Epoch {epoch+1} Loss: {mean.result().numpy():.4f}')
    print(f'Time taken for 1 epoch {time.time() - start:.2f} sec')
    print("_"*80)

# Always save the final weights; `epoch` still holds the last loop index here.
model.save_weights(checkpoint_prefix.format(epoch=epoch))

Epoch 1 Batch 0 Loss 1.3653
Epoch 1 Batch 50 Loss 1.3561

Epoch 1 Loss: 1.3647
Time taken for 1 epoch 178.09 sec
________________________________________________________________________________
Epoch 2 Batch 0 Loss 1.2861
Epoch 2 Batch 50 Loss 1.3028

Epoch 2 Loss: 1.2981
Time taken for 1 epoch 178.30 sec
________________________________________________________________________________
