In [14]:
import tensorflow as tf

import numpy as np
import os
import time

In [15]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [16]:
text = open(path_to_file, "rb").read().decode(encoding="utf-8")

print(f"Length of text: {len(text)} characters")

Length of text: 1115394 characters


In [17]:
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [18]:
vocab = sorted(set(text))
print(f"{len(vocab)} unique characters")

65 unique characters


In [19]:
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

text_as_int = np.array([char2idx[c] for c in text])

In [20]:
print("{")
for char, _ in zip(char2idx, range(20)):
  print(f"  {repr(char):4s}: {char2idx[char]:3d},")
print('  ...\n}')

{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '$' :   3,
  '&' :   4,
  "'" :   5,
  ',' :   6,
  '-' :   7,
  '.' :   8,
  '3' :   9,
  ':' :  10,
  ';' :  11,
  '?' :  12,
  'A' :  13,
  'B' :  14,
  'C' :  15,
  'D' :  16,
  'E' :  17,
  'F' :  18,
  'G' :  19,
  ...
}


In [21]:
print(f"{text[:13]} ---- characters are mapped to int ----> {text_as_int[:13]}")

First Citizen ---- characters are mapped to int ----> [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [22]:
seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)

char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

for i in char_dataset.take(5):
  print(idx2char[i.numpy()])

F
i
r
s
t


In [23]:
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

for item in sequences.take(5):
  print(repr("".join(idx2char[item.numpy()])))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [24]:
def split_input_target(chunk):
  input_text = chunk[:-1]
  target_text = chunk[1:]
  return input_text, target_text

dataset = sequences.map(split_input_target)

In [25]:
for input_example, target_example in dataset.take(1):
  print("Input data: ", repr("".join(idx2char[input_example.numpy()])))
  print("Target data: ", repr("".join(idx2char[target_example.numpy()])))

Input data:  'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target data:  'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [28]:
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
  print(f"Step {i:4d}")
  print(f"  input: {input_idx} ({repr(idx2char[input_idx])})")
  print(f"  expected output: {target_idx} ({repr(idx2char[target_idx])})")

Step    0
  input: 18 ('F')
  expected output: 47 ('i')
Step    1
  input: 47 ('i')
  expected output: 56 ('r')
Step    2
  input: 56 ('r')
  expected output: 57 ('s')
Step    3
  input: 57 ('s')
  expected output: 58 ('t')
Step    4
  input: 58 ('t')
  expected output: 1 (' ')


In [29]:
BATCH_SIZE = 64

BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

dataset

<BatchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [30]:
vocab_size = len(vocab)

embedding_dim = 256

rnn_units = 1024

In [31]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  model = tf.keras.Sequential([
      tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
      tf.keras.layers.GRU(rnn_units,
                          return_sequences=True,
                          stateful=True,
                          recurrent_initializer='glorot_uniform'),
      tf.keras.layers.Dense(vocab_size)                         
  ])
  return model

In [32]:
model = build_model(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE
)

In [34]:
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [35]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
gru (GRU)                    (64, None, 1024)          3938304   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [36]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

In [37]:
sampled_indices

array([27, 20, 60, 30, 38, 43,  5, 16, 48, 45,  7, 26,  7, 46, 51, 59, 57,
       34, 37,  0, 37, 25,  3, 14, 58, 54, 31,  1, 62, 54, 36, 48, 28, 47,
       48, 30, 19, 30,  5,  0, 30, 54, 62, 19, 20, 11,  0, 35, 22, 59, 62,
       37, 47, 13, 56, 26, 11, 20, 37, 45, 50, 35, 46, 45, 41, 44,  6,  9,
        4, 39,  4, 51, 62, 35, 45, 40, 17, 46,  2, 41,  2, 60, 42, 20, 30,
       58, 46, 52, 43, 28,  1, 15,  7, 64, 32, 23, 62, 17, 60, 10])

In [38]:
print("Input: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices])))

Input: 
 "ttle for their dens,\nPoor harmless lambs abide their enmity.\nWeep, wretched man, I'll aid thee tear "

Next Char Predictions: 
 "OHvRZe'Djg-N-hmusVY\nYM$BtpS xpXjPijRGR'\nRpxGH;\nWJuxYiArN;HYglWhgcf,3&a&mxWgbEh!c!vdHRthneP C-zTKxEv:"


In [39]:
def loss(labels, logits):
  return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

example_batch_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (64, 100, 65)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.175061


In [40]:
model.compile(optimizer="adam", loss=loss)

In [41]:
checkpoint_dir = "./training/checkpoints"

checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True
)

In [42]:
EPOCHS=10

In [43]:
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [44]:
tf.train.latest_checkpoint(checkpoint_dir)

'./training/checkpoints/ckpt_10'

In [45]:
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

model.build(tf.TensorShape([1, None]))

In [46]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            16640     
_________________________________________________________________
gru_1 (GRU)                  (1, None, 1024)           3938304   
_________________________________________________________________
dense_1 (Dense)              (1, None, 65)             66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [47]:
def generate_text(model, start_string):
  num_generate = 1000

  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  text_generated = []

  temperature = 1.0

  model.reset_states()
  for i in range(num_generate):
    predictions = model(input_eval)

    predictions = tf.squeeze(predictions, 0)

    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])
  
  return (start_string + "".join(text_generated))

In [48]:
print(generate_text(model, start_string=u"DILAWAR: "))

DILAWAR: I cannot Playe.

QUEEN ELIZABETH:
What men indeed
Whom I poul with us, sights the journey, by mercy,
Threefold rudeth tent:
'Tis very-liner's child of pale.

LADY CARUSBY:
It is mightst way the fresh hand.

VINCENTIO:
It is a rattle, that he may sound the bankles
To pleasadians careless rooty Dukes grown chance,
By warwick creature to the purpose. We have proved hours for an hour mysceeting betite the law,
And wings my trustful conclude hate the dugglests as my liege,
Your highness' true enough, then as the Loom of Year;
What, grace two your queen, and
Spring fortune may be a scarve ear
the shepherd would be sins: but lemphy young part both.

BENVOLIO:
Thou livest friar! who shall fite to me with jounting
his imlehicy stalled father forms,
Against your son acquaint, never hars, my gracious lady
and make war, lend without as he will never and
my dame of woes,
Ere Dreportance please too trued by a remedy,
I shall learn; Sir doom! I have spoken'd
With one all her born home will I

In [49]:
model = build_model(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE
)

In [50]:
optimizer = tf.keras.optimizers.Adam()

In [53]:
@tf.function
def train_step(inp, target):
  with tf.GradientTape() as tape:
    predictions = model(inp)
    loss = tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(
            target, predictions, from_logits=True
        )
    )
  grads = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))

  return loss

In [54]:
EPOCHS = 10

for epoch in range(EPOCHS):
  start = time.time()

  hidden = model.reset_states()

  for (batch_n, (inp, target)) in enumerate(dataset):
    loss = train_step(inp, target)

    if batch_n % 100 == 0:
      print(f"Epoch {epoch+1} Batch {batch_n} Loss {loss}")

  if (epoch + 1) % 5 == 0:
    model.save(checkpoint_prefix.format(epoch=epoch))
  
  print(f"Epoch {epoch+1} Loss {loss:.4f}")
  print(f"Time taken for 1 epoch {time.time() - start}\n")

model.save_weights(checkpoint_prefix.format(epoch=epoch))

Epoch 1 Batch 0 Loss 4.173630714416504
Epoch 1 Batch 100 Loss 2.3301644325256348
Epoch 1 Loss 2.1402
Time taken for 1 epoch 11.80069637298584

Epoch 2 Batch 0 Loss 2.1618611812591553
Epoch 2 Batch 100 Loss 1.9473545551300049
Epoch 2 Loss 1.7673
Time taken for 1 epoch 11.020553827285767

Epoch 3 Batch 0 Loss 1.7785708904266357
Epoch 3 Batch 100 Loss 1.66019606590271
Epoch 3 Loss 1.6172
Time taken for 1 epoch 10.622749328613281

Epoch 4 Batch 0 Loss 1.5780038833618164
Epoch 4 Batch 100 Loss 1.5075205564498901
Epoch 4 Loss 1.4686
Time taken for 1 epoch 10.42206358909607

Epoch 5 Batch 0 Loss 1.4786056280136108
Epoch 5 Batch 100 Loss 1.529009222984314
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: ./training/checkpoints/ckpt_4/assets
Epoch 5 Loss 1.4432
Time taken for 1 epoch 13.950747966766357

Epoch 6 Batch 0 Loss 1.3466796875
Epoch 6 Batch 100 Loss 1.3933459520339966
Epoch 6 Loss 1.4007
Time taken for 1 epoch 10.221660

In [56]:
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

model.build(tf.TensorShape([1, None]))

In [57]:
print(generate_text(model, start_string=u"DILAWAR: "))

DILAWAR: I beseech your honsers, word;
And so well; he sigh'd my daughter's showing of thy daughter
To such necesing
To guising and as an oath'd for inWY:
For Gloucester, think you, madam:
Why, she is, CATESBY:
What, comes two bawd:
We cannot poor the two sleeping Bianca;
And I hide scend of swift,
and raves him evermetted these which we, the more as there,
Or I am puts in 'Coriolanus: the hair
rehol idle, my Lord of Norkold,
For Yereon either come to your saints crown'd Tillwife prayers!
And say they too much than so't so:
For losh being such idlinent and Anoubhes?
Cannot be at love, too perpetually, for, good my teother; but I
have extreme me surn and Ramelian process;
For one belovious dreams,
For no done supposemonother's road.

Provost:
The necks be overto's descent, think it hath done tearing
I bear them from thy sinkle thoughts;
The death were compass,
Corrupt the deed is his true were all my face.
D, Lord by the issue of this.
Your sins, rebels that did fin me well:
The people 