In [68]:
#All Libraries required 
import tensorflow as tf

import numpy as np
import os
import time
# Read the whole training corpus into memory. The context manager closes the
# file handle as soon as the read finishes instead of leaving it open for the
# lifetime of the kernel.
# NOTE(review): no encoding is passed, so the platform default applies --
# confirm the file's encoding and pass it explicitly for reproducible runs.
with open("./data/data.txt") as corpus_file:
    text = corpus_file.read()

In [69]:
# Distinct characters occurring in the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()
print(f'{len(vocab)} unique characters')

63 unique characters


In [66]:
# Forward lookup layer: character -> integer id (no mask token reserved).
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab),
    mask_token=None)

# Inverse lookup layer: integer id -> character. It reuses the forward layer's
# own vocabulary so both directions agree on the id assignment.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    invert=True,
    mask_token=None)


def text_from_ids(ids):
  """Map `ids` back to characters and join them along the last axis."""
  id_chars = chars_from_ids(ids)
  return tf.strings.reduce_join(id_chars, axis=-1)

In [70]:
# Split the corpus into individual UTF-8 characters and look up each one's id.
all_ids = ids_from_chars(
    tf.strings.unicode_split(text, 'UTF-8')
)
all_ids

<tf.Tensor: shape=(35514,), dtype=int64, numpy=array([22, 57,  2, ..., 57, 42, 10], dtype=int64)>

In [71]:
seq_length = 100

# Stream the ids one at a time, then regroup them into windows of
# seq_length + 1 ids; a trailing partial window is dropped.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
sequences = ids_dataset.batch(seq_length + 1, drop_remainder=True)

# Show the first window as individual characters.
for seq in sequences.take(1):
    print(chars_from_ids(seq))

tf.Tensor(
[b'I' b't' b' ' b'w' b'a' b's' b' ' b't' b'h' b'e' b' ' b'b' b'e' b's'
 b't' b' ' b'o' b'f' b' ' b't' b'i' b'm' b'e' b's' b',' b'\n' b'i' b't'
 b' ' b'w' b'a' b's' b' ' b't' b'h' b'e' b' ' b'w' b'o' b'r' b's' b't'
 b' ' b'o' b'f' b' ' b't' b'i' b'm' b'e' b's' b',' b'\n' b'i' b't' b' '
 b'w' b'a' b's' b' ' b't' b'h' b'e' b' ' b'a' b'g' b'e' b' ' b'o' b'f'
 b' ' b'w' b'i' b's' b'd' b'o' b'm' b',' b'\n' b'i' b't' b' ' b'w' b'a'
 b's' b' ' b't' b'h' b'e' b' ' b'a' b'g' b'e' b' ' b'o' b'f' b' ' b'f'
 b'o' b'o' b'l'], shape=(101,), dtype=string)


In [72]:
# Decode the first five windows back into byte strings to check the chunking.
for seq in sequences.take(5):
    print(text_from_ids(seq).numpy())

b'It was the best of times,\nit was the worst of times,\nit was the age of wisdom,\nit was the age of fool'
b'ishness,\nit was the epoch of belief,\nit was the epoch of incredulity,\nit was the season of Light,\nit '
b'was the season of Darkness,\nit was the spring of hope,\nit was the winter of despair,\nwe had everythin'
b'g before us,\nwe had nothing before us,\nwe were all going direct to Heaven,\nwe were all going direct t'
b'he other way--\nin short, the period was so far like the present period, that some of\nits noisiest aut'


In [73]:
def split_input_target(sequence):
    """Build an (input, target) pair from one sequence.

    The input is `sequence` without its last element and the target is
    `sequence` without its first element, so the two are the same length and
    the target is the input shifted forward by one position.
    """
    return sequence[:-1], sequence[1:]

# Number of (input, target) pairs per training batch.
BATCH_SIZE = 64

# Size of the shuffle buffer. tf.data shuffles within a bounded buffer rather
# than loading the entire (possibly unbounded) stream into memory.
BUFFER_SIZE = 10000

# Turn every window into an (input, target) pair, then shuffle, batch
# (dropping a trailing partial batch) and prefetch.
dataset = sequences.map(split_input_target)
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [74]:
# Model hyperparameters.
# vocab_size is taken from the StringLookup layer's vocabulary.
vocab_size = len(ids_from_chars.get_vocabulary())
embedding_dim = 256   # width of each character embedding
rnn_units = 1024      # number of GRU units

class MyModel(tf.keras.Model):
  """Character-level sequence model: Embedding -> GRU -> Dense.

  The final Dense layer has `vocab_size` outputs and no activation, so the
  model returns unnormalised scores (logits) for every position.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    """Create the three layers.

    Args:
      vocab_size: number of entries in the embedding table and number of
        outputs of the final Dense layer.
      embedding_dim: size of each embedding vector.
      rnn_units: number of units in the GRU layer.
    """
    # The base initialiser takes no positional arguments here; passing `self`
    # explicitly (as the original did) hands Keras a stray positional argument.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # The GRU returns the full output sequence plus its final state so callers
    # can carry the state across successive calls.
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on `inputs`.

    Args:
      inputs: ids fed to the embedding layer (assumed to be a batch of
        integer id sequences -- confirm against the caller).
      states: initial GRU state; when None the GRU's own initial state is used.
      return_state: when True, also return the GRU's final state.
      training: forwarded to every layer call.

    Returns:
      The Dense layer's output, or `(output, states)` when `return_state` is
      True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [75]:
# Instantiate the model with the hyperparameters defined earlier.
model = MyModel(vocab_size=vocab_size, embedding_dim=embedding_dim, rnn_units=rnn_units)

In [76]:
# The model's final Dense layer has no activation, so the loss is told to
# expect logits.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(loss=loss, optimizer='adam')

# Checkpoints go under this directory; "{epoch}" in the file name is filled in
# when each checkpoint is written.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Save only the weights (not the full model) through the callback.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix)

In [77]:
# Train with Keras' built-in loop, writing a checkpoint through the callback.
EPOCHS = 20
history = model.fit(
    dataset,
    epochs=EPOCHS,
    callbacks=[checkpoint_callback],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [78]:
class OneStep(tf.keras.Model):
  """Wraps a trained model to sample one next character per call."""

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    """Store the model and lookup layers and precompute the "[UNK]" mask.

    Args:
      model: callable accepting `inputs`, `states` and `return_state` keywords
        and returning `(logits, states)`.
      chars_from_ids: lookup layer mapping ids to characters.
      ids_from_chars: lookup layer mapping characters to ids.
      temperature: divisor applied to the logits before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Additive mask over the vocabulary: -inf at the "[UNK]" id and zero
    # everywhere else, so "[UNK]" can never be sampled.
    unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
    vocab_len = len(ids_from_chars.get_vocabulary())
    unk_mask = tf.SparseTensor(
        indices=unk_ids,
        values=[-float('inf')] * len(unk_ids),
        dense_shape=[vocab_len])
    self.prediction_mask = tf.sparse.to_dense(unk_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: strings to condition on.
      states: model state from a previous call, or None.

    Returns:
      `(predicted_chars, states)`: the sampled characters and the model state
      to pass to the next call.
    """
    # Strings -> characters -> ids, converted from ragged to dense.
    token_ids = self.ids_from_chars(
        tf.strings.unicode_split(inputs, 'UTF-8')).to_tensor()

    # logits has shape [batch, char, next_char_logits].
    logits, states = self.model(inputs=token_ids, states=states,
                                return_state=True)

    # Keep only the last position, apply the temperature, then add the mask
    # that rules out "[UNK]".
    last_logits = logits[:, -1, :] / self.temperature
    masked_logits = last_logits + self.prediction_mask

    # Draw one id per batch row and drop the sample axis.
    sampled = tf.random.categorical(masked_logits, num_samples=1)
    next_ids = tf.squeeze(sampled, axis=-1)

    # Ids -> characters, returned together with the updated state.
    return self.chars_from_ids(next_ids), states

In [79]:
# Wrap the current `model` for character-by-character sampling
# (temperature left at its default).
one_step_model = OneStep(
    model,
    chars_from_ids,
    ids_from_chars,
)

In [80]:
start = time.time()

# Seed the generation with one prompt; `result` collects the prompt followed by
# every sampled character.
next_char = tf.constant(['ROMEO:'])
states = None
result = [next_char]

# Feed each sampled character (and the carried state) back in 1000 times.
for step in range(1000):
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)

# Concatenate the pieces element-wise into one string per prompt.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

ROMEO: l
cos pig singis the mis ter" tongee. "Lupaco fiwtreid mosd wing Lol frelin thivlt and af aiche ane strevend wakin-xesos, atere ?ofe
't otteecetp leo thel,
sif
thes lc, alpsy asug ram watlllisis, an ltolrat iped, wal.
Nond enof."
Wit hery fitro se lorere. "nss ing te sor ive the werarith ting bopel, ived iflrnlwrobtr haet
dase hith."
Tulgiph anig ind Tis th ang clir  hsmitce giceilpa rarl olochede ond rate herren, faog aiseng Co g cuthers wid, He" the pash wibumis ane, huad boit ouy Auchis the
gady ousend waas h tirveng pid-du the dns foe fokt. "
?fe-sutt warovs and biuinged srebideacheng Tof ngre hoond."

lav. Jofuve bladr ale, yoveantawd thectol! hersecaret ie ctrorye,
the rlakinuss,, tees
al: Min
and toricgea had
ito has, weatse
Frpem."
"
Thtead jivseavouve he nousee gom-the cpey oferon"e
cerherad bags wats, rissl fustlo theris er f at had waverdes foup-y
ar,
"
has the Dolomsacvellelo se trern atith yir ape keso thalloy toissenped sa."""

RTeaw, ot an tole th coed, tor harits

In [83]:
class CustomTraining(MyModel):
  """MyModel with a hand-written `train_step`."""

  @tf.function
  def train_step(self, inputs):
    """Run one optimisation step on a single batch.

    Args:
      inputs: an `(inputs, labels)` pair.

    Returns:
      A dict with the batch loss under the key 'loss'.
    """
    inputs, labels = inputs
    with tf.GradientTape() as tape:
      predictions = self(inputs, training=True)
      loss = self.loss(labels, predictions)
    # Differentiate with respect to, and update, this instance's own variables.
    # The original read them from the notebook-global `model`, which only
    # worked when that global happened to be this same instance.
    trainable_vars = self.trainable_variables
    grads = tape.gradient(loss, trainable_vars)
    self.optimizer.apply_gradients(zip(grads, trainable_vars))

    return {'loss': loss}
  
# Rebind `model` to a fresh CustomTraining instance; the vocabulary size is
# read straight from the lookup layer.
model = CustomTraining(
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
)

# compile() attaches the optimizer and loss that train_step reads from the
# instance.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(),
)

EPOCHS = 10

# Running average of the per-batch loss within one epoch.
mean = tf.metrics.Mean()

for epoch in range(EPOCHS):
    start = time.time()

    # Start each epoch's average from scratch. reset_state() is the current
    # name of this method; reset_states() is its deprecated spelling.
    mean.reset_state()
    for (batch_n, (inp, target)) in enumerate(dataset):
        logs = model.train_step([inp, target])
        mean.update_state(logs['loss'])

        # Report the batch loss every 50 batches.
        if batch_n % 50 == 0:
            template = f"Epoch {epoch+1} Batch {batch_n} Loss {logs['loss']:.4f}"
            print(template)

    # saving (checkpoint) the model every 5 epochs
    # NOTE(review): the file name uses the 0-based loop index while the
    # progress messages print epoch+1 -- confirm which numbering is intended.
    if (epoch + 1) % 5 == 0:
        model.save_weights(checkpoint_prefix.format(epoch=epoch))

    print()
    print(f'Epoch {epoch+1} Loss: {mean.result().numpy():.4f}')
    print(f'Time taken for 1 epoch {time.time() - start:.2f} sec')
    print("_"*80)

# Save the weights once more after the final epoch.
model.save_weights(checkpoint_prefix.format(epoch=epoch))

Epoch 1 Batch 0 Loss 4.1590

Epoch 1 Loss: 4.5992
Time taken for 1 epoch 9.36 sec
________________________________________________________________________________
Epoch 2 Batch 0 Loss 3.5791

Epoch 2 Loss: 3.7408
Time taken for 1 epoch 6.61 sec
________________________________________________________________________________
Epoch 3 Batch 0 Loss 3.7111

Epoch 3 Loss: 3.6005
Time taken for 1 epoch 6.67 sec
________________________________________________________________________________
Epoch 4 Batch 0 Loss 3.4459

Epoch 4 Loss: 3.3687
Time taken for 1 epoch 6.67 sec
________________________________________________________________________________
Epoch 5 Batch 0 Loss 3.2317

Epoch 5 Loss: 3.1095
Time taken for 1 epoch 7.24 sec
________________________________________________________________________________
Epoch 6 Batch 0 Loss 3.0158

Epoch 6 Loss: 3.0035
Time taken for 1 epoch 7.81 sec
________________________________________________________________________________
Epoch 7 Batch 0 Loss 2

In [84]:
start = time.time()

# Generate five continuations of the same prompt in one batch.
# NOTE(review): `one_step_model` wraps the model object it was constructed
# with; if `model` has been rebound since then, this samples from the earlier
# model -- confirm that is intended.
next_char = tf.constant(['ROMEO:'] * 5)
states = None
result = [next_char]

# Feed each sampled character (and the carried state) back in 1000 times.
for step in range(1000):
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)

# Concatenate the pieces element-wise into one string per prompt.
result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + '_'*80)
print('\nRun time:', end - start)

tf.Tensor(
[b'ROMEO:-LBath, nicirsthadt. ro ke?"\n\n(hee hid"\n"rstedivet an\'y ny fowsop pice der ad tye\nef, figtite Lodevet ofs. \nkis thete ths wers mosteringy ar itheig indingertsin hong, yas mome fon puwand thepas thad bu he huon, Jod yor the sritery sn I in oumly tre\nthe din ras eut masewe biotert\nlacltin bous no the reis tins\nwoerd ef thou memscee gcetirn sf. \natCpanlan wo, pad ronte\noad toto tyetreid mot himes tor, fors\nridxquabl solels\n\nothe Dis the hpis tnach swole?"\nT\nithan,; "a-windtricudat thyemt of beapln bouse son, ale Dweorlst ingonans in tonche tlem\nLke af in t hop teast, ai, forh lman th so hes thelces fpist,, sun os mas mad.\n)\nI att uode nhend"\nShe hot, ixo than. Thes ane last of kon iskr tow wingwithiny Aa y afpthe rems wo. Jhe oom, bypa irtits int ant facd inde do ans apcendowis,\nou he thacheay at tiuwis sas of asladl homiqugheaney s ile aglast Cheek hfak bed thecind roand ning arof thatrelva boncind of therp fo krerlden wiis\nink\nof eo storce tres