In [1]:
import tensorflow as tf

import numpy as np
import os
import time

In [2]:
# Importing stories: load the corpus, then report its size and character set.
path_to_file = '../Resources/Datasets/folk.txt'
# Read the raw bytes and decode them as UTF-8; the context manager makes sure
# the file handle is closed once the read finishes.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')


print(f'Length of text: {len(text)} characters')
# Preview the first 250 characters of the corpus.
print(text[:250])
# Sorted list of the distinct characters that occur in the corpus.
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

Length of text: 25797400 characters
If youll believe me there was a time when the fairies were none so shy as they are now. That was the time when beasts talked to men when there were spells and enchantments and magic every day when there was great store of hidden treasure to be dug up
69 unique characters


In [3]:
# Vectorizing text: build the character <-> id lookup layers, then round-trip
# a small example through them.

# Forward lookup (character -> integer id) over the corpus vocabulary.
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)
# Inverse lookup (integer id -> character) sharing the forward layer's vocabulary.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)

# Round-trip demo: split strings into characters, map to ids, map back, re-join.
example_texts = ['abcdefg', 'xyz']
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
ids = ids_from_chars(chars)
chars = chars_from_ids(ids)
tf.strings.reduce_join(chars, axis=-1).numpy()

def text_from_ids(ids):
  """Map `ids` back to characters and join them along the last axis."""
  looked_up_chars = chars_from_ids(ids)
  return tf.strings.reduce_join(looked_up_chars, axis=-1)

In [4]:
# Prediction RNN: cut the encoded corpus into fixed-length id sequences.

# Number of characters in each training input.
seq_length = 1000

# Encode the whole corpus as one long vector of character ids.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
# Each chunk holds seq_length + 1 ids, so it can later be split into an input
# and a one-step-shifted target; a short final chunk is dropped.
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

# Decode and print the first five chunks as a sanity check.
for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

b'If youll believe me there was a time when the fairies were none so shy as they are now. That was the time when beasts talked to men when there were spells and enchantments and magic every day when there was great store of hidden treasure to be dug up and adventures for the asking.At that time you must know an old man and an old woman lived alone by themselves. They were good and they were poor and they had no children at all.One fine day What are you doing this morning good man says the old woman.Oh says the old man Im off to the mountains with my billhook to gather a faggot of sticks for our fire. And what are you doing good wifeOh says the old woman Im off to the stream to wash clothes. Its my washing day she adds.So the old man went to the mountains and the old woman went to the stream.Now while she was washing the clothes what should she see but a fine ripe peach that came floating down the stream The peach was big enough and rosy red on both sides.Im in luck this morning said th

In [5]:
# Splitting Sequences

def split_input_target(sequence):
    """Split a sequence into an (input, target) pair offset by one position.

    The input is every element except the last; the target is every element
    except the first, i.e. the input shifted one step ahead.
    """
    return sequence[:-1], sequence[1:]

# Pair every sequence with its one-step-shifted target.
dataset = sequences.map(split_input_target)

# Decode the first (input, target) pair back to text for inspection.
for input_example, target_example in dataset.take(1):
    print(f"Input : {text_from_ids(input_example).numpy()}")
    print(f"Target: {text_from_ids(target_example).numpy()}")

Input : b'If youll believe me there was a time when the fairies were none so shy as they are now. That was the time when beasts talked to men when there were spells and enchantments and magic every day when there was great store of hidden treasure to be dug up and adventures for the asking.At that time you must know an old man and an old woman lived alone by themselves. They were good and they were poor and they had no children at all.One fine day What are you doing this morning good man says the old woman.Oh says the old man Im off to the mountains with my billhook to gather a faggot of sticks for our fire. And what are you doing good wifeOh says the old woman Im off to the stream to wash clothes. Its my washing day she adds.So the old man went to the mountains and the old woman went to the stream.Now while she was washing the clothes what should she see but a fine ripe peach that came floating down the stream The peach was big enough and rosy red on both sides.Im in luck this morning

In [6]:
# Training batches

# Batch size
BATCH_SIZE = 64
# Number of elements held in the shuffle buffer
BUFFER_SIZE = 10000

# Build the pipeline from `sequences` rather than re-wrapping `dataset`, so
# re-running this cell does not shuffle/batch an already-batched dataset.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE))

dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 1000), dtype=tf.int64, name=None), TensorSpec(shape=(64, 1000), dtype=tf.int64, name=None))>

In [7]:
# Basic Model

# Width of the character embedding vectors
embedding_dim = 256
# Number of units in the GRU layer
rnn_units = 1024
# Size of the StringLookup vocabulary; also the width of the model's output layer
vocab_size = len(ids_from_chars.get_vocabulary())

class MyModel(tf.keras.Model):
  """Character-level model: Embedding -> GRU -> Dense logits over the vocabulary."""

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    """Create the layers.

    Args:
      vocab_size: number of entries in the embedding and of output logits.
      embedding_dim: size of each embedding vector.
      rnn_units: number of GRU units.
    """
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of id sequences.

    Args:
      inputs: batch of character-id sequences.
      states: optional GRU state to start from; `None` lets the GRU use its
        default initial state.
      return_state: when true, also return the GRU's final state.
      training: forwarded to every layer.

    Returns:
      The logits, or `(logits, states)` when `return_state` is true.
    """
    x = self.embedding(inputs, training=training)
    # Feed the caller-supplied state into the GRU. Without this, step-by-step
    # generation restarts from the default state on every call and the
    # `states` argument has no effect.
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    return x

# Instantiate the basic model; arguments are (vocab_size, embedding_dim, rnn_units).
model = MyModel(vocab_size, embedding_dim, rnn_units)

In [8]:
# Architecture verification

# Push a single batch through the model and report the shape of its output.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(f"{example_batch_predictions.shape} # (batch_size, sequence_length, vocab_size)")

# Layer-by-layer parameter overview.
model.summary()

(64, 1000, 70) # (batch_size, sequence_length, vocab_size)
Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  17920     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  71750     
                                                                 
Total params: 4027974 (15.37 MB)
Trainable params: 4027974 (15.37 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [9]:
# Basic Training Setup

# The model outputs raw logits, hence from_logits=True.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
# Exponential of the mean loss on the example batch. A bare expression in the
# middle of a cell is not displayed, so print it explicitly.
print("exp(mean loss) on example batch:", tf.exp(example_batch_mean_loss).numpy())
model.compile(optimizer='adam', loss=loss, metrics=["accuracy"])

# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files: one "ckpt_<epoch>.weights.h5" file per epoch.
# Joining ".weights.h5" as a separate path component would instead create a
# hidden ".weights.h5" file inside a "ckpt_<epoch>" directory.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [10]:
# Execute EPOCHS

# Number of passes over the training dataset
EPOCHS = 5

# Train with the checkpoint callback attached; keep the returned History object.
history = model.fit(
    dataset,
    epochs=EPOCHS,
    callbacks=[checkpoint_callback],
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [14]:
# Generate text from a seed string and time the sampling loop.
# NOTE(review): this cell uses `one_step_model`, which is only created in a
# cell that appears further down in the notebook. A top-to-bottom run reaches
# this cell before that name exists, so the cell needs to move below it.
start = time.time()
states = None
# Seed text; it is kept as the first piece of the result.
next_char = tf.constant(['Dusk broke'])
result = [next_char]

# Sample 200 characters, feeding each prediction and the returned state back in.
for n in range(200):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the seed and all sampled characters into one string per batch entry.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

Dusk broke bogaredara. greresos terederide me f aYelales he.They KEJob. tid yengathef yasobuthineroule t bo stono ain Fithebu asquthes het ld.Rome gespanichinghagele wolinerond spo.The beresad boMyarare bupe up 

________________________________________________________________________________

Run time: 0.16315054893493652


In [11]:
# Do the OneStep

class OneStep(tf.keras.Model):
  """Wraps a model and its lookup layers to sample one next character per call."""

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1):
    """Store the model and lookup layers and precompute the "[UNK]" mask.

    Args:
      model: model called with `inputs`, `states` and `return_state=True`.
      chars_from_ids: lookup layer mapping ids to characters.
      ids_from_chars: lookup layer mapping characters to ids.
      temperature: divisor applied to the logits before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Additive mask over the vocabulary: -inf at the "[UNK]" id and zero
    # everywhere else, so "[UNK]" can never be sampled.
    unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
    vocab_len = len(ids_from_chars.get_vocabulary())
    neg_inf = -float('inf')
    unk_mask = tf.SparseTensor(
        indices=unk_ids,
        values=[neg_inf]*len(unk_ids),
        dense_shape=[vocab_len])
    self.prediction_mask = tf.sparse.to_dense(unk_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: string tensor of input text.
      states: model state from the previous step, or `None`.

    Returns:
      `(predicted_chars, states)`: the sampled characters and the model state.
    """
    # Strings -> characters -> dense tensor of token ids.
    split_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    token_ids = self.ids_from_chars(split_chars).to_tensor()

    # logits has shape [batch, char, next_char_logits].
    logits, states = self.model(inputs=token_ids, states=states,
                                return_state=True)

    # Keep only the last position, apply the temperature, then add the mask
    # that rules out "[UNK]".
    last_logits = logits[:, -1, :]
    last_logits = last_logits/self.temperature
    last_logits = last_logits + self.prediction_mask

    # Draw one id per batch entry from the logits and drop the sample axis.
    sampled_ids = tf.squeeze(
        tf.random.categorical(last_logits, num_samples=1), axis=-1)

    # Ids back to characters, returned together with the model state.
    return self.chars_from_ids(sampled_ids), states

In [12]:
# Wrap the current `model` and the two lookup layers for step-by-step sampling.
one_step_model = OneStep(
    model=model,
    chars_from_ids=chars_from_ids,
    ids_from_chars=ids_from_chars)

In [44]:
# ------------------------------------------STOP___________________________________________#

In [51]:
# Model with a custom gradient step: hyperparameters
embedding_dim = 256
rnn_units = 2048
vocab_size = len(ids_from_chars.get_vocabulary())


class CustomTraining(MyModel):
  """MyModel variant whose training step is written out with a GradientTape."""

  @tf.function
  def train_step(self, inputs):
    """Run one optimisation step on a batch.

    Args:
      inputs: an `(inputs, labels)` pair for one batch.

    Returns:
      A dict with the batch loss under the key `'loss'`.
    """
    inputs, labels = inputs
    with tf.GradientTape() as tape:
      predictions = self(inputs, training=True)
      loss = self.loss(labels, predictions)
    # Differentiate with respect to this instance's own variables. Using the
    # notebook-global `model` here only works while that name happens to be
    # bound to this same object.
    variables = self.trainable_variables
    grads = tape.gradient(loss, variables)
    self.optimizer.apply_gradients(zip(grads, variables))

    return {'loss': loss}

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of id sequences.

    Args:
      inputs: batch of character-id sequences.
      states: optional GRU state to start from; `None` lets the GRU use its
        default initial state.
      return_state: when true, also return the GRU's final state.
      training: forwarded to every layer.

    Returns:
      The logits, or `(logits, states)` when `return_state` is true.
    """
    x = self.embedding(inputs, training=training)
    # Pass the supplied state to the GRU so step-by-step generation can carry
    # context from one call to the next.
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    return x

# Build the custom-training model; arguments are (vocab_size, embedding_dim, rnn_units).
model = CustomTraining(
    len(ids_from_chars.get_vocabulary()),
    embedding_dim,
    rnn_units)

# Push a single batch through the model and report the shape of its output.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(f"{example_batch_predictions.shape} # (batch_size, sequence_length, vocab_size)")

# Layer-by-layer parameter overview.
model.summary()

(64, 100, 74) # (batch_size, sequence_length, vocab_size)


In [52]:
# Compile with Adam and a from-logits sparse categorical cross-entropy loss.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)

# Train for 42 epochs; the returned History object is the cell's output.
model.fit(dataset, epochs=42)

Epoch 1/42
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2s/step - loss: 4.6498
Epoch 2/42
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 2s/step - loss: 2.7097
Epoch 3/42
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 2s/step - loss: 2.2819
Epoch 4/42
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2s/step - loss: 2.1380
Epoch 5/42
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 2s/step - loss: 2.0421
Epoch 6/42
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 2s/step - loss: 1.9734
Epoch 7/42
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 2s/step - loss: 1.9109
Epoch 8/42
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 3s/step - loss: 1.8507
Epoch 9/42
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3s/step - loss: 1.7960
Epoch 10/42
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 3s/step - loss: 1.7400
Epoch 11/

<keras.src.callbacks.history.History at 0x25045aab610>

In [56]:
# Rebuild the sampler so it wraps the current `model`. A `one_step_model`
# created in an earlier cell keeps a reference to the model object it was given
# then, so it would not sample from a `model` that has since been rebound and
# retrained.
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

# Generate text from a seed string and time the sampling loop.
start = time.time()
states = None
# Seed text; it is kept as the first piece of the result.
next_char = tf.constant(['Dusk broke'])
result = [next_char]

# Sample 1000 characters, feeding each prediction and the returned state back in.
for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the seed and all sampled characters into one string per batch entry.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

Dusk broke bod courour we sthere d hanghes t he ano totokitoucealesepo amaw whed lan'sthe the a o. winy s me plifondowan stopo wacondes an the h s, squpanghthay we hevatouf be thede. amanougrsinoubutwanstsstirg alathe anave, he ate s.
K
ke ore lared and-keley There bugonde the Osizavemas-mamaned re Cre aldexisthoubre ilve the wouroncouchesthinoneathey f t baliro thes toplis2 t mad s he Soule thitwe theaurund wane tore f ake f copow st. wacandes, he s laned ve tawan" s s. tothises a.
oun" w unin amere athered ve oure, hefoknoursthe. stots. f the. ho at thinowepokeat hawan ut the wan. thichize woplilourn s unquplld theved secaully ptheve tho tevesire t Frs we t s inoure sise venG0 ourgh wothe Theroverke acooutco, ut-ithe agrirs or. ngaPhe the wepathedis avan"
pamanatare ced owe.
Ded 
ce ith hepawot nge cokeshtirma thilaneit Sce ananoulavend ce helat he the boPand beve mand wathe she. sthanared cese we icengaid danas amis allst ighech tcherg. t theperepey sMan t alde fots s, h woroum May 

In [57]:
# ------------------------------------------STOP___________________________________________#