In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import tensorflow as tf
from helper_functions import *
importTensorflow(memory=4000)
import numpy as np
import time

2.15.1
1 Physical GPUs, 1 Logical GPUs


In [2]:
# Fetch the Shakespeare corpus with Keras' download helper and keep the local
# file path; the next cell opens this path to read the text.
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [3]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8, so the result
# does not depend on the platform's default text encoding or newline handling.
# The context manager closes the file handle as soon as the read is done,
# instead of leaving it open until garbage collection.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
print(f'Length of text: {len(text)} characters')

Length of text: 1115394 characters


In [4]:
# Preview the first 250 characters of the corpus.
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [5]:
# The vocabulary is the sorted list of distinct characters found in the corpus.
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

65 unique characters


In [6]:
# Two strings of different lengths, used to demonstrate the lookup layers below.
example_texts = ['abcdefg', 'xyz']

# Split every string into its individual UTF-8 characters; because the strings
# differ in length the result is a ragged tensor.
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [7]:
# Lookup layer mapping each character to an integer id. mask_token=None means
# no id is reserved for a mask token. The layer's vocabulary ends up one entry
# larger than `vocab` (66 vs. 65): the extra entry is '[UNK]', which the
# sampling code later looks up and masks out (it resolves to id 0).
ids_from_chars = tf.keras.layers.StringLookup(vocabulary=list(vocab),
                                              mask_token=None)

In [8]:
# Look up the integer id of every character in the example strings.
ids = ids_from_chars(chars)
ids

<tf.RaggedTensor [[40, 41, 42, 43, 44, 45, 46], [63, 64, 65]]>

In [9]:
# Inverse lookup (id -> character). It is built from
# ids_from_chars.get_vocabulary() rather than from `vocab` so that both layers
# share exactly the same id assignment, including the '[UNK]' entry.
chars_from_ids = tf.keras.layers.StringLookup(vocabulary=ids_from_chars.get_vocabulary(),
                                            mask_token=None,
                                            invert=True)

In [10]:
# Round-trip check: mapping the ids back should reproduce the characters from
# the earlier split. Note that this rebinds the global `chars`.
chars = chars_from_ids(ids)
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [11]:
def text_from_ids(ids):
    """Convert character ids back into text.

    Each id is looked up with the global ``chars_from_ids`` layer and the
    resulting characters are joined along the last axis, so one string is
    produced per row of ``ids``.
    """
    looked_up_chars = chars_from_ids(ids)
    joined = tf.strings.reduce_join(looked_up_chars, axis=-1)
    return joined
text_from_ids(ids)

<tf.Tensor: shape=(2,), dtype=string, numpy=array([b'abcdefg', b'xyz'], dtype=object)>

### Break the text into sequences

In [12]:
# Encode the entire corpus as one flat tensor of character ids.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1])>

In [13]:
# Dataset that yields the corpus one character id at a time.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [14]:
# Peek at the first ten elements to confirm the dataset yields one character id
# at a time. The loop variable is deliberately NOT named `ids`: that global
# already holds the ragged example tensor used by `chars_from_ids(ids)` and
# `text_from_ids(ids)` in earlier cells, and rebinding it to a scalar here would
# make those cells behave differently when re-run.
for char_id in ids_dataset.take(10):
    print(chars_from_ids(char_id).numpy().decode("UTF-8"))

F
i
r
s
t
 
C
i
t
i


In [15]:
# Number of characters in each training input. Chunks are cut one id longer
# (seq_length + 1) so that the shifted target has the same length.
seq_length = 100

In [16]:
# Group the id stream into chunks of seq_length + 1 ids; drop_remainder=True
# discards the final chunk if it is shorter than that.
sequences = ids_dataset.batch(seq_length + 1, drop_remainder=True)

# Decode the first five chunks to check that consecutive chunks continue the text.
for seq in sequences.take(5):
    print(text_from_ids(seq).numpy())

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [17]:
def split_input_target(sequence):
    """Split a sequence into an (input, target) pair offset by one position.

    The input is everything except the last element and the target is
    everything except the first, so ``target[i]`` is the element that follows
    ``input[i]`` in the original sequence.
    """
    input_part = sequence[:-1]
    target_part = sequence[1:]
    return input_part, target_part
split_input_target(list("Tensorflow"))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [18]:
# Turn every (seq_length + 1)-id chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [19]:
# Show one pair as text: the target is the input shifted one character ahead.
for input_example, target_example in dataset.take(1):
    print("Input: ", text_from_ids(input_example).numpy())
    print("Target: ", text_from_ids(target_example).numpy())

Input:  b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target:  b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [20]:
# Number of (input, target) pairs per training batch.
BATCH_SIZE = 64
# Number of elements held in the shuffle buffer; tf.data shuffles by sampling
# from a bounded buffer rather than loading the whole dataset into memory.
BUFFER_SIZE = 10000

# Training pipeline: split -> shuffle -> batch -> prefetch.
# It is rebuilt from `sequences` instead of from `dataset` itself, so that
# re-running this cell does not stack a second shuffle/batch on top of an
# already batched dataset (which would silently change the element shape).
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 100), dtype=tf.int64, name=None), TensorSpec(shape=(None, 100), dtype=tf.int64, name=None))>

In [21]:
class MyModel(tf.keras.Model):
    """Character-level language model: Embedding -> GRU -> Dense.

    The Dense head has ``vocab_size`` units and no activation, so the model
    outputs one unnormalised score (logit) per vocabulary entry for every
    position of the input sequence.
    """

    def __init__(self, vocab_size, embedding_dim, rnn_units):
        """Create the three layers.

        Args:
            vocab_size: Number of distinct ids; used both as the embedding
                input size and as the number of output logits.
            embedding_dim: Size of each id's embedding vector.
            rnn_units: Number of units in the GRU layer.
        """
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        # return_sequences: emit an output at every time step.
        # return_state: also hand back the final state so that generation can
        # feed it into the next call.
        self.gru = tf.keras.layers.GRU(
            rnn_units,
            return_sequences=True,
            return_state=True,
        )
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_states=False, training=False):
        """Run the model on a batch of id sequences.

        Args:
            inputs: Batch of id sequences.
            states: Optional GRU state to start from; when ``None`` the GRU's
                own initial state for this input is used.
            return_states: When true, return ``(logits, states)`` instead of
                only the logits.
            training: Forwarded to every layer.

        Returns:
            The logits, or a ``(logits, final_gru_state)`` tuple when
            ``return_states`` is true.
        """
        embedded = self.embedding(inputs, training=training)
        initial_state = (
            self.gru.get_initial_state(embedded) if states is None else states
        )
        rnn_output, final_state = self.gru(
            embedded, initial_state=initial_state, training=training
        )
        logits = self.dense(rnn_output, training=training)

        return (logits, final_state) if return_states else logits

In [22]:
# vocab_size comes from the lookup layer's vocabulary (66 entries) rather than
# from len(vocab) (65), so the '[UNK]' entry gets an embedding row and an
# output logit as well.
model = MyModel(vocab_size = len(ids_from_chars.get_vocabulary()),
                embedding_dim = 256,
                rnn_units = 1024)

In [23]:
# Run the untrained model on a single batch to check the output shape.
# input_example_batch, target_example_batch and example_batch_predictions are
# deliberately left in the kernel namespace: later cells reuse them for the
# sampling demo and the initial loss check.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 66) # (batch_size, sequence_length, vocab_size)


In [24]:
# Per-layer parameter counts.
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  16896     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  67650     
                                                                 
Total params: 4022850 (15.35 MB)
Trainable params: 4022850 (15.35 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [25]:
# Draw one id per time step from the logits of the first example in the batch.
# The ids are sampled from the predicted distribution rather than taken as the
# argmax. num_samples=1 leaves a trailing axis of size 1, which squeeze removes.
sample_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sample_indices = tf.squeeze(sample_indices, axis=-1).numpy()
sample_indices

array([43, 27, 55, 52, 13, 32,  9, 17, 41, 56, 53, 33, 17, 56, 38, 46, 21,
       25, 61, 15, 42, 36, 36, 52, 49, 52,  1, 46, 24, 20, 30, 41,  6,  3,
       23, 36, 53, 38, 53, 38, 26, 36, 13, 45, 45, 30, 16, 47, 26, 52, 26,
       11, 55, 38, 42, 45, 14, 33, 54,  8,  2, 16, 17, 52, 17, 57, 27, 41,
       29,  0, 33,  1, 45, 54, 34, 46, 45, 17, 41,  2, 44, 45, 14, 25, 53,
       10,  4, 16,  9,  8, 45, 22, 59, 14, 24, 13, 65,  8,  2, 54])

In [26]:
# Decode the first input of the batch and the ids sampled for it. The model has
# not been trained yet, so the sampled text is expected to be noise.
print("Input: ", text_from_ids(input_example_batch[0]).numpy(), end='\n\n')
print("Next char predicition: ", text_from_ids(sample_indices).numpy())

Input:  b'uke will return no more; or you\nimagine me too unhurtful an opposite. But indeed I\ncan do you little'

Next char predicition:  b"dNpm?S.DbqnTDqYgHLvBcWWmjm\ngKGQb'!JWnYnYMW?ffQChMmM:pYcfATo- CDmDrNbP[UNK]T\nfoUgfDb efALn3$C.-fItAK?z- o"


### Train the model

In [27]:
# Targets are integer ids (sparse labels). from_logits=True because the model's
# Dense head has no activation and therefore outputs raw logits.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [28]:
# Loss of the untrained model on the example batch, as a baseline before training.
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (64, 100, 66)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.188429, shape=(), dtype=float32)


In [29]:
# exp(mean cross-entropy). For an untrained model this should be close to the
# number of output classes (66 here), i.e. the model is roughly guessing
# uniformly; a much larger value would point to badly scaled initial logits.
tf.exp(example_batch_mean_loss).numpy()

65.91914

In [30]:
# Adam optimizer (selected by name) with the loss defined above.
model.compile(optimizer='adam', loss=loss)

In [31]:
# Directory for the training checkpoints, relative to the notebook's working directory.
checkpoint_dir = './training_checkpoints'
# "{epoch}" is left as a placeholder in the path for the checkpoint callback to fill in.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# save_weights_only=True: store only the weights, not the full model.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix,
                                                        save_weights_only=True)

In [32]:
# Train for 20 epochs over the shuffled, batched dataset, with the checkpoint
# callback attached.
history = model.fit(dataset,
                    epochs=20,
                    callbacks=[checkpoint_callback])

Epoch 1/20


I0000 00:00:1716552779.460794  283207 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [33]:
class OneStep(tf.keras.Model):
    """Wraps a trained model to generate text one character at a time.

    Holds the model, both lookup layers, a sampling temperature and a
    precomputed logit mask that prevents '[UNK]' from ever being sampled.
    """

    def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
        """Store the collaborators and build the '[UNK]' mask.

        Args:
            model: Model called as ``model(inputs=..., states=..., return_states=True)``
                and expected to return ``(logits, states)``.
            chars_from_ids: Lookup layer mapping ids to characters.
            ids_from_chars: Lookup layer mapping characters to ids.
            temperature: Divisor applied to the logits before sampling.
        """
        super().__init__()
        self.temperature = temperature
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars

        # Id(s) that must never be generated. [:, None] adds a trailing axis so
        # the result can be used directly as SparseTensor indices.
        skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
        # -inf at every skipped id, over a vector as long as the vocabulary.
        sparse_mask = tf.SparseTensor(
            values=[-float('inf')]*len(skip_ids),
            indices=skip_ids,
            dense_shape=[len(ids_from_chars.get_vocabulary())])
        # Dense form: -inf at the skipped ids and 0 everywhere else, so adding
        # it to the logits only affects the skipped ids.
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)
        # Diagnostic output showing the mask that was built.
        print("skip ids: ", skip_ids)
        print(sparse_mask)
        print(self.prediction_mask)

    @tf.function
    def generate_one_step(self, inputs, states=None):
        """Sample the next character for every string in ``inputs``.

        Args:
            inputs: Batch of strings to continue.
            states: State returned by the previous call, or ``None`` on the
                first step.

        Returns:
            A ``(predicted_chars, states)`` pair: one sampled character per
            input string, and the model state to pass to the next call.
        """
        # Strings -> characters -> ids. to_tensor() converts the ragged result
        # into a dense tensor.
        # NOTE(review): rows of unequal length would be padded by to_tensor(),
        # presumably with 0 (the '[UNK]' id); the calls in this notebook only
        # pass equal-length strings.
        input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
        input_ids = self.ids_from_chars(input_chars).to_tensor()

        predicted_logits, states = self.model(inputs=input_ids, states=states, return_states=True)
        # Only the prediction for the last time step is needed.
        predicted_logits = predicted_logits[:, -1, :]
        # Dividing by a temperature below 1 sharpens the distribution, above 1 flattens it.
        predicted_logits = predicted_logits / self.temperature
        # Push the '[UNK]' logit to -inf so it is never sampled.
        predicted_logits = predicted_logits + self.prediction_mask

        # Sample one id per row, drop the num_samples axis, map back to characters.
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)
        predicted_chars = self.chars_from_ids(predicted_ids)
        return predicted_chars, states


In [34]:
# Generation wrapper around the trained model, with the default temperature of
# 1.0. Constructing it prints the '[UNK]' mask.
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

skip ids:  tf.Tensor([[0]], shape=(1, 1), dtype=int64)
SparseTensor(indices=tf.Tensor([[0]], shape=(1, 1), dtype=int64), values=tf.Tensor([-inf], shape=(1,), dtype=float32), dense_shape=tf.Tensor([66], shape=(1,), dtype=int64))
tf.Tensor(
[-inf   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.], shape=(66,), dtype=float32)


In [39]:
# Generate 1000 characters from a single prompt and time the run.
# NOTE(review): if this is the first call to generate_one_step, the measured
# time presumably also includes the tf.function tracing cost.
start = time.time()
# No state yet: the first step starts from the model's initial state.
states = None
next_char = tf.constant(['ROMEO:'])
# Collect the prompt followed by every generated character.
result = [next_char]

# `n` is only a step counter. Each iteration feeds the previous output (the whole
# prompt on the first step) and carries the returned state into the next step.
for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate prompt and generated characters element-wise into one string per
# batch entry. Note that `result` changes from a list to a tensor here.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

ROMEO:
I tranit it: it was a precious creature
That my wits to them else to many my deputy.

KATARINA:
'Have I no? Go, I perish, Thoughts,
Which and unnatural soldiers that repries
Shall fry from hence to thy throne.
As it were lost in going to show.

DUKE OF AUMERLE:
For I will take the wrong is we untrone her whole,
Like names are never spoken unto Venice,
Whiles I shall not please your part, or giddy, they;
At any dack of kings; and 'tis I.

Second Murderer:
And, being i' the trial, if I then? why, he hath praised it so.

KATHARINA:
Nay, hear me; Northumberland comes open to you, then,
We are not been so bed-helmid
Upon my loyalth, tenkering my occupation; his departure.

SLY:
I have a mirror to my under name hath tended
A father, and they should not know the first
May in the bish's blood,
More than a humour
and cailouses of the benefit and fashion:
Not neighbour by the is done; conceive,
I smiled and shall take all his queens.

PETRUCHIO:
Good forthfit may; sir, that you are not so

In [40]:
# Same generation loop as for the single prompt, but with five copies of the
# prompt in one batch and 100 steps. Each row is sampled independently, so the
# five continuations differ.
start = time.time()
# No state yet: the first step starts from the model's initial state.
states = None
next_char = tf.constant(['ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:'])
# Collect the prompts followed by every generated batch of characters.
result = [next_char]

# `n` is only a step counter; the state is carried from step to step.
for n in range(100):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Element-wise join: one string per prompt in the batch.
result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + '_'*80)
print('\nRun time:', end - start)

tf.Tensor(
[b"ROMEO:\nI pray you, but\nhe's mean to be the exchery: they so much\nTo rid at it. I do leave nothing but cann"
 b'ROMEO:\nMistress, would he were forerance.\n\nMARCIUS:\nGo to kennel. What, dispets him plully?\n\nFirst Keeper:'
 b"ROMEO:\nI will fetch it at.\n\nGLOUCESTER:\nVouchsafe, how doth men old Grumio, tell the world to\nWarwick's he"
 b"ROMEO:\nWhat is't?\n\nCOMINIUS:\nTo Lord was his deposed?\n\nSEBASTIAN:\nWhat must I stand, and I hear some two\nc"
 b"ROMEO:\nThe firstress set up'd to hell and bite his pussemn.\n\nISABELLA:\nO God, for doth the noble days nor "], shape=(5,), dtype=string) 

________________________________________________________________________________

Run time: 1.0097577571868896
