In [2]:
import tensorflow as tf
import numpy as np
import os
import time

2023-09-07 12:03:28.857695: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Download the Shakespeare corpus via Keras' get_file helper and keep its local path.
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
# Read the raw bytes inside a context manager so the file handle is closed
# deterministically, then decode them to a single Python string.
with open(path_to_file, "rb") as corpus_file:
    text = corpus_file.read().decode(encoding="utf-8")

In [5]:
def create_vocab(text):
    """Build the character <-> index lookup tables for ``text``.

    Characters are numbered in order of first appearance, starting at 0.

    Args:
        text: Iterable of hashable symbols (here: a string of characters).

    Returns:
        A ``(vocab, idx2char)`` tuple where ``vocab`` maps each distinct
        character to its index and ``idx2char`` is the list that maps an
        index back to its character.
    """
    # dict.fromkeys keeps insertion order and silently drops repeats, which
    # yields the distinct characters in first-seen order.
    idx2char = list(dict.fromkeys(text))
    vocab = {symbol: position for position, symbol in enumerate(idx2char)}
    return vocab, idx2char

In [6]:
# Build both lookup tables from the full corpus, then display the
# char -> index mapping together with the number of distinct characters.
vocab, idx2char = create_vocab(text)
vocab, len(vocab)

({'F': 0,
  'i': 1,
  'r': 2,
  's': 3,
  't': 4,
  ' ': 5,
  'C': 6,
  'z': 7,
  'e': 8,
  'n': 9,
  ':': 10,
  '\n': 11,
  'B': 12,
  'f': 13,
  'o': 14,
  'w': 15,
  'p': 16,
  'c': 17,
  'd': 18,
  'a': 19,
  'y': 20,
  'u': 21,
  'h': 22,
  ',': 23,
  'm': 24,
  'k': 25,
  '.': 26,
  'A': 27,
  'l': 28,
  'S': 29,
  'Y': 30,
  'v': 31,
  '?': 32,
  'R': 33,
  'M': 34,
  'W': 35,
  "'": 36,
  'L': 37,
  'I': 38,
  'N': 39,
  'g': 40,
  ';': 41,
  'b': 42,
  '!': 43,
  'O': 44,
  'j': 45,
  'V': 46,
  '-': 47,
  'T': 48,
  'H': 49,
  'E': 50,
  'U': 51,
  'D': 52,
  'P': 53,
  'q': 54,
  'x': 55,
  'J': 56,
  'G': 57,
  'K': 58,
  'Q': 59,
  '&': 60,
  'Z': 61,
  'X': 62,
  '3': 63,
  '$': 64},
 65)

In [10]:
def create_tensor_data(text, vocab):
    """Encode ``text`` as a NumPy array of vocabulary indices.

    Args:
        text: Iterable of characters to encode.
        vocab: Mapping from character to integer index.

    Returns:
        A 1-D ``np.ndarray`` holding ``vocab[char]`` for every character of
        ``text``, in order.

    Raises:
        KeyError: If ``text`` contains a character missing from ``vocab``.
    """
    encoded = [vocab[symbol] for symbol in text]
    return np.array(encoded)

# Encode the whole corpus as vocabulary indices and display the resulting array.
tensor_data = create_tensor_data(text, vocab)
tensor_data

array([ 0,  1,  2, ..., 40, 26, 11])

In [11]:
# Number of characters in each training input sequence.
seq_length = 100

# Stream the encoded corpus one character id at a time, then group the stream
# into chunks of seq_length + 1 ids. The extra id lets each chunk be split into
# an input and a one-step-shifted target that are both seq_length long.
# drop_remainder=True discards the final chunk if it is shorter than that.
char_dataset = tf.data.Dataset.from_tensor_slices(tensor_data)
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

def split_input_target(chunk):
    """Split a chunk into an (input, target) pair shifted by one position.

    Args:
        chunk: Sliceable sequence of length n.

    Returns:
        ``(chunk[:-1], chunk[1:])`` - the input is everything except the last
        element and the target is everything except the first, so each target
        element is the successor of the input element at the same position.
    """
    return chunk[:-1], chunk[1:]

2023-09-07 12:05:54.992033: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-09-07 12:05:54.992245: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-09-07 12:05:55.383687: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the 

In [12]:
# Turn every chunk into an (input, target) pair and display the dataset spec.
dataset = sequences.map(split_input_target)
dataset

<_MapDataset element_spec=(TensorSpec(shape=(100,), dtype=tf.int64, name=None), TensorSpec(shape=(100,), dtype=tf.int64, name=None))>

In [13]:
# Number of sequences per training batch.
batch_size = 64
# Size of the buffer tf.data uses when shuffling the sequences.
buffer_size = 10000

# Build the training pipeline from `sequences` rather than from the previous
# value of `dataset`: the cell is then idempotent, so re-running it does not
# shuffle/batch an already batched dataset a second time.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
)
dataset

<_BatchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [14]:
# Hyper-parameters handed to build_model.
vocab_size = len(vocab)  # number of distinct characters; also the width of the final Dense layer
embedding_dim = 256  # size of each character embedding vector
rnn_units = 512  # passed as `units` to every GRU layer
gru_units = 2  # number of stacked GRU layers (a layer count, despite the name)

def build_model(vocab_size, embedding_dim, rnn_units, gru_units, batch_size):
    """Assemble the character-level model: Embedding -> stacked GRUs -> Dense.

    Args:
        vocab_size: Number of distinct characters; used as the embedding input
            size and as the width of the output layer.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of units in every GRU layer.
        gru_units: How many GRU layers to stack.
        batch_size: Fixed batch size baked into the input shape; the sequence
            length is left open (``None``).

    Returns:
        An uncompiled ``tf.keras.Sequential`` model. The final Dense layer has
        no activation, so it outputs one raw score per vocabulary entry at
        every time step.
    """
    embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None],
    )
    # All GRU layers share the same configuration; each returns the full
    # sequence so the next layer (and the Dense head) sees every time step.
    recurrent_stack = [
        tf.keras.layers.GRU(
            units=rnn_units,
            return_sequences=True,
            stateful=True,
            recurrent_initializer="glorot_uniform",
        )
        for _ in range(gru_units)
    ]
    projection = tf.keras.layers.Dense(vocab_size)

    return tf.keras.Sequential([embedding, *recurrent_stack, projection])

In [15]:
# Instantiate the training model with the training batch size and print its layer summary.
model = build_model(vocab_size, embedding_dim, rnn_units, gru_units, batch_size)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (64, None, 256)           16640     
                                                                 
 gru (GRU)                   (64, None, 512)           1182720   
                                                                 
 gru_1 (GRU)                 (64, None, 512)           1575936   
                                                                 
 dense (Dense)               (64, None, 65)            33345     
                                                                 
Total params: 2808641 (10.71 MB)
Trainable params: 2808641 (10.71 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
def loss(y_true, y_pred):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    Args:
        y_true: Integer class ids (the target characters).
        y_pred: Unnormalised model outputs; ``from_logits=True`` tells the
            loss to apply the softmax itself.

    Returns:
        Whatever ``tf.keras.losses.sparse_categorical_crossentropy`` returns
        for these arguments.
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy
    return crossentropy(y_true, y_pred, from_logits=True)

In [17]:
# Train with the Adam optimizer and the logits-aware `loss` defined in this notebook.
model.compile(optimizer="adam", loss=loss)

In [18]:
# Directory (relative to the notebook's working directory) that receives the checkpoints.
checkpoint_dir = './training_checkpoints_custom'
# "{epoch}" is left as a placeholder in the path handed to ModelCheckpoint.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Save only the model weights (not the full model) during training.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True
)

# Number of passes over the training dataset.
EPOCHS = 10

# Train the model; `history` keeps the object returned by fit().
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
# Rebuild the network with batch size 1 for text generation. The layer count
# must match the trained model for the checkpoint to load, so reuse
# `gru_units` instead of repeating the literal.
model = build_model(vocab_size, embedding_dim, rnn_units, gru_units, batch_size=1)
# Restore the weights from the most recent checkpoint in checkpoint_dir.
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

# Build for a single sequence of arbitrary length.
model.build(tf.TensorShape([1, None]))

In [20]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
    """Sample text from the character model, seeded with ``start_string``.

    Relies on the notebook-level ``vocab`` (char -> index) and ``idx2char``
    (index -> char) lookup tables.

    Args:
        model: Model called as ``model(ids)`` on a ``(1, length)`` batch of
            character ids; it must expose ``reset_states()``.
        start_string: Non-empty prompt; every character must be in ``vocab``.
        num_generate: Number of characters to sample after the prompt.
        temperature: Positive divisor applied to the logits before sampling.

    Returns:
        ``start_string`` followed by the ``num_generate`` sampled characters.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
        KeyError: If ``start_string`` contains a character missing from
            ``vocab``.
    """
    # Fail early with a clear message instead of an obscure shape/index error
    # deep inside the sampling loop.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be a positive number")

    # Encode the prompt and add a leading batch dimension of size 1.
    input_eval = [vocab[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)
    text_generated = []

    # Start from a clean recurrent state so earlier calls do not leak in.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # Drop the batch dimension: one row of logits per input position.
        predictions = tf.squeeze(predictions, 0)

        predictions = predictions / temperature
        # Sample one id per row and keep the draw for the last position only.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Feed only the newly sampled character back in on the next step.
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

In [21]:
# Sample text seeded with a speaker-style prompt.
# NOTE(review): "ROMMEO" looks like a typo for "ROMEO"; left unchanged because
# it is the literal prompt that produced the saved output below.
print(generate_text(model, start_string=u"ROMMEO: "))

ROMMEO: Juliet, whither:
and I am not spyed eyes,
The rabble is farties: be great to partly,
And with stlable and roses I know your sister; and

ISABELLA:
I thank your alms
As any of this had been a random flourishet.
Why, soo hope; gentle AUMERLE:
I know you never entertain'd his wretched dipolts
I looked about thee term, nor rough.
I'll weep their graved fight we go?
Now, sir, they come those country; your lady,
Nor the hand of even, al oft that your patience:
Your figure thee that after men?

First Moststand
to me.

PROSPERO:
Thou'lengs his writing here,
We will wish you, Whose ish closalio, for he remain
I forget with thee.

FRIART MECHICHARD II:
Lack not how, at home and here,
Being not so froward to thy bed!

PRINCE EDWARD:
My gracious Glouces, with his oracle:
As other babes,
I prove what can find your lordship to pray ye.
Why, if this torment them; what's a-groal to have scorn Forbid his knowledge.

BIANDA:
Noble lord! Poor you that favours Mowbray?

AUTOLYCUS:
By gates and pri

In [22]:
# Sample again from a different prompt; print() is used so newlines render as line breaks.
print(generate_text(model, start_string="Hello"))

Hellow, then wash'd nor part.

DUKE VINCENTIO:
But, Montague unquired here!, and knock our mother;
God from the first brail her, and the abfession of the earth have show'd top the
next knows with my brother, fair
Say our friends pove them hope as you,
That will the peace of your wild as
You ratry himself at Bohemia: within it straight
I' the people--my lord,
Go one thine enemy's fruitted in my father.
Look and am that, even he does speak with the ground
And know.

BEANCA:
Bf this foul came is the last?
The penitently gar, the napul creatures of triumph.

MENENIUS:
Ay, as, that you well.

DUKE VINCENTIO:

Second Gentleman:
Let 't straight.

First Lady:
Part thee to be thought untishes,
So hear it pass'd!

CLIOF EDWARD IV:
And yet I belime these own!
Who travels and not thee; where has but with a purpose.

DUKE OF YORK:
What a day look upon me: thou, sir, your mother care?

Lord Messenger:
That gives at ons, camberle, he'll give
with me two fancy, sit to change the crown.

KATHARINA:
Tru