In [1]:
import tensorflow as tf
import numpy as np
import os
import time

In [2]:
# Fetch the Shakespeare corpus with Keras' get_file helper and keep the local file path it returns.
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [3]:
# Read the whole corpus as bytes and decode it as UTF-8.
# A context manager is used so the file handle is closed as soon as the read
# finishes (the bare open(...).read() form left it open until garbage collection).
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding = 'utf-8')
# Number of characters in the corpus.
len(text)

1115394

## First 250 Characters

In [4]:
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [5]:
# Sorted list of the distinct characters that occur in the corpus.
vocab = sorted(set(text))
# Peek at 26 consecutive entries of the sorted vocabulary.
vocab[13:39]

['A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z']

In [6]:
print(f'{len(vocab)} Unique Characters')

65 Unique Characters


In [7]:
# Lookup layer that converts characters into integer ids, built from the corpus vocabulary.
# mask_token=None: no mask token is configured for this layer.
ids_from_chars = tf.keras.layers.StringLookup(vocabulary = list(vocab), mask_token = None)

In [8]:
# Inverse lookup (invert=True): converts integer ids back into characters. It is built from
# the vocabulary reported by ids_from_chars so both layers share the same id assignment.
chars_from_ids = tf.keras.layers.StringLookup(vocabulary=ids_from_chars.get_vocabulary(), invert = True, mask_token = None)

In [9]:
def text_from_ids(ids):
    """Convert ids to characters and concatenate them along the last axis.

    Uses the notebook-level ``chars_from_ids`` lookup layer for the conversion
    and returns the joined string tensor.
    """
    characters = chars_from_ids(ids)
    joined = tf.strings.reduce_join(characters, axis = -1)
    return joined

In [10]:
# Split the corpus into individual UTF-8 characters and convert every character to its id.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1])>

In [11]:
ids_Dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [12]:
for ids in ids_Dataset.take(20):
    print(chars_from_ids(ids).numpy().decode('utf-8'))

F
i
r
s
t
 
C
i
t
i
z
e
n
:


B
e
f
o
r


In [13]:
seq_length = 100

In [14]:
# Group the stream of ids into chunks of seq_length + 1 ids each.
# drop_remainder=True discards a final chunk that would be shorter than that.
sequences = ids_Dataset.batch(seq_length + 1, drop_remainder=True)

In [15]:
for sequence in sequences.take(1):
    print(chars_from_ids(sequence))

tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)


In [16]:
for sequence in sequences.take(10):
    print(text_from_ids(sequence).numpy())
    print()

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '

b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'

b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"

b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"

b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'

b'zens, the patricians good.\nWhat authority surfeits on would relieve us: if they\nwould yield us but th'

b'e superfluity, while it were\nwholesome, we might guess they relieved us humanely;\nbut they think we a'

b're too dear: the leanness that\nafflicts us, the object of our misery, is as an\ninventory to particula'

b'rise their abundance; our\nsufferance is a gain to them Let us revenge this with\nour pikes, ere we bec'

b'ome rak

In [17]:
def split_input_target(sequence):
    """Return ``(input, target)`` views of ``sequence`` offset by one position.

    The input is the sequence without its last element; the target is the
    sequence without its first element, so ``target[i]`` is the element that
    follows ``input[i]``.
    """
    return sequence[:-1], sequence[1:]

In [18]:
split_input_target('Zohaib Sathio')

('Zohaib Sathi', 'ohaib Sathio')

In [19]:
dataset = sequences.map(split_input_target)

In [20]:
for input_text, target in dataset.take(2):
    print(text_from_ids(input_text).numpy())
    print(text_from_ids(target).numpy())
    print()

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '

b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you '
b're all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'



In [21]:
BATCH_SIZE = 64
BUFFER_SIZE = 10000

In [22]:
# Shuffle the (input, target) pairs with a BUFFER_SIZE-element shuffle buffer, then group them
# into batches of BATCH_SIZE, dropping a final partial batch.
# NOTE: this rebinds `dataset` to a transformation of itself, so re-running this cell alone
# would shuffle and batch the already-batched dataset again.
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

dataset

<BatchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [23]:
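# Alternative input pipeline, kept for reference and intentionally left commented out:
# it repeats the shuffle/batch step from the previous cell and adds a prefetch stage.
# Do not run it in addition to the previous cell, or `dataset` is shuffled and batched twice.
# dataset = (
#     dataset
#     .shuffle(BUFFER_SIZE)
#     .batch(BATCH_SIZE, drop_remainder = True)
#     .prefetch(tf.data.experimental.AUTOTUNE)
# )
# dataset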

## Build the Model

In [24]:
# Number of ids the model must handle; taken from the lookup layer's vocabulary
# (not from len(vocab)) so it matches the ids the layer actually produces.
vocab_size = len(ids_from_chars.get_vocabulary())

# Size of each character embedding vector.
embedding_dim = 256

# Number of units in the GRU layer.
rnn_units = 1024

In [25]:
class MyModel(tf.keras.Model):
  """Character-level language model: Embedding -> GRU -> Dense.

  Args:
    vocab_size: number of distinct ids; used as the embedding's input size and
      as the width of the final Dense layer (one output per id).
    embedding_dim: size of each embedding vector.
    rnn_units: number of units in the GRU layer.
    batch_size: batch dimension passed to the embedding layer through
      ``batch_input_shape``.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units, batch_size):
    # Fix: the original called ``super().__init__(self)``, which handed the
    # instance to keras.Model.__init__ as a stray positional argument.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape = [batch_size, None])
    # return_sequences: emit an output for every timestep;
    # return_state: also return the final recurrent state so generation can
    # carry it from one call to the next.
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of id sequences.

    Args:
      inputs: integer ids fed to the embedding layer.
      states: optional GRU state to start from; when None, an initial state
        is requested from the GRU layer.
      return_state: when True, also return the GRU state after the last step.
      training: forwarded to every layer.

    Returns:
      The Dense layer's output for every timestep, or the tuple
      ``(output, states)`` when ``return_state`` is True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [26]:
model = MyModel(vocab_size = vocab_size, embedding_dim = embedding_dim, rnn_units = rnn_units, batch_size = BATCH_SIZE)

In [27]:
# Sanity check: run the (still untrained) model on a single batch and print the output shape.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 66) # (batch_size, sequence_length, vocab_size)


In [28]:
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  16896     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  67650     
                                                                 
Total params: 4,022,850
Trainable params: 4,022,850
Non-trainable params: 0
_________________________________________________________________


In [29]:
# Sample one id per timestep from the model output for the first example in the batch.
sample_indices = tf.random.categorical(example_batch_predictions[0], num_samples= 1)
# Drop the trailing num_samples axis and convert the result to a NumPy array.
sample_indices = tf.squeeze(sample_indices, axis = -1).numpy()

In [30]:
sample_indices 

array([14, 36, 24, 61,  6, 12, 65, 18, 37, 10, 62, 17, 30,  1, 20, 12, 30,
       58,  0, 62, 46, 24, 62, 15, 14,  4,  4, 40, 30, 60, 51, 35, 32, 65,
       45, 65, 33,  9, 18, 41, 33, 35, 39, 37, 31, 28, 56, 33, 28, 63, 30,
       19, 27, 39,  8, 65, 38, 12, 39, 55, 35, 11, 22, 13, 26, 14, 18,  1,
        9, 26,  1, 10, 36, 19, 36, 61, 23, 26, 37, 42, 29, 59, 25,  9, 38,
       18,  0, 62, 49, 59,  8, 38, 31, 35, 55, 22, 29, 60,  7,  6])

In [31]:
print('Input: ', text_from_ids(input_example_batch[0]).numpy())
print()
print('Output: ', text_from_ids(sample_indices).numpy())

Input:  b' lord, ay, husband, friend!\nI must hear from thee every day in the hour,\nFor in a minute there are m'

Output:  b"AWKv';zEX3wDQ\nG;Qs[UNK]wgKwBA$$aQulVSzfzT.EbTVZXROqTOxQFNZ-zY;ZpV:I?MAE\n.M\n3WFWvJMXcPtL.YE[UNK]wjt-YRVpIPu,'"


In [32]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits= True)

In [33]:
# Loss of the untrained model on the example batch (the loss object returns a scalar here).
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print('Prediction Shape:', example_batch_predictions.shape)
print('Loss: ', example_batch_mean_loss)

Prediction Shape: (64, 100, 66)
Loss:  tf.Tensor(4.189974, shape=(), dtype=float32)


In [34]:
# Exponential of the mean loss. For the untrained model this comes out close to the
# vocabulary size (66), i.e. the predictions are roughly uniform over the vocabulary.
tf.exp(example_batch_mean_loss).numpy()

66.021065

In [35]:
model.compile(optimizer='adam', loss=loss)

In [36]:
# Directory where the checkpoints will be saved (relative to the working directory)
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; "{epoch}" is a placeholder the callback fills in
# with the epoch number when it writes a checkpoint
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# save_weights_only=True: write only the model weights, not the full model
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [37]:
EPOCHS = 20

In [38]:
# Train for EPOCHS passes over the dataset; checkpoints are written through checkpoint_callback.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [39]:
class OneStep(tf.keras.Model):
    """Single-step text generator wrapped around a trained character model.

    Stores the model together with both lookup layers and a sampling
    temperature, and precomputes an additive mask that puts -inf on the
    '[UNK]' id so that id can never be sampled.
    """

    def __init__(self, model, chars_from_ids, ids_from_chars, temperature = 1.0):
        super().__init__()
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars
        self.temperature = temperature

        # Ids that must never be generated, shaped as a column of sparse indices.
        blocked_ids = self.ids_from_chars(['[UNK]'])[:, None]
        vocabulary_length = len(ids_from_chars.get_vocabulary())
        # One -inf per blocked id; every other vocabulary position stays 0.
        blocked_values = [-float('inf')] * len(blocked_ids)
        mask_sparse = tf.SparseTensor(
            indices=blocked_ids,
            values=blocked_values,
            dense_shape=[vocabulary_length])
        self.prediction_mask = tf.sparse.to_dense(mask_sparse)

    @tf.function
    def generate_one_step(self, inputs, states = None):
        """Sample the next character for each input string.

        Args:
            inputs: string tensor; each string is split into UTF-8 characters.
            states: recurrent state from the previous call, or None.

        Returns:
            A tuple ``(predicted_chars, states)``: the sampled character per
            input string and the state returned by the wrapped model.
        """
        # Strings -> characters -> ids; to_tensor() turns the ragged result dense.
        char_pieces = tf.strings.unicode_split(inputs, 'UTF-8')
        id_batch = self.ids_from_chars(char_pieces).to_tensor()

        logits, states = self.model(inputs = id_batch, states = states, return_state = True)

        # Keep only the final timestep, scale by temperature, then apply the mask.
        final_step_logits = logits[:, -1, :] / self.temperature
        masked_logits = final_step_logits + self.prediction_mask

        # Draw one id per row and drop the num_samples axis.
        drawn_ids = tf.squeeze(
            tf.random.categorical(masked_logits, num_samples=1), axis=-1)

        return self.chars_from_ids(drawn_ids), states
        

In [40]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [41]:
# Generate 1000 characters starting from the prompt 'ROMEO:' and time the run.
start = time.time()
# No recurrent state yet on the first call.
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

# Feed each sampled character back in together with the state returned by the previous step.
for n in range(1000):
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)
    
# Concatenate the prompt and all sampled characters element-wise into one string per batch row.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun Time: ', end - start)

ROMEO:
So much to poor, light on my duty there it,
For I will prave the field; for is't I have not
sent, in pain of duty, grant me in
the sea, and let the ond unhappings for that woman,
So I with tongue as shall redress a straw,
And such as mine own toones may be red; I love you
profess; the gods forbid!

PETRUCHIO:
A gentle prince: I know what would of robbers, bell;
And that we hold arrived in the lives by him.
The king hath been talk of woe:'d about her maid
Nor phosing ponts at heaven fight.

HASTINGS:
Master Barnardine!

Third Servingman:
But resire thence will we desire:
This cold may we dive me to have as brotch
The child of this nark is dead:
When it is but bovost, didged unto the king!
For in a poison, good nurse, thou most beauteous tomb!
For violents supplase, to stand it confess
I thrown with an hundred villages in the belly.

JULIET:
Away! for that I love him well:
Thou dost fair marching most talk of wonder;
The cockerel's hones but scall is not hoopedre:
There is no more

In [59]:
# Second fit call on the same `model` object: rebinds `history`, so the History object from the first run is lost.
# NOTE(review): the epoch counter restarts at 1 (see output), so the callback presumably
# writes to the same "ckpt_{epoch}" names as the first run; confirm whether overwriting is intended.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [62]:
# Repeat the 1000-character generation after the extra training run. one_step_model wraps
# the same `model` object that was just trained further, so no new wrapper is needed.
start = time.time()
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(1000):
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)
    
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun Time: ', end - start)

ROMEO:
That love the life-was not made grows,
And that thou oft provoked by him.

LADY CAPULET:
What! loss of such syrends from the maid to see
Thieves of my body's vause, and stay all hard enmumber'd.

BRUTUS:
Good my lord,
Make madness up the new of his sword reporter.

CLEOMENES:
You that be right! belike, nou woo and said;
And be it kill the fow, and mock me with thee.

Lord:
'Tis a love! and the chape, when you are Werning does
In her pretty man to's person from such life,
Better your servants, reward on him.

LADY CAPULET:
Ay, say my sovereign speak! Help me not born.

CAMILLO:
When dost suspicion again, and so did I.

ROMEO:
Nay the King of Naples,
That he should nothing else that time
He cannot live but think your latter shake:
Alas, poor Hellowing with my foe,
With old obsed is happy neither; but they on thou, saving, but I
could grow not him. I am half my wife.

KATHARINA:
Twenty cunning in the present affection to your house;
And as he taxks, he longs to see your mejoce,
And

In [64]:
# Export the one-step generator as a SavedModel under the relative path 'One_Step2'.
tf.saved_model.save(one_step_model, 'One_Step2')



In [65]:
# Load the exported generator back from disk to check that it works outside the original objects.
one_step_reloaded2 = tf.saved_model.load('One_Step2')

In [66]:
# Generate 200 characters from the reloaded SavedModel, starting from the prompt 'JULIET:'.
states = None
next_char = tf.constant(['JULIET:'])
result = [next_char]

for n in range(200):
  next_char, states = one_step_reloaded2.generate_one_step(next_char, states=states)
  result.append(next_char)
    
print(tf.strings.join(result)[0].numpy().decode("utf-8"))

JULIET:
By my heartily, throw forth my love
Than body sits, and the time 'twixt sisters at home,
I may not say is left be flayed
In aught as much as. Nay, talk upon the heart.

ESCALUS:
Give me my boots, I w


In [67]:
class CustomTraining(MyModel):
  """MyModel variant with a hand-written training step used by ``fit``."""

  @tf.function
  def train_step(self, inputs):
      """Run one optimization step on a single ``(inputs, labels)`` batch.

      Args:
          inputs: tuple ``(inputs, labels)`` as yielded by the dataset.

      Returns:
          Dict with the batch loss under the key ``'loss'``.
      """
      inputs, labels = inputs
      with tf.GradientTape() as tape:
          predictions = self(inputs, training=True)
          loss = self.loss(labels, predictions)
      # Fix: differentiate and update this instance's own variables. The
      # original read the notebook-global `model`, which only worked when that
      # global happened to be bound to this very instance.
      trainable_vars = self.trainable_variables
      grads = tape.gradient(loss, trainable_vars)
      self.optimizer.apply_gradients(zip(grads, trainable_vars))

      return {'loss': loss}

In [68]:
# Rebinds the global `model` to a fresh CustomTraining instance. `one_step_model`, created
# earlier, still wraps the previous model object and is unaffected by training this one.
model = CustomTraining(vocab_size = len(ids_from_chars.get_vocabulary()),
                      embedding_dim = embedding_dim,
                      rnn_units = rnn_units,
                      batch_size = BATCH_SIZE)

In [69]:
model.compile(optimizer=tf.keras.optimizers.Adam(),
             loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

In [70]:
# Train the CustomTraining model for 3 epochs; fit() drives the overridden train_step.
model.fit(dataset, epochs = 3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f51ed400d00>

In [71]:
model.fit(dataset, epochs = 30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f51ed40ba90>

In [72]:
model.fit(dataset, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f51ed40bb50>

In [73]:
# Generate 500 characters from the reloaded SavedModel, starting from the prompt 'ROMEO:'.
# NOTE(review): one_step_reloaded2 was exported before the CustomTraining model was created,
# so this output does not reflect the training runs directly above. Confirm this is meant as
# a baseline; the following cells wrap the newly trained model in `model_two`.
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(500):
  next_char, states = one_step_reloaded2.generate_one_step(next_char, states=states)
  result.append(next_char)
    
print(tf.strings.join(result)[0].numpy().decode("utf-8"))

ROMEO:
Then must not on my tent them as to Cats,
And that a widow in his face,
Eneach it breathes, from this rightness wrongs
Apparent him that the queen's death,
I will some mortal to her brother to her love.

HASTINGS:
'Tis a vive, scarcely Just; and that's enough to bid me fan
An ever been was plinted amint.

LUCIO:

ISABELLA:
Could he be perfect.

MENENIUS:
Sir, sir,--

SICINIUS:
Peace, Morition, fie! 'tis like your love
Unbected: but, as I live me, in groans,
Your voices black stage, when you sha


In [74]:
# Wrap the current `model` (the CustomTraining instance trained above) in a one-step generator.
model_two = OneStep(model, chars_from_ids, ids_from_chars)

In [75]:
# Generate 500 characters with model_two, starting from the prompt 'ROMEO:'.
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(500):
  next_char, states = model_two.generate_one_step(next_char, states=states)
  result.append(next_char)
    
print(tf.strings.join(result)[0].numpy().decode("utf-8"))

ROMEO:
The battle calls were blows. Come, come, young Die
His place against this right: ha! thou art most celsuitor,
Is must be encounter sannous backs; and that we crown,
And I will back with the manner that the very good
French suppition.

PAULINA:
Ingendous alive to me:
This music be many friar, and Master Stays
To comb what I bid thee seeing in his awely;
Call Pemer
Seeming to all infirmation.
I have not show'd them told us when you waked like done.
God shall the barbe of sure men that stumbleds t


In [2]:
# NOTE(review): leftover scratch cell. Its execution count (In [2]) is out of order with the
# surrounding cells, and `os` is already imported in the first cell. It lists the entries of
# the filesystem root, so the saved output exposes local directory names; consider removing
# the cell or clearing its output before sharing.
import os
print(os.listdir('/'))

['$AV_ASW', '$Recycle.Bin', '$WinREAgent', 'cpp', 'Documents and Settings', 'DumpStack.log.tmp', 'Graphics Design Work', 'hiberfil.sys', 'Intel', 'Local Disk (D)', 'main.c', 'msdia80.dll', 'MSOCache', 'MUET Studies', 'My Mobile Data (Do Not Open)', 'OneDriveTemp', 'oraclexe', 'pagefile.sys', 'PerfLogs', 'Program Files', 'Program Files (x86)', 'ProgramData', 'Recovery', 'src', 'swapfile.sys', 'SWSetup', 'System Volume Information', 'Users', 'Visual Studio Codes', 'wallpapers', 'Windows', 'xampp']
