In [1]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

import tensorflow as tf

In [2]:
# Location of the Shakespeare corpus text file read by the next cell.
# NOTE(review): absolute Colab / Google Drive path — presumably requires Drive
# to be mounted at /content/drive; confirm before running in another environment.
path = "/content/drive/MyDrive/Colab Notebooks/Deep learning bootcamp/data/shakespeare.txt"

In [3]:
# Read the whole corpus into memory. The context manager closes the file
# handle deterministically, and the explicit encoding keeps the decoded text
# independent of the platform's default locale.
with open(path, 'r', encoding='utf-8') as corpus_file:
  text = corpus_file.read()

In [4]:
# Preview the first 500 characters of the corpus.
print(text[:500])


                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
  His tender heir might bear his memory:
  But thou contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thy self thy foe, to thy sweet self too cruel:
  Thou that art now the world's fresh ornament,
  And only herald to the gaudy spring,
  Within thine own bu


In [5]:
# The vocabulary is every distinct character that occurs in the corpus,
# held as a sorted list so each character gets a stable position.
vocab = list(set(text))
vocab.sort()
print(vocab)
len(vocab)

['\n', ' ', '!', '"', '&', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '|', '}']


84

## Step 2: Text Processing

### Text Vectorization

In [6]:
# Map each vocabulary character to its position in the sorted `vocab` list.
char_to_ind = dict(zip(vocab, range(len(vocab))))

In [7]:
# Display the full character -> index mapping.
char_to_ind

{'\n': 0,
 ' ': 1,
 '!': 2,
 '"': 3,
 '&': 4,
 "'": 5,
 '(': 6,
 ')': 7,
 ',': 8,
 '-': 9,
 '.': 10,
 '0': 11,
 '1': 12,
 '2': 13,
 '3': 14,
 '4': 15,
 '5': 16,
 '6': 17,
 '7': 18,
 '8': 19,
 '9': 20,
 ':': 21,
 ';': 22,
 '<': 23,
 '>': 24,
 '?': 25,
 'A': 26,
 'B': 27,
 'C': 28,
 'D': 29,
 'E': 30,
 'F': 31,
 'G': 32,
 'H': 33,
 'I': 34,
 'J': 35,
 'K': 36,
 'L': 37,
 'M': 38,
 'N': 39,
 'O': 40,
 'P': 41,
 'Q': 42,
 'R': 43,
 'S': 44,
 'T': 45,
 'U': 46,
 'V': 47,
 'W': 48,
 'X': 49,
 'Y': 50,
 'Z': 51,
 '[': 52,
 ']': 53,
 '_': 54,
 '`': 55,
 'a': 56,
 'b': 57,
 'c': 58,
 'd': 59,
 'e': 60,
 'f': 61,
 'g': 62,
 'h': 63,
 'i': 64,
 'j': 65,
 'k': 66,
 'l': 67,
 'm': 68,
 'n': 69,
 'o': 70,
 'p': 71,
 'q': 72,
 'r': 73,
 's': 74,
 't': 75,
 'u': 76,
 'v': 77,
 'w': 78,
 'x': 79,
 'y': 80,
 'z': 81,
 '|': 82,
 '}': 83}

In [8]:
# Reverse lookup: an array whose element at position i is the character with
# index i, so integer (array) indexing decodes ids back to characters.
ind_to_char = np.asarray(vocab)

In [9]:
# Display the index -> character lookup array.
ind_to_char

array(['\n', ' ', '!', '"', '&', "'", '(', ')', ',', '-', '.', '0', '1',
       '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '>', '?',
       'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
       'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
       '[', ']', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
       'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
       'w', 'x', 'y', 'z', '|', '}'], dtype='<U1')

In [10]:
# Encode the entire corpus as one integer id per character.
encoded_text = np.array(list(map(char_to_ind.__getitem__, text)))

In [11]:
# Number of encoded ids (one per character of `text`).
len(encoded_text)

5445609

In [12]:
# Display the (abbreviated) integer-encoded corpus.
encoded_text

array([ 0,  1,  1, ..., 30, 39, 29])

In [13]:
# First 20 characters of the raw text.
sample = text[:20]
sample

'\n                   '

In [14]:
# First 20 entries of the integer-encoded text.
encoded_text[:20]

array([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

## Creating Batches

In [15]:
# Preview the first 500 characters of the corpus.
print(text[:500])


                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
  His tender heir might bear his memory:
  But thou contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thy self thy foe, to thy sweet self too cruel:
  Thou that art now the world's fresh ornament,
  And only herald to the gaudy spring,
  Within thine own bu


In [16]:
# A single line of verse, used to measure its length in characters.
line = "From fairest creatures we desire increase"

In [17]:
# Character count of the single line.
len(line)

41

In [18]:
# Three consecutive lines of verse, including the newlines and the leading
# spaces of the continuation lines.
part_stanza = """From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,"""

In [19]:
# Character count of the three-line excerpt.
len(part_stanza)

131

### Training Sequences

In [20]:
# Number of characters in each training input sequence.
seq_len = 120

In [21]:
# How many whole chunks of (seq_len + 1) characters fit in the corpus; each
# chunk carries one extra character beyond seq_len.
total_num_seq = len(text)//(seq_len+1)

In [22]:
# Display the number of whole chunks.
total_num_seq

45005

In [23]:
# Wrap the encoded corpus in a tf.data pipeline that yields one character id
# per element.
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)

# Sanity check: decode and print the first 500 elements, one character per line.
for char_id in char_dataset.take(500).as_numpy_iterator():
  print(ind_to_char[char_id])



 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1


 
 
F
r
o
m
 
f
a
i
r
e
s
t
 
c
r
e
a
t
u
r
e
s
 
w
e
 
d
e
s
i
r
e
 
i
n
c
r
e
a
s
e
,


 
 
T
h
a
t
 
t
h
e
r
e
b
y
 
b
e
a
u
t
y
'
s
 
r
o
s
e
 
m
i
g
h
t
 
n
e
v
e
r
 
d
i
e
,


 
 
B
u
t
 
a
s
 
t
h
e
 
r
i
p
e
r
 
s
h
o
u
l
d
 
b
y
 
t
i
m
e
 
d
e
c
e
a
s
e
,


 
 
H
i
s
 
t
e
n
d
e
r
 
h
e
i
r
 
m
i
g
h
t
 
b
e
a
r
 
h
i
s
 
m
e
m
o
r
y
:


 
 
B
u
t
 
t
h
o
u
 
c
o
n
t
r
a
c
t
e
d
 
t
o
 
t
h
i
n
e
 
o
w
n
 
b
r
i
g
h
t
 
e
y
e
s
,


 
 
F
e
e
d
'
s
t
 
t
h
y
 
l
i
g
h
t
'
s
 
f
l
a
m
e
 
w
i
t
h
 
s
e
l
f
-
s
u
b
s
t
a
n
t
i
a
l
 
f
u
e
l
,


 
 
M
a
k
i
n
g
 
a
 
f
a
m
i
n
e
 
w
h
e
r
e
 
a
b
u
n
d
a
n
c
e
 
l
i
e
s
,


 
 
T
h
y
 
s
e
l
f
 
t
h
y
 
f
o
e
,
 
t
o
 
t
h
y
 
s
w
e
e
t
 
s
e
l
f
 
t
o
o
 
c
r
u
e
l
:


 
 
T
h
o
u
 
t
h
a
t
 
a
r
t
 
n
o
w
 
t
h
e
 
w
o
r
l
d
'
s
 
f
r
e
s
h
 
o
r
n
a
m
e
n
t
,


 
 
A
n
d
 
o
n
l
y
 
h
e
r
a
l
d
 
t
o
 
t
h
e
 
g
a
u
d
y
 
s
p
r
i
n
g
,


 
 
W
i
t
h
i
n
 
t
h
i
n
e
 
o
w
n
 
b
u


In [24]:
# Group the character stream into chunks of seq_len + 1 ids;
# drop_remainder=True discards a final chunk that would be shorter.
sequences = char_dataset.batch(seq_len+1,drop_remainder=True)

In [25]:
def create_seq_targets(seq):
  """Split one chunk into an (input, target) pair offset by one position.

  The input is `seq` without its last element and the target is `seq`
  without its first element, so target[i] is the element that follows
  input[i] in the original chunk.
  """
  return seq[:-1], seq[1:]

In [26]:
# Turn every chunk into an (input, target) pair.
dataset = sequences.map(create_seq_targets)

In [27]:
# Show the first (input, target) pair both as raw ids and as decoded text.
for input_txt, target_txt in dataset.take(1):
  input_ids, target_ids = input_txt.numpy(), target_txt.numpy()
  print(input_ids)
  print(''.join(ind_to_char[input_ids]))
  print()
  print(target_ids)
  print(''.join(ind_to_char[target_ids]))

[ 0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0
  1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75]

                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But

[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0  1
  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74  1
 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45 63
 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74 60
  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75  1]
                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But 


## Generating training batches

In [28]:
# Number of (input, target) pairs per training batch.
batch_size = 128
# Size of the shuffle buffer that elements are randomly drawn from.
buffer_size = 10000
# Build the batched pipeline directly from `sequences` rather than re-wrapping
# `dataset`, so re-running this cell does not batch an already-batched dataset.
# drop_remainder=True keeps every batch at exactly `batch_size` examples.
dataset = (
    sequences.map(create_seq_targets)
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
)

In [29]:
# Display the batched dataset's element spec.
dataset

<_BatchDataset element_spec=(TensorSpec(shape=(128, 120), dtype=tf.int64, name=None), TensorSpec(shape=(128, 120), dtype=tf.int64, name=None))>

## Creating the Model

In [30]:
# Number of distinct characters; used as the embedding input size and the
# width of the output layer.
vocab_size = len(vocab)
# Size of each character embedding vector.
embed_dim = 64
# Number of units in the recurrent layer.
# NOTE(review): 1026 is an unusual width (1024 may have been intended) — confirm.
rnn_neurons = 1026

In [31]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense,Embedding,Dropout,GRU

### Setting up Loss Function

In [32]:
from tensorflow.keras.losses import sparse_categorical_crossentropy

In [33]:
def sparse_cat_loss(y_true,y_pred):
  """Sparse categorical cross-entropy computed on raw logits.

  Thin wrapper that fixes `from_logits=True` so the function can be passed
  directly as a Keras loss.
  """
  loss = sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
  return loss

In [34]:
def create_model(vocab_size,embed_dim,rnn_nerons,batch_size):
  """Build and compile the character-level Embedding -> GRU -> Dense model.

  Args:
    vocab_size: number of distinct characters; sets the embedding input size
      and the width of the final Dense layer.
    embed_dim: size of each embedding vector.
    rnn_nerons: number of GRU units. (The misspelled parameter name is kept
      so existing keyword callers keep working.)
    batch_size: fixed batch dimension of the model input, given via
      `batch_input_shape` because the GRU is created with stateful=True.

  Returns:
    A compiled Sequential model (Adam optimizer, `sparse_cat_loss`). The
    final Dense layer has no activation, so the model outputs logits.
  """
  model = Sequential()
  model.add(Embedding(vocab_size,embed_dim,batch_input_shape=[batch_size,None]))
  # Size the GRU from the argument rather than the notebook-level
  # `rnn_neurons` variable, so the caller's value is actually honoured.
  model.add(GRU(rnn_nerons,return_sequences=True,
                stateful=True,recurrent_initializer='glorot_uniform'))
  model.add(Dense(vocab_size))

  model.compile('adam',loss=sparse_cat_loss)

  return model

In [35]:
# Build the model with the training batch size.
model = create_model(vocab_size,embed_dim,rnn_neurons,batch_size)

In [36]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (128, None, 64)           5376      
                                                                 
 gru (GRU)                   (128, None, 1026)         3361176   
                                                                 
 dense (Dense)               (128, None, 84)           86268     
                                                                 
Total params: 3452820 (13.17 MB)
Trainable params: 3452820 (13.17 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# Training Model

In [37]:
# Run a single batch through the model and print the shape of its output.
# The loop variables stay defined after the loop and are used by later cells.
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "<==(batch_size,sequence_length,vocab_size)")

(128, 120, 84) <==(batch_size,sequence_length,vocab_size)


In [38]:
# Display the raw model outputs for the example batch.
example_batch_predictions

<tf.Tensor: shape=(128, 120, 84), dtype=float32, numpy=
array([[[-6.8441760e-03,  6.5293112e-03, -5.7583414e-03, ...,
          6.5148198e-03, -6.3490524e-04, -4.1530649e-03],
        [-4.9437128e-04,  6.5458510e-03, -1.1687280e-03, ...,
          4.5698797e-03, -7.6071974e-03, -2.1359727e-03],
        [-8.9640506e-03,  7.7757979e-04, -9.1471308e-04, ...,
         -1.5221634e-03, -1.2192574e-02, -8.3983680e-03],
        ...,
        [-1.7940054e-04,  7.6958421e-04, -3.6414247e-03, ...,
          8.0466847e-04, -4.7672475e-03, -4.4003972e-03],
        [-4.5002527e-03,  3.4668024e-03, -1.1800140e-03, ...,
         -5.6407349e-03, -1.2624991e-03, -3.2684370e-03],
        [-5.2871276e-03,  5.4416428e-03, -9.0950671e-05, ...,
         -7.8130728e-03,  1.2253773e-03, -3.1532783e-03]],

       [[ 3.0288190e-04, -2.0513099e-03,  6.2405779e-03, ...,
         -1.2474265e-03, -7.1879753e-05,  1.5124838e-03],
        [ 4.4252668e-03,  1.0573687e-03,  2.3814463e-03, ...,
          5.6002685e-03, -2

In [39]:
# Sample one character id per time step for the first example in the batch,
# treating the model outputs as logits.
samples_indices = tf.random.categorical(example_batch_predictions[0],num_samples=1)

In [40]:
# Display the sampled ids (one row per time step).
samples_indices

<tf.Tensor: shape=(120, 1), dtype=int64, numpy=
array([[14],
       [70],
       [26],
       [18],
       [16],
       [53],
       [19],
       [74],
       [75],
       [45],
       [56],
       [74],
       [ 2],
       [53],
       [61],
       [75],
       [49],
       [67],
       [32],
       [22],
       [24],
       [63],
       [30],
       [68],
       [44],
       [48],
       [40],
       [17],
       [ 5],
       [67],
       [74],
       [57],
       [27],
       [67],
       [ 4],
       [30],
       [80],
       [20],
       [ 6],
       [78],
       [41],
       [12],
       [21],
       [68],
       [74],
       [51],
       [11],
       [ 3],
       [33],
       [51],
       [61],
       [69],
       [53],
       [ 5],
       [14],
       [51],
       [82],
       [31],
       [16],
       [40],
       [30],
       [29],
       [62],
       [41],
       [26],
       [26],
       [63],
       [50],
       [30],
       [57],
       [28],
       [47],
       [62],
   

In [41]:
# Drop the trailing length-1 axis and convert to a NumPy array of ids.
sampled_indices = tf.squeeze(samples_indices,axis=1).numpy()

In [42]:
# Display the flattened sampled ids.
sampled_indices

array([14, 70, 26, 18, 16, 53, 19, 74, 75, 45, 56, 74,  2, 53, 61, 75, 49,
       67, 32, 22, 24, 63, 30, 68, 44, 48, 40, 17,  5, 67, 74, 57, 27, 67,
        4, 30, 80, 20,  6, 78, 41, 12, 21, 68, 74, 51, 11,  3, 33, 51, 61,
       69, 53,  5, 14, 51, 82, 31, 16, 40, 30, 29, 62, 41, 26, 26, 63, 50,
       30, 57, 28, 47, 62, 17, 55, 51, 41,  9, 26, 38, 76, 31, 21, 82,  8,
        7, 32, 42, 68, 34, 50,  5, 75, 49, 50, 60, 22, 10, 56, 83,  4, 28,
       29,  1, 23,  3, 80,  4, 53, 42, 20, 24, 51, 79, 51, 72, 70, 66,  6,
       18])

In [43]:
# Decode and print the first input sequence of the example batch, followed by
# the characters sampled from the model's output for it.
print("Given the input seq: ")
print("".join(ind_to_char[input_example_batch[0]]))
print()
print("Next Char Predictions: ")
print("".join(ind_to_char[sampled_indices]))

Given the input seq: 
are three
    That Rome should dote on; yet, by the faith of men,
    We have some old crab trees here at home that will

Next Char Predictions: 
3oA75]8stTas!]ftXlG;>hEmSWO6'lsbBl&Ey9(wP1:msZ0"HZfn]'3Z|F5OEDgPAAhYEbCVg6`ZP-AMuF:|,)GQmIY'tXYe;.a}&CD <"y&]Q9>ZxZqok(7


In [44]:
# Maximum number of training epochs.
epochs = 30

In [46]:
from tensorflow.keras.callbacks import EarlyStopping

In [47]:
# Stop training early when the training loss ('loss') has not improved for
# 2 epochs; no validation metric is monitored.
early_stop = EarlyStopping(monitor='loss',patience=2)

In [48]:
# Train on the batched dataset for up to `epochs` epochs with early stopping.
model.fit(dataset,epochs=epochs,callbacks=[early_stop])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7b5aa9cc47c0>

# Generating Text

In [49]:
# Save the trained model to an HDF5 file in the current working directory.
model.save('shakespeare_gen_model.h5')

  saving_api.save_model(


In [50]:
from tensorflow.keras.models import load_model

In [52]:
# Rebuild the same architecture with a batch size of 1 for text generation,
# load the weights saved after training, and build for a (1, any-length) input.
# Note that this rebinds `model`, replacing the training-time model object.
model = create_model(vocab_size,embed_dim,rnn_neurons,batch_size=1)
model.load_weights('shakespeare_gen_model.h5')
model.build(tf.TensorShape([1,None]))

In [53]:
# Print the summary of the batch-size-1 generation model.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (1, None, 64)             5376      
                                                                 
 gru_2 (GRU)                 (1, None, 1026)           3361176   
                                                                 
 dense_2 (Dense)             (1, None, 84)             86268     
                                                                 
Total params: 3452820 (13.17 MB)
Trainable params: 3452820 (13.17 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [54]:
def generate_text(model,start_seed,gen_size=100,temp=1.0):
  """Generate text one character at a time, continuing from `start_seed`.

  The seed is encoded and fed to the model as a batch of one sequence. After
  that, each sampled character is fed back as the next single-character
  input, relying on the model's recurrent state to carry the context.

  Args:
    model: model that is called on a (1, n) batch of character ids and
      returns per-time-step logits; must provide `reset_states()`.
    start_seed: non-empty string to continue. Every character must be a key
      of `char_to_ind`, otherwise a KeyError is raised.
    gen_size: number of characters to generate.
    temp: sampling temperature; the logits are divided by it, so values
      below 1 make sampling more conservative and values above 1 more random.

  Returns:
    `start_seed` followed by the generated characters.

  Raises:
    ValueError: if `start_seed` is empty or `temp` is not positive.
  """
  # Fail early with a clear message instead of an opaque indexing error
  # (empty seed) or inf/NaN logits (non-positive temperature).
  if not start_seed:
    raise ValueError("start_seed must contain at least one character")
  if temp <= 0:
    raise ValueError("temp must be positive, got {!r}".format(temp))

  # Encode the seed and add a leading batch dimension of size 1.
  input_eval = tf.expand_dims([char_to_ind[s] for s in start_seed],0)

  text_generated = []

  # Start from a clean recurrent state so earlier calls cannot leak context.
  model.reset_states()

  for _ in range(gen_size):
    # Drop the batch dimension: (1, steps, vocab) -> (steps, vocab).
    predictions = tf.squeeze(model(input_eval),0)

    # Only the last time step predicts the next character, so sample from
    # that row alone after applying the temperature.
    last_step_logits = predictions[-1:]/temp
    predicted_id = tf.random.categorical(last_step_logits,num_samples=1)[0,0].numpy()

    # Feed the sampled character back in as the next input.
    input_eval = tf.expand_dims([predicted_id],0)
    text_generated.append(ind_to_char[predicted_id])
  return start_seed+''.join(text_generated)

In [55]:
# Generate and print 1000 characters continuing from the seed "flower".
print(generate_text(model,"flower",gen_size=1000))

flowers in hurts do it reck)
    To book and run and watch your presence?
    Ay, and with the life of petitions well,
    Yet now less in one part flated out,
    Look'd winds do oats; for leaving she alone
    Upon the heart of Roderigence of Protector. Dost thou know againe the sum of my
    and flats! come away.
  APEMANTUS. These natural spirit is mistress of peace, yet love
    The slaves of Rome, thy life.
  LUCIUS' SERVANT. My lady's chamber.
    Farewell, discretion; from thy words cont gods,
    How much I am anwelth! And then ke found,
    To give thee gallows than either the winds
    Of gomand, his forkness him; and on it holds his looks
    The very favour with as news as longer,
    And in conclusion with the crown a poise
    Shall itches with a divorce of love,
    Call'st wit good nature by side lone,
    We find her out within these tears
    To cure their suits you have a chuck affections.
  DUKE. Are not you joint for?
  CLOWN. I will require you a man to prove for

In [56]:
# Generate and print 1000 characters continuing from the seed "JULIET".
print(generate_text(model,"JULIET",gen_size=1000))

JULIET AARON. What is the fool; I have done undone do expours.
    But welcome, doth the Queen of news are just.  
  GLOUCESTER. Advance, starve after this. Return with himself
    When I walk o'er with curs and yours, let me forth ye
    The first: were left immortal garter.
  Nurse. So please you, sir, this love that sought your honour I'll repent the meat.
  POLIXENES. If we
                       YOUERGRO
  HORTENSIO. Grest, was's he these gamer
 s of latents of warlike rogue's other way.  

               Enter PATROCLUS

  PRIAM SHYLOCK. Marry, with confounded bought against them?
    Or who knew of his fee?
    Where be it minst thou fought against my youth,
    Shall Hasting deep, which was a many's life.

                              Enter MALVOLIO

  MALVOLIO. My lords, withal
    Am given me than in marrows over present
    A merchatch'd opinion, and their ears ale strong,
    Puts hither to ashore;
    Not here so much   Your mistress saw you Duke of March. What's the matt