<a href="https://colab.research.google.com/github/awais-yaqoob-ml/ml-codes/blob/main/playwriterRNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%tensorflow_version 2.x

from keras.datasets import imdb
import tensorflow as tf
from keras.utils.data_utils import pad_sequences
import numpy as np
import os

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [2]:
# Fetch the Shakespeare corpus; the returned value is the local path that is
# opened in the next cell.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [3]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8. The `with`
# block closes the file handle as soon as the read finishes instead of
# leaving it open for the lifetime of the kernel.
with open(path_to_file, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')
print(f'Length of text charachters is {len(text)}')

Length of text charachters is 1115394


In [4]:
### Encoding: map every distinct character to an integer id and back.

# Sorted so that ids are assigned in a deterministic order.
vocab = sorted(set(text))
# character -> id
char2idx = dict(zip(vocab, range(len(vocab))))
# id -> character (an array, so it can be indexed with an array of ids)
idx2char = np.array(vocab)

def text_to_int(text):
  """Encode a string as a NumPy array of character ids.

  Each character is looked up in the module-level `char2idx` table, so a
  character missing from that table raises KeyError.

  Args:
    text: string to encode.

  Returns:
    1-D integer np.ndarray holding one id per character of `text`.
  """
  # dtype=int keeps the result an integer array even for an empty string;
  # without it NumPy infers float64 for an empty list, and a float array
  # cannot be used to index `idx2char`. For non-empty input the dtype is the
  # same default integer type NumPy would have inferred anyway.
  return np.array([char2idx[c] for c in text], dtype=int)

# Encode the whole corpus once; the tf.data pipeline is built from this array.
text_as_int = text_to_int(text)

In [5]:
# Show the first 18 characters followed by their integer encoding.
print(text[:18], text_to_int(text[:18]), sep='\n')

First Citizen:
Bef
[18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44]


In [6]:
def int_to_text(integers):
  """Decode character ids back into a string.

  Args:
    integers: ids to decode. An object exposing a callable `.numpy()` (for
      example an eager tensor) is converted through it first; anything else
      is used as given to index the module-level `idx2char` array.

  Returns:
    The characters for the given ids, joined into one string.
  """
  # Only call .numpy() when the object actually provides it, rather than
  # hiding every possible error behind a bare `except: pass`.
  to_numpy = getattr(integers, 'numpy', None)
  if callable(to_numpy):
    integers = to_numpy()
  return ''.join(idx2char[integers])

# Round-trip check: encoding then decoding should print the original 18 characters.
print(int_to_text(text_to_int(text[:18])))

First Citizen:
Bef


In [7]:
### make dataset
# Number of input characters per training example.
seq_len = 100
# Each example consumes seq_len+1 characters of the corpus (input plus the
# one-step-shifted target). Not referenced again in the visible cells.
examples_per_epoch = len(text)//(seq_len+1)
# Dataset that yields the encoded corpus one character id at a time.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)


In [8]:
# Group the character stream into chunks of seq_len+1 ids; drop_remainder=True
# discards a trailing chunk that would be shorter than that.
sequences = char_dataset.batch(seq_len+1, drop_remainder=True)

In [9]:
def split_input_target(chunk):
  """Split a chunk into an (input, target) pair offset by one position.

  The input is the chunk without its last element and the target is the
  chunk without its first element, so target[i] is the element that follows
  input[i] in the original chunk.
  """
  return chunk[:-1], chunk[1:]

# Turn every seq_len+1 chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [10]:
# Number of (input, target) sequences per training batch.
BATCH_SIZE = 64
# One id per distinct character in the corpus.
VOCAB_SIZE = len(vocab)
# Embedding width and LSTM unit count handed to build_model.
EMBEDDING_DIM = 256
RNN_UNITS = 1024
# Size of the buffer used by shuffle().
BUFFER_SIZE = 10000

# Shuffle the pairs, then batch them. drop_remainder=True keeps every batch at
# exactly BATCH_SIZE, matching the fixed batch size the training model is built with.
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [11]:
##build model
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the character-level model: Embedding -> LSTM -> Dense.

  Args:
    vocab_size: number of distinct character ids; used as the embedding input
      size and as the width of the final Dense layer.
    embedding_dim: size of each embedding vector.
    rnn_units: number of LSTM units.
    batch_size: fixed batch size baked into the input shape (the LSTM is
      created with stateful=True).

  Returns:
    An uncompiled tf.keras.Sequential model.
  """
  char_embedding = tf.keras.layers.Embedding(
      vocab_size,
      embedding_dim,
      batch_input_shape=[batch_size, None],
  )
  recurrent = tf.keras.layers.LSTM(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer="glorot_uniform",
  )
  # Created with only a unit count, i.e. no activation argument is passed.
  per_char_scores = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([char_embedding, recurrent, per_char_scores])

# Training model: built with a fixed batch size of BATCH_SIZE.
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, BATCH_SIZE)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (64, None, 256)           16640     
                                                                 
 lstm (LSTM)                 (64, None, 1024)          5246976   
                                                                 
 dense (Dense)               (64, None, 65)            66625     
                                                                 
Total params: 5,330,241
Trainable params: 5,330,241
Non-trainable params: 0
_________________________________________________________________


In [12]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  from_logits=True tells the loss that `logits` are unnormalized scores
  rather than probabilities.
  """
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true=labels,
      y_pred=logits,
      from_logits=True,
  )

In [13]:
# Compile with the Adam optimizer and the custom from-logits loss defined above.
model.compile(optimizer='adam', loss=loss)

In [14]:
# Directory that receives the weight checkpoints.
checkpoint_dir = './training_checkpoints'
# '{epoch}' is left as a placeholder in the file name pattern handed to the callback.
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}')
# Save weights only (not the full model).
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
)

In [15]:
# Train for 40 epochs; the checkpoint callback is passed so weights are saved during training.
history=model.fit(data, epochs=40, callbacks=[checkpoint_callback])

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [16]:
# Rebuild the same architecture with batch size 1 for text generation, which
# feeds a single sequence at a time. This rebinds `model`, replacing the
# training model object.
model=build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, 1)

In [21]:
# Restore the weights of the most recent checkpoint found in checkpoint_dir.
# NOTE(review): tf.train.latest_checkpoint presumably returns None when the
# directory holds no checkpoint — confirm, and guard before load_weights if so.
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
# Build the model for input of shape (1, None): batch size 1, variable length.
model.build(tf.TensorShape([1,None]))

In [35]:
def generate_text(model, start_string, num_generate=800, temperature=1.0):
  """Generate text by sampling one character at a time from `model`.

  The seed is fed to the model once; after that, each sampled character id is
  fed back as the next input.

  Args:
    model: model called on a (1, n) batch of character ids; it must provide
      `reset_states()`. Presumably the batch-size-1 model returned by
      `build_model` — confirm at the call site.
    start_string: non-empty seed text. Every character must be a key of the
      module-level `char2idx` table.
    num_generate: number of characters to generate (default 800, the value
      that was previously hard-coded).
    temperature: positive number the logits are divided by before sampling.
      Values below 1 sharpen the distribution, values above 1 flatten it
      (default 1.0, the value that was previously hard-coded).

  Returns:
    `start_string` followed by the `num_generate` generated characters.

  Raises:
    ValueError: if `start_string` is empty or `temperature` is not positive.
    KeyError: if `start_string` contains a character missing from `char2idx`.
  """
  if not start_string:
    # An empty seed would give the model a zero-length input and fail later
    # with an obscure indexing error.
    raise ValueError('start_string must contain at least one character')
  if temperature <= 0:
    raise ValueError('temperature must be positive')

  # Encode the seed and add a leading batch dimension of size 1.
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)
  text_generated = []

  # Start from a clean recurrent state so earlier calls do not leak in.
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # Drop the batch dimension.
    predictions = tf.squeeze(predictions, 0)
    predictions = predictions / temperature
    # Sample for every position, then keep only the sample for the last one.
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
    # The sampled id becomes the next input; the model's state carries the
    # earlier context forward.
    input_eval = tf.expand_dims([predicted_id], 0)
    text_generated.append(idx2char[predicted_id])
  return start_string + ''.join(text_generated)

In [36]:
# Ask for a seed string, then print it followed by the generated continuation.
inp = input('input txt')
print(generate_text(model, inp))

input txtjoe
joen, like thou
Whose names?

ROMEO:
I stretch'd and follow them I am not too young Duke of York;
Therefore Warwick, our barroughour night: to your sufficital
Than is the pot he did before 't.

POLIXENES:
I'll draw the heir a fearful lad
With tears and ears, and so it is in Padua;
And what appearing in our cousins
Exforce thy will be welk, who buddle joy
Sell throne of mine, and then durantime sours themselves all the
rebellion and defen them;
Like over than by God's richmost of a widow's; new appracious in praments, but be gone.

ROMEO:
Heavens!
What is't? What hast thou of golden stord
The law upon a second her our reeches supposite
Make pale at me, and my sweet son,
Seeing thou hast provided me to save strong right,
What our contrmem tyranny be required!
What look in thy maid!

PAULINA:
I 
