# RNNs Romeo Juliet Play Generator

In [1]:
%tensorflow_version 2.x # this line is not required unless you  r in a notebook
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np


Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [2]:
# Fetch the Shakespeare text file from the URL below; the returned value is
# used later as the local path of the file to open.
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [3]:
# We can also load our own text data.
# Just uncomment the code below if you want to use your own data.
######################################################
######################################################
#from google.colab import files
#path_to_file = list(files.upload().keys())[0]

In [4]:
# Read the raw bytes and decode them as UTF-8 (binary mode leaves the
# newlines in the file untouched). The context manager closes the file
# handle as soon as the read finishes instead of leaving it open.
with open(path_to_file, 'rb') as text_file:
  text = text_file.read().decode(encoding='utf-8')
# The length of the text is the number of characters in it
print(f'Length of text: {len(text)} characters')

Length of text: 1115394 characters


In [5]:
# Take a look at the first 250 characters of the text
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



# ENCODING
The raw text is not numerically encoded yet,
so we are going to encode each unique character as a different integer ourselves.

In [6]:
# The unique characters in the file, sorted so that the order of the
# vocabulary (and any index derived from it) is the same on every run
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

65 unique characters


In [7]:
# Build the two lookup tables used for encoding and decoding:
#   char2idx maps each character to its position in vocab,
#   idx2char maps a position back to its character.
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

def text_to_int(text):
  """Return a NumPy array holding the char2idx id of every character in text."""
  ids = [char2idx[char] for char in text]
  return np.array(ids)

text_as_int = text_to_int(text)

In [8]:
# Let's have a look at how the first 13 characters of our text are encoded
print("TEXT : ", text[:13])
print("ENCODED : ", text_to_int(text[:13]))

TEXT :  First Citizen
ENCODED :  [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [9]:
# Decoding function, just in case we want to see the text.
def int_to_text(ints):
  """Decode a sequence of character ids back into a string.

  Args:
    ints: integer ids used to index idx2char. Either something NumPy can
      index with directly (e.g. a NumPy array) or an object exposing a
      .numpy() method, which is converted first.

  Returns:
    The string obtained by joining the character for each id.
  """
  # Convert only when a .numpy() method exists. A bare `except: pass`
  # would also hide genuine failures (and even KeyboardInterrupt).
  if hasattr(ints, 'numpy'):
    ints = ints.numpy()
  return ''.join(idx2char[ints])
print(int_to_text(text_as_int[:13]))

First Citizen


In [10]:
# TRAINING OUR MODEL
# Each training example we prepare uses a sequence of seq_length characters
# as input and a sequence of seq_length characters as output, where the output
# is the original sequence shifted one letter to the right.
# FOR EXAMPLE : INPUT: Hell || OUTPUT : ello
seq_length = 100 # length of sequence for a training example
example_per_epoch = len(text)//(seq_length+1)
# Create a dataset over the encoded text; training examples / targets
# are built from it
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [11]:
# Now we can use the batch method to turn this stream of characters into
# chunks of the desired length. Each chunk holds seq_length+1 characters so
# that an input and a target of seq_length characters can both be cut from it.
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

In [12]:
def split_input_target(chunk):
    """Split one chunk into an (input, target) pair offset by one position.

    The input is every element except the last and the target is every
    element except the first, e.g. "hello" -> ("hell", "ello").
    """
    return chunk[:-1], chunk[1:]
dataset = sequences.map(split_input_target) # we use map to apply the above function to every entry


In [13]:
# Print the first two (input, target) pairs decoded back to text
for x, y in dataset.take(2):
  print("\n\nEXAMPLE\n")
  print("INPUT")
  print(int_to_text(x))
  print("\nOUTPUT")
  print(int_to_text(y))



EXAMPLE

INPUT
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You

OUTPUT
irst Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You 


EXAMPLE

INPUT
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 

OUTPUT
re all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you k


In [14]:
# Finally we can make our training batches
BATCH_SIZE = 64
VOCAB_SIZE = len(vocab) # number of unique characters
EMBEDDING_DIM = 256
RNN_UNITS = 1024
# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Shuffle the (input, target) pairs, then group them into batches of
# BATCH_SIZE; drop_remainder=True discards a final smaller batch.
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [15]:
# Building our model
# We wrap model creation in a function to get flexibility:
# for now the model is trained with batch_size = 64, but because
# build_model takes the batch size as a parameter we can rebuild the
# same architecture later and predict with batch_size = 1.
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Build the character-level model: Embedding -> stateful LSTM -> Dense.

  Args:
    vocab_size: number of distinct character ids; used as the Embedding
      input size and as the number of Dense outputs.
    embedding_dim: size of each embedding vector.
    rnn_units: number of units in the LSTM layer.
    batch_size: fixed batch size placed in the input shape
      [batch_size, None] (the sequence length is left open).

  Returns:
    An uncompiled tf.keras.Sequential model. The final Dense layer has no
    activation, so the model outputs one raw score per character id at
    every time step.
  """
  embedding_layer = tf.keras.layers.Embedding(
      vocab_size,
      embedding_dim,
      batch_input_shape=[batch_size, None])
  lstm_layer = tf.keras.layers.LSTM(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  output_layer = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding_layer, lstm_layer, output_layer])
# Build the training model with the training batch size and print its layers
model = build_model(VOCAB_SIZE,EMBEDDING_DIM, RNN_UNITS, BATCH_SIZE)
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (64, None, 256)           16640     
                                                                 
 lstm (LSTM)                 (64, None, 1024)          5246976   
                                                                 
 dense (Dense)               (64, None, 65)            66625     
                                                                 
Total params: 5,330,241
Trainable params: 5,330,241
Non-trainable params: 0
_________________________________________________________________


In [17]:
# CREATING A LOSS FUNCTION
# Before defining the loss, run one batch through the (still untrained)
# model to check the shape of its output.
for input_example_batch, target_example_batch in data.take(1):
    example_batch_predictions = model(input_example_batch) # ask our model for a prediction based on the first batch of our training dataset
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)") # print out the output shape
    

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [18]:
# We can see that the prediction is an array of 64 arrays,
# one for each entry in the batch (len() reports the first dimension)
print(len(example_batch_predictions))
print(example_batch_predictions)

64
tf.Tensor(
[[[ 4.84519359e-03 -5.20324567e-04  6.50518853e-03 ... -2.11109454e-03
   -1.22855022e-03 -4.75838664e-04]
  [ 5.55965235e-04  1.08737824e-03  9.14832950e-03 ... -5.33894030e-03
   -4.00131848e-03  4.67943959e-03]
  [ 2.37424695e-03  8.18407070e-03  8.83196760e-03 ... -2.79903295e-03
   -1.72661385e-03  2.93124001e-03]
  ...
  [ 2.91737751e-03 -6.84425700e-04  9.18210950e-03 ...  6.68871915e-03
   -7.73472060e-03  6.57017156e-03]
  [-4.45492519e-03 -1.70314056e-03  1.15542868e-02 ...  5.35736699e-03
   -1.24955345e-02  5.32854348e-03]
  [-3.56473704e-03 -3.07771401e-03  4.80464799e-03 ...  5.82648162e-03
   -6.90243579e-03  6.58657914e-03]]

 [[-6.44791406e-03 -1.48787710e-03  5.71271218e-03 ... -4.61409945e-04
   -5.27471490e-03 -8.94126075e-04]
  [-8.59866105e-03  3.28411628e-03  8.99816304e-03 ...  2.93477159e-03
   -4.91230516e-03 -1.50405220e-03]
  [-9.14547686e-03  2.73040263e-03  9.91271436e-03 ... -1.24208513e-03
   -6.53607352e-03  3.72680649e-03]
  ...
  [ 2.648

In [19]:
# Let's examine one prediction: the first sequence of the batch
pred = example_batch_predictions[0]
print(len(pred))
print(pred)
# Notice that this is a 2D array of length 100, where
# each interior array is the prediction for the next character
# at each time step

100
tf.Tensor(
[[ 0.00484519 -0.00052032  0.00650519 ... -0.00211109 -0.00122855
  -0.00047584]
 [ 0.00055597  0.00108738  0.00914833 ... -0.00533894 -0.00400132
   0.00467944]
 [ 0.00237425  0.00818407  0.00883197 ... -0.00279903 -0.00172661
   0.00293124]
 ...
 [ 0.00291738 -0.00068443  0.00918211 ...  0.00668872 -0.00773472
   0.00657017]
 [-0.00445493 -0.00170314  0.01155429 ...  0.00535737 -0.01249553
   0.00532854]
 [-0.00356474 -0.00307771  0.00480465 ...  0.00582648 -0.00690244
   0.00658658]], shape=(100, 65), dtype=float32)


In [20]:
# Finally we will look at the prediction at the first time step
time_pred = pred[0]
print(len(time_pred))
print(time_pred)
# It holds 65 values, one raw score (logit) per character in the vocabulary.
# They are not probabilities: the model's last Dense layer has no activation,
# which is also why the loss below is computed with from_logits = True.

65
tf.Tensor(
[ 4.8451936e-03 -5.2032457e-04  6.5051885e-03 -1.1053171e-03
 -3.9488324e-03 -5.2038545e-04  3.3307176e-03  1.3175602e-03
  6.7410963e-03  2.0613954e-03 -3.7900023e-03 -1.8522888e-04
 -9.6825641e-03 -1.3918908e-03 -5.7323463e-03 -7.0487303e-03
 -2.8447923e-03 -1.3721938e-03  7.2481334e-03  1.1895994e-03
 -3.4496340e-03  4.3425588e-03 -4.0753251e-03 -3.7398853e-03
 -1.1178544e-03  2.1128780e-03 -4.4179726e-03 -8.6903535e-03
  3.1922359e-03  1.2455224e-03 -5.4663426e-04 -1.2829815e-03
 -3.1931244e-03 -1.7675677e-03 -2.0502363e-03 -1.3676065e-05
 -1.1985577e-03 -2.9483987e-03 -3.2876036e-04  1.9188905e-03
 -8.3032288e-03 -2.4958865e-03 -5.0046765e-03 -5.4228930e-03
  1.3491050e-03 -9.7656939e-03  1.8994405e-03 -5.2947542e-03
 -2.3196163e-03  1.9626985e-03  1.2982851e-03  2.7083885e-04
 -9.7572245e-04 -5.5238120e-03  3.2170513e-04  1.1889951e-03
  4.0752236e-03  1.8799721e-03 -2.8376398e-03 -7.5949794e-03
  1.8535932e-03  1.5366994e-03 -2.1110945e-03 -1.2285502e-03
 -4.758386

In [21]:
# If we want to determine the predicted character we need
# to sample the output distribution (pick a value based on the probability
# distribution) rather than always taking the highest score
sampled_indices = tf.random.categorical(pred, num_samples = 1)

# Now we can flatten that array of sampled ids and convert the integers
# back to characters to see the actual text
sampled_indices = np.reshape(sampled_indices,(1,-1))[0]
predicted_chars = int_to_text(sampled_indices)
# And this is what our (untrained) model predicted for training sequence 1
predicted_chars

'JBDTPCxwz&T3?SE:GSdl h\nZofQm!zck:hfeWltxkMT3WEcFkPokp&MrtI\ng3!SlBXK PHKPMfoCFN$-tlLg.PakN?\nvlsH-.ZGY'

In [22]:
# Now we define a loss function that compares the predicted output
# with the expected output and returns numeric values describing
# how close the two were.
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  from_logits = True tells the loss that `logits` are unnormalized scores
  rather than probabilities.
  """
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true=labels,
      y_pred=logits,
      from_logits=True)
  

In [24]:
# Compiling our model with the Adam optimizer and the loss function defined above
model.compile(optimizer = 'adam', loss = loss)

In [26]:
# Creating checkpoints
# We are going to set up and configure our model to save checkpoints as it trains.
# This will allow us to load our model from a checkpoint and
# continue training it.

# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; {epoch} is a placeholder in the file name
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Save only the weights (not the full model) to the path above
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_prefix,
    save_weights_only = True
)


In [27]:
# Finally we will start training our model.
# In this case, more epochs generally give a better model. This does not
# generalise to every problem, but the author's experience is that
# overfitting is hard to reach with this kind of model, so you can train
# for more epochs to get better results.
# NOTE(review): no validation data is passed to fit, so this notebook does
# not itself measure overfitting -- confirm before relying on the claim.
history = model.fit(data, epochs=40,callbacks=[checkpoint_callback])

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [28]:
# LOADING THE MODEL
# We will rebuild our model with batch_size = 1 (its weights are loaded
# from a checkpoint afterwards) so that we can feed one piece of text to
# the model and use it to make a prediction
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size = 1)

In [31]:
# Once the model is trained, load the weights from the latest checkpoint.
# tf.train.latest_checkpoint returns None when checkpoint_dir contains no
# checkpoint, so fail with a clear message instead of handing None to
# load_weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
  raise FileNotFoundError(
      f"No checkpoint found in {checkpoint_dir!r}; run the training cell first.")
model.load_weights(latest_checkpoint)
model.build(tf.TensorShape([1,None]))


In [None]:
# You can uncomment this if you want to load a specific checkpoint instead
# checkpoint_num = 10
# model.load_weights("./training_checkpoints/ckpt_" + str(checkpoint_num))
# model.build(tf.TensorShape([1,None]))

In [38]:
def generate_text(model, start_string, num_generate=800, temperature=1.0):
  """Generate text with the learned model, one character at a time.

  Args:
    model: the model rebuilt with batch_size = 1; it is called on a batch of
      character ids and must provide reset_states().
    start_string: non-empty seed text. Every character must be a key of
      char2idx.
    num_generate: number of characters to generate after the seed.
    temperature: value the model output is divided by before sampling.
      Low temperatures result in more predictable text, higher temperatures
      in more surprising text. Experiment to find the best setting.

  Returns:
    start_string followed by the num_generate generated characters.

  Raises:
    ValueError: if start_string is empty or temperature is not positive.
    KeyError: if start_string contains a character that is not in char2idx.
  """
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if temperature <= 0:
    raise ValueError("temperature must be positive")

  # Converting our start string to numbers (vectorizing) and adding a
  # leading batch dimension
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  text_generated = []

  # Here batch size == 1. Clear any recurrent state left from a previous call.
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # Remove the batch dimension
    predictions = tf.squeeze(predictions, 0)
    # Scale by the temperature, then sample from a categorical distribution;
    # [-1, 0] keeps the sample drawn for the last time step only
    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

    # We pass the predicted character as the next input to the model;
    # the stateful LSTM carries the previous hidden state along
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])
  return start_string + ''.join(text_generated)



In [42]:
# Run this code and type in any starting string;
# our model will generate text in the style of the Shakespeare text it was trained on
inp = input("Type any starting string:  ")
print(generate_text(model, inp))
# For example, I made myself a character in this play :)

Type any starting string:  CHIRAG:
CHIRAG:
Can your weaken sister ensu,
Of holy sun here human,
And make your sou more money'd before me
And say 'point the mascorror of our heat,
And made the monster sungs upon the draw
Seeking for her best and so sever it brought a burthen:
incestor you, sir, hear me but one drotorney--
To keep the oracle where yout in Burk and with humaner, skine on't; let them not soften,
Our Romeo hath supply ends: they weep no more suspicing,
And send the imagine of it. Thou, Lancaster.

KING HENRY VI:
Had I but thoughts, alas, whereon theself and there
Shall thou be said the tyrant. What!

KING RICHARD II:
No, to the dignity and swis news is done,
And I am content and then ne'er mother;
But stir not, rule my life to hold our tents
Till he be ceremonious hand,
And with your highness told me, I knew not whatso
