<a href="https://colab.research.google.com/github/abeenoch/studypractise/blob/main/RNN_PLAY_GENERATOR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%tensorflow_version 2.x  # this line is not required unless you are in a notebook
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [2]:
#download txt data
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [3]:
# Read, then decode for py2 compat.
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


# Encoding & Decoding

In [4]:
vocab = sorted(set(text))
# Creating a mapping from unique characters to indices
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)
#encode characters with numbers
def text_to_int(text):
  return np.array([char2idx[c] for c in text])

text_as_int = text_to_int(text)

In [5]:
# lets look at how part of our text is encoded
print("Text:", text[:13])
print("Encoded:", text_to_int(text[:13]))

Text: First Citizen
Encoded: [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [6]:
#decodes encoded numbers to text
def int_to_text(ints):
  try:
    ints = ints.numpy()
  except:
    pass
    #turn array of characters to a word
  return ''.join(idx2char[ints])

print(int_to_text(text_as_int[:13]))

First Citizen


# Training Example

In [7]:
seq_length = 100  # length of sequence for a training example
#+1 is here since we have a sequence of 100 for both inputs and outputs and were dropping last letter of a word e.g Hell and output is dropping first letter and predicting last letterello 
#hence if input is 100 to make room for lastletter or character been added to end of ouput we add +1
examples_per_epoch = len(text)//(seq_length+1)

# Create training examples / targets
#slices text into streams of characters in this case 1.1 million characters *check len(text)
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [8]:
char_dataset


<TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int64, name=None)>

In [9]:
#splits characters into batches of 101 words and drop remaining
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

In [10]:
def split_input_target(chunk):  # for the example: hello
    input_text = chunk[:-1]  # hell
    target_text = chunk[1:]  # ello
    return input_text, target_text  # hell, ello
#perform above fuction to delete last letter from input text and delete first letter from target_text
dataset = sequences.map(split_input_target)  # we use map to apply the above function to every entry

In [11]:
#each input set and outtput set is 100 from sequence we set earlier
#were trying to see how what input sets and output sets look like for 3 sets
for x, y in dataset.take(3):
  print("\n\nEXAMPLE\n")
  print("INPUT")
  print(int_to_text(x))
  print("\nOUTPUT")
  print(int_to_text(y))




EXAMPLE

INPUT
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You

OUTPUT
irst Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You 


EXAMPLE

INPUT
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 

OUTPUT
re all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you k


EXAMPLE

INPUT
now Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us k

OUTPUT
ow Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us ki


In [12]:
BATCH_SIZE = 64
VOCAB_SIZE = len(vocab)  # vocab is number of unique characters
EMBEDDING_DIM = 256
RNN_UNITS = 1024

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# Build model


In [13]:
#build model
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim,
                              batch_input_shape=[batch_size, None]),
    tf.keras.layers.LSTM(rnn_units,
                        return_sequences=True,
                        stateful=True,
                        recurrent_initializer='glorot_uniform'),
    tf.keras.layers.Dense(vocab_size)
  ])
  return model

model = build_model(VOCAB_SIZE,EMBEDDING_DIM, RNN_UNITS, BATCH_SIZE)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (64, None, 256)           16640     
                                                                 
 lstm (LSTM)                 (64, None, 1024)          5246976   
                                                                 
 dense (Dense)               (64, None, 65)            66625     
                                                                 
Total params: 5,330,241
Trainable params: 5,330,241
Non-trainable params: 0
_________________________________________________________________


# Testing prediction output  for specific input

In [14]:
for input_example_batch, target_example_batch in data.take(1):
  example_batch_predictions = model(input_example_batch)  # ask our model for a prediction on our first batch of training data (64 entries)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")  # print out the output shape

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [15]:
# we can see that the predicition is an array of 64 arrays, one for each entry in the batch
print(len(example_batch_predictions))
print(example_batch_predictions)

64
tf.Tensor(
[[[-3.0920575e-03  1.2102443e-02 -2.6563727e-03 ...  3.4347866e-03
   -9.7456132e-04 -6.3701565e-03]
  [-4.5727724e-03  7.3613888e-03 -1.8997809e-03 ...  4.5943367e-03
    1.5493080e-03 -9.0386309e-03]
  [-5.6862948e-03  3.6427018e-03  2.1540108e-03 ... -6.6525768e-05
    3.1451089e-03 -7.0712767e-03]
  ...
  [ 1.6123627e-03  4.0005571e-03  7.9589235e-03 ...  2.3035419e-03
    6.9166226e-03  5.3818244e-04]
  [ 5.0101848e-03  7.7961758e-03  3.7273304e-03 ...  3.7068157e-03
    4.4503883e-03 -6.7872973e-04]
  [-2.0802887e-03  3.0087514e-03  1.1814416e-02 ...  6.4551425e-03
    3.1826806e-03 -4.3002400e-03]]

 [[-9.4900047e-04 -1.1094077e-03  4.1145869e-03 ... -2.8354456e-03
    1.0599319e-03 -7.8610977e-04]
  [ 2.1434433e-03 -1.4452636e-04  7.8681512e-03 ... -2.5834623e-03
    2.6574922e-03 -1.5061384e-03]
  [-1.8885038e-03  3.2339520e-03 -2.8425944e-04 ... -1.2872000e-03
    3.2528013e-04  8.1317499e-05]
  ...
  [ 2.7717971e-03  6.0483776e-03 -6.9269873e-03 ... -4.9720835e

In [16]:
#examining one prediction
pred = example_batch_predictions[0]
print(len(pred))
print(pred)
# notice this is a 2d array of length 100, where each interior array is the prediction for the next character at each time step

100
tf.Tensor(
[[-3.0920575e-03  1.2102443e-02 -2.6563727e-03 ...  3.4347866e-03
  -9.7456132e-04 -6.3701565e-03]
 [-4.5727724e-03  7.3613888e-03 -1.8997809e-03 ...  4.5943367e-03
   1.5493080e-03 -9.0386309e-03]
 [-5.6862948e-03  3.6427018e-03  2.1540108e-03 ... -6.6525768e-05
   3.1451089e-03 -7.0712767e-03]
 ...
 [ 1.6123627e-03  4.0005571e-03  7.9589235e-03 ...  2.3035419e-03
   6.9166226e-03  5.3818244e-04]
 [ 5.0101848e-03  7.7961758e-03  3.7273304e-03 ...  3.7068157e-03
   4.4503883e-03 -6.7872973e-04]
 [-2.0802887e-03  3.0087514e-03  1.1814416e-02 ...  6.4551425e-03
   3.1826806e-03 -4.3002400e-03]], shape=(100, 65), dtype=float32)


In [17]:
# and finally well look at a prediction at the first timestep
time_pred = pred[0]
print(len(time_pred))
print(time_pred)
# and of course its 65 values representing the probabillity of each character occuring next

65
tf.Tensor(
[-0.00309206  0.01210244 -0.00265637  0.00076803 -0.0039836   0.00240342
 -0.00127622 -0.00212301  0.00544059  0.00187594  0.00320759 -0.00542243
 -0.00062895  0.00292641  0.0019988  -0.00297241  0.00030146 -0.00538643
 -0.0009024  -0.00292946 -0.00177771  0.00312011  0.00166109  0.00145662
 -0.00195184  0.00271523 -0.00328902  0.00150295 -0.00095688 -0.00429017
  0.00155977  0.00624573 -0.00169129  0.00402553 -0.00295986  0.00476503
  0.00026091 -0.00090517 -0.00314567  0.00188257 -0.00071687 -0.00453363
  0.00156584  0.00292326 -0.00232929  0.00214368 -0.00594971  0.00327873
  0.00167878 -0.00039407 -0.00348324  0.00686434  0.00434144 -0.00251781
  0.00382506 -0.0024545  -0.00737088  0.00060163  0.00396298 -0.00034462
  0.00218714  0.00127676  0.00343479 -0.00097456 -0.00637016], shape=(65,), dtype=float32)


the output has a shape of (64,100,65)
meaning 64 batches, 100 output characters,65 predictions of what the output text is supposed to be they are then sampled and one of the 65 is chosen as each character of the 100 output characters.

In [18]:
# If we want to determine the predicted character we need to sample the output distribution (pick a value based on probabillity)
sampled_indices = tf.random.categorical(pred, num_samples=1)

# now we can reshape that array and convert all the integers to numbers to see the actual characters
sampled_indices = np.reshape(sampled_indices, (1, -1))[0]
predicted_chars = int_to_text(sampled_indices)

predicted_chars  # and this is what the model predicted for training sequence 1

'LdvTbI jZnob-:xSFXJ.foAqxWw cYkB hs,xQYqu33SfCZAuLF.gsiZRekrvg&3avIlnOwbk?sHd-xYQx:OlCsJ!RHQB?YEsCyV'

# Create Loss function

In [19]:
def loss(labels, logits):
  return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

# Compile model
At this point we can think of our problem as a classification problem where the model predicts the probabillity of each unique letter coming next.

In [20]:
model.compile(optimizer='adam', loss=loss)

# Creating Checkpoints
Now we are going to setup and configure our model to save checkpoinst as it trains. This will allow us to load our model from a checkpoint and continue training it.



In [21]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

# Training model

In [22]:
history = model.fit(data, epochs=10, callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Loading the Model
We'll rebuild the model from a checkpoint using a batch_size of 1 so that we can feed one peice of text to the model and have it make a prediction.

In [24]:
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)

Once the model is finished training, we can find the lastest checkpoint that stores the models weights using the following line.

In [25]:
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))

We can load any checkpoint we want by specifying the exact file to load.

In [26]:
checkpoint_num = 10
model.load_weights(tf.train.load_checkpoint("./training_checkpoints/ckpt_" + str(checkpoint_num)))
model.build(tf.TensorShape([1, None]))

AttributeError: ignored


# Generating Text
Now we can use the lovely function provided by tensorflow to generate some text using any starting string we'd like.

In [27]:
def generate_text(model, start_string):
  # Evaluation step (generating text using the learned model)

  # Number of characters to generate
  num_generate = 800

  # Converting our start string to numbers (vectorizing)
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  # Empty string to store our results
  text_generated = []

  # Low temperatures results in more predictable text.
  # Higher temperatures results in more surprising text.
  # Experiment to find the best setting.
  temperature = 1.0

  # Here batch size == 1
  model.reset_states()
  for i in range(num_generate):
      predictions = model(input_eval)
      # remove the batch dimension
    
      predictions = tf.squeeze(predictions, 0)

      # using a categorical distribution to predict the character returned by the model
      predictions = predictions / temperature
      predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

      # We pass the predicted character as the next input to the model
      # along with the previous hidden state
      input_eval = tf.expand_dims([predicted_id], 0)

      text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [None]:
inp = input("Type a starting string: ")
print(generate_text(model, inp))

Type a starting string: love is war
love is wary of shine terrors.

WARWICK:
O hou! Valt, one work, parting in
the subdues of your name, I beseech you.
Come, Camillo, an what little sort to Bohemia,
Since I am limit to my father was mine own
A thousand duch, which thou threat'st our country pray.
There's some showers, love, with hand and how much
her luck war, in the hollow grow of a man.

BIONUED:
There's no more said when that's made fair rash. This is an irother's gald,
That instige thy thousand King of Henry, revenge,
Too dear account the men, she is lefthe way.
I'll know the mins have gatest.

ARIEL:
My lord!

KING HENRY VI:
And let us hear her speak a'l goodly to him,
And rest to prison, of the queen and his blisson. I dare not offence
That woe requite awhile,
It shall be soleth time have lent her schill;
Let's marry her old abov
