# A Text Generation RNN

## Brian Chen | 2020

Trained on Shakespeare for demonstration purposes

*Based on https://www.tensorflow.org/tutorials/text/text_generation*

In [0]:
#imports

import tensorflow as tf
import numpy as np
import os
import time

In [8]:
# -nc (no-clobber): skip the download when the file is already present, so re-running
# this cell does not pile up numbered copies (.1, .2, ...) of the corpus.
!wget -nc https://ocw.mit.edu/ans7870/6/6.006/s08/lecturenotes/files/t8.shakespeare.txt

--2020-05-05 21:18:59--  https://ocw.mit.edu/ans7870/6/6.006/s08/lecturenotes/files/t8.shakespeare.txt
Resolving ocw.mit.edu (ocw.mit.edu)... 104.66.108.32, 2a02:26f0:fe00:1b7::18a8, 2a02:26f0:fe00:198::18a8
Connecting to ocw.mit.edu (ocw.mit.edu)|104.66.108.32|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5458199 (5.2M) [text/plain]
Saving to: ‘t8.shakespeare.txt.1’


2020-05-05 21:18:59 (71.8 MB/s) - ‘t8.shakespeare.txt.1’ saved [5458199/5458199]



In [0]:
# Read the whole corpus into memory. The context manager closes the file handle as soon
# as the read finishes instead of leaving it open for the rest of the session.
# NOTE(review): the download cell saves into the working directory, while this reads a
# copy under Google Drive — confirm both refer to the same file.
with open("/content/drive/My Drive/TextGen RNN/Datasets/t8.shakespeare.txt", "r") as corpus_file:
  complete_works = corpus_file.read()

In [10]:
# Character-level vocabulary: every distinct character of the corpus, in sorted order.
unique_chars = list(set(complete_works))
unique_chars.sort()
print(len(unique_chars), unique_chars)

84 ['\n', ' ', '!', '"', '&', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '|', '}']


In [0]:
# Forward lookup: character -> its position in the sorted vocabulary.
char_to_index = dict(zip(unique_chars, range(len(unique_chars))))
# Reverse lookup kept as an array so it can be indexed with an array of ids.
index_to_char = np.array(unique_chars)
# The full corpus encoded as integer ids, one id per character.
text_as_int = np.array([char_to_index[ch] for ch in complete_works])

In [0]:
#parameters
max_input_length = 100  # characters fed to the model per training example
# Each example consumes max_input_length + 1 characters (the inputs plus one extra so the
# targets can be offset by one), so count whole examples rather than raw characters.
examples_per_epoch = len(complete_works) // (max_input_length + 1)

# Stream of single character ids
dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [0]:
# Cut the character stream into fixed-size chunks of max_input_length + 1 ids (one extra
# so inputs and targets can be offset by one); a short final chunk is dropped.
# Built straight from text_as_int rather than from `dataset`, because later cells rebind
# `dataset` and re-running this cell would otherwise chunk the wrong thing.
seqs = tf.data.Dataset.from_tensor_slices(text_as_int).batch(max_input_length+1, drop_remainder=True)

In [14]:
# Decode the first five chunks back to text as a sanity check; repr() keeps newlines visible.
for chunk in seqs.take(5):
  decoded = ''.join(index_to_char[chunk.numpy()])
  print(repr(decoded))

'The Complete Works of William Shakespeare\n\n1609\n\nTHE SONNETS\n\nby William Shakespeare\n\n\n\n             '
"        1\n  From fairest creatures we desire increase,\n  That thereby beauty's rose might never die,\n"
'  But as the riper should by time decease,\n  His tender heir might bear his memory:\n  But thou contra'
"cted to thine own bright eyes,\n  Feed'st thy light's flame with self-substantial fuel,\n  Making a fam"
'ine where abundance lies,\n  Thy self thy foe, to thy sweet self too cruel:\n  Thou that art now the wo'


In [0]:
#split each chunk into an (input, target) pair
def create_target_input(sequence):
  """Return ``(input, target)`` where target is the input shifted one step ahead.

  For example:
    Text:   ABCDE
    Input:  ABCD
    Target: BCDE
  Thus for input A the expected output is B, for input B it is C, and so on.

  :param sequence: any sliceable sequence
  :return: tuple ``(sequence[:-1], sequence[1:])``
  """
  # named input_seq rather than `input` so the builtin is not shadowed
  input_seq = sequence[:-1]
  target_seq = sequence[1:]
  return input_seq, target_seq

In [0]:
# Turn every chunk into an (input, target) pair via create_target_input
dataset = seqs.map(create_target_input)

In [17]:
BATCH_SIZE = 64
BUFFER_SIZE =  10000

# Rebuild the pipeline from `seqs` instead of chaining onto the previous value of
# `dataset`, so re-running this cell does not batch an already batched dataset.
dataset = (
    seqs.map(create_target_input)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)
dataset

<BatchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [0]:
vocab_size = len(unique_chars)  # number of distinct characters in the corpus
embedding_size = 256  # passed to build_model as the embedding width
rnn_units = 1024  # passed to build_model as the size of each GRU layer

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the character-level network: Embedding -> GRU -> GRU -> Dense.

  Sequential model with an embedding layer for input, GRUs as the RNN (an LSTM
  could be used instead) and a dense layer for output.

  :param vocab_size: size of the embedding table and of the output layer
  :param embedding_dim: width of the embedding vectors
  :param rnn_units: units in each GRU layer
  :param batch_size: fixed batch dimension of the input (the GRUs are stateful)
  :return: an uncompiled tf.keras.Sequential model; the Dense layer has no activation
  """
  def stateful_gru():
    # Both recurrent layers share exactly the same configuration.
    return tf.keras.layers.GRU(rnn_units,
                               return_sequences=True,
                               stateful=True,
                               recurrent_initializer='glorot_uniform')

  # Layers are created in the same order in which they are stacked.
  stack = [
    tf.keras.layers.Embedding(vocab_size, embedding_dim,
                              batch_input_shape=[batch_size, None]),
    stateful_gru(),
    stateful_gru(),
    tf.keras.layers.Dense(vocab_size),
  ]
  return tf.keras.Sequential(stack)

In [20]:
# Training model: batch dimension fixed to BATCH_SIZE
shakspeard = build_model(
    vocab_size=vocab_size,
    embedding_dim=embedding_size,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE,
)
shakspeard.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           21504     
_________________________________________________________________
gru (GRU)                    (64, None, 1024)          3938304   
_________________________________________________________________
gru_1 (GRU)                  (64, None, 1024)          6297600   
_________________________________________________________________
dense (Dense)                (64, None, 84)            86100     
Total params: 10,343,508
Trainable params: 10,343,508
Non-trainable params: 0
_________________________________________________________________


In [0]:
#Train Model
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and model outputs.

  ``from_logits=True`` because the outputs are treated as raw, unnormalized
  scores rather than probabilities.

  :param labels: integer class ids (the target characters)
  :param logits: model outputs, one score per vocabulary entry
  :return: the loss tensor computed by Keras
  """
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true=labels, y_pred=logits, from_logits=True)

In [0]:
# Adam is usually a solid default optimizer
shakspeard.compile(loss=loss, optimizer="adam")

In [0]:
# Weights-only checkpoints go under check_dir; Keras substitutes the epoch number for {epoch}.
check_dir = "/content/drive/My Drive/TextGen RNN/Checkpoints_Shakespeare"
check_file_dir = os.path.join(check_dir, "checkpoint_{epoch}")

checkpoints = tf.keras.callbacks.ModelCheckpoint(
    filepath=check_file_dir,
    save_weights_only=True,
)

In [34]:
EPOCHS = 20
STEPS= 300
# EPOCHS * STEPS = 6000 batches are requested, far more than a single pass over the data
# provides at this batch size (about 5.4M chars / 101 per example / 64 per batch, i.e.
# roughly 840 batches — assuming the ~5.4 MB corpus shown by the download cell).
# Repeating the dataset keeps the iterator from running dry in the middle of training.
history = shakspeard.fit(dataset.repeat(), epochs = EPOCHS, steps_per_epoch = STEPS, callbacks = [checkpoints])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20


In [35]:
tf.train.latest_checkpoint(check_dir) # show the path of the most recent checkpoint found under check_dir

'/content/drive/My Drive/TextGen RNN/Checkpoints_Shakespeare/checkpoint_11'

In [36]:
# Same architecture rebuilt for a single sequence at a time (batch size 1),
# then filled with the most recently saved training weights.
latest_weights = tf.train.latest_checkpoint(check_dir)
prediktor = build_model(vocab_size, embedding_size, rnn_units, batch_size=1)
prediktor.load_weights(latest_weights)
prediktor.build(tf.TensorShape([1, None]))
prediktor.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (1, None, 256)            21504     
_________________________________________________________________
gru_6 (GRU)                  (1, None, 1024)           3938304   
_________________________________________________________________
gru_7 (GRU)                  (1, None, 1024)           6297600   
_________________________________________________________________
dense_3 (Dense)              (1, None, 84)             86100     
Total params: 10,343,508
Trainable params: 10,343,508
Non-trainable params: 0
_________________________________________________________________


In [0]:
def gen_text(model, seed, num_generate, temp):
  """
  Sample text from the model one character at a time, starting from a seed.

  :param model: tf/keras model
  :param seed: first line of text to build off of; must be non-empty and every
               character must be a key of char_to_index (otherwise KeyError)
  :param num_generate: amount of chars to predict
  :param temp: how much variance is allowed in predictions: higher = more varied, and vice versa;
               must be greater than 0
  :return: the seed followed by the generated characters
  :raises ValueError: if seed is empty or temp is not positive
  """
  if not seed:
    raise ValueError("seed must contain at least one character")
  if temp <= 0:
    # the logits are divided by temp below; zero would turn them into inf/NaN
    raise ValueError("temp must be greater than 0")

  gen_input = [char_to_index[i] for i in seed]
  gen_input = tf.expand_dims(gen_input, 0)  # add a leading batch dimension
  out = []

  model.reset_states()  # start generation from a clean recurrent state
  for _ in range(num_generate):
    pred = model(gen_input)
    pred = tf.squeeze(pred, 0)  # drop the batch dimension again
    pred = pred/temp
    # one sample is drawn per row of pred; only the draw for the last row is kept
    pred_id = tf.random.categorical(pred, num_samples = 1)[-1, 0].numpy()
    # feed just the sampled character back in as the next input
    # (presumably the model keeps its recurrent state between calls — confirm it is stateful)
    gen_input = tf.expand_dims([pred_id], 0)
    out.append(index_to_char[pred_id])
  return (seed + "".join(out))


In [0]:
# 2000 characters seeded with "Hamlet"; temp=1 leaves the model's scores unscaled
ex_out = gen_text(prediktor, seed="Hamlet", num_generate=2000, temp=1)

In [39]:
print(ex_out)  # print() so the newlines in the generated text render as line breaks

Hamlet
    I call; where be your reason, after
    Be thou th' men, the mind shall go alone. Ears serv'd
    the weaker bitter tears. Then they find on the water or mistaking
    May tear sur shines be to the sea-seal'd- gentlemen,
    I'll hide my sons: for every day before some
    late assurance, and she cannot be so sacrifice. Entreat FLORIZEL. Resure, some business I have miss.'  
    Away, thou godsdst hence. [Exit Attendants].

         Here in an art no table; we willing I love-
    O.
  SHEPHERD. Let. I have your lordship is in a dreamer.
  TROILUS. Farewell, Andronicus; let them good peace
    The quality of their own services done
    And haply within! What, Troyans!
  GREMIO. For mine, not reason colour'd shore!       Exeunt

                 MARCUS with them at supper?
  PROTEUS. I love and harm.
  TRANIO. Let plainly have no vain that I might not conditu such as welcome how she is;
    And if not black against thy heart
    Is cold-made better broad and weapons dry,
    W