#Setup

In [1]:
import tensorflow as tf

import numpy as np
import os
import time

from keras.preprocessing.text import text_to_word_sequence
import string

#Downloading the Shakespeare dataset

In [2]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [3]:
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
print ('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


#Data Processing

In [4]:
def clean_text(txt):
    txt = "".join(v for v in txt if v not in string.punctuation).lower()
    txt = txt.encode("utf8").decode("ascii",'ignore')
    return txt 

In [5]:
corpus = clean_text(text)
data = text_to_word_sequence(corpus)
vocab = sorted(set(data))
vocab_size = len(vocab)

In [6]:
word2idx = {u:i for i, u in enumerate(vocab)}
idx2word = np.array(vocab)

text_as_int = np.array([word2idx[c] for c in data])

# Train Data Preparation

In [7]:
seq_length = 100
examples_per_epoch = len(text)//(seq_length+1)

dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [8]:
sequences = dataset.batch(seq_length+1, drop_remainder=True)

In [9]:
def split_input_target(chunk):
  input_text = chunk[:-1]
  target_text = chunk[1:]
  return input_text, target_text

In [10]:
BATCH_SIZE = 64
BUFFER_SIZE = 10000

train = sequences.map(split_input_target)
train = train.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)
train

<BatchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

#Model Preparation

In [11]:
embedding_dim = 256
rnn_units = 1024

In [12]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim,
                              batch_input_shape=[batch_size, None]),
    tf.keras.layers.LSTM(rnn_units,
                        return_sequences=True,
                        stateful=True,
                        recurrent_initializer='glorot_uniform'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(vocab_size),
  ])
  return model

In [13]:
model = build_model(
    vocab_size = len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE)

In [14]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           3289088   
_________________________________________________________________
lstm (LSTM)                  (64, None, 1024)          5246976   
_________________________________________________________________
batch_normalization (BatchNo (64, None, 1024)          4096      
_________________________________________________________________
dense (Dense)                (64, None, 12848)         13169200  
Total params: 21,709,360
Trainable params: 21,707,312
Non-trainable params: 2,048
_________________________________________________________________


#Compiling and Training the Model 

In [15]:
def loss(labels, logits):
  return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

In [16]:
model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

In [17]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [18]:
EPOCHS=63

In [19]:
history = model.fit(train, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/63
Epoch 2/63
Epoch 3/63
Epoch 4/63
Epoch 5/63
Epoch 6/63
Epoch 7/63
Epoch 8/63
Epoch 9/63
Epoch 10/63
Epoch 11/63
Epoch 12/63
Epoch 13/63
Epoch 14/63
Epoch 15/63
Epoch 16/63
Epoch 17/63
Epoch 18/63
Epoch 19/63
Epoch 20/63
Epoch 21/63
Epoch 22/63
Epoch 23/63
Epoch 24/63
Epoch 25/63
Epoch 26/63
Epoch 27/63
Epoch 28/63
Epoch 29/63
Epoch 30/63
Epoch 31/63
Epoch 32/63
Epoch 33/63
Epoch 34/63
Epoch 35/63
Epoch 36/63
Epoch 37/63
Epoch 38/63
Epoch 39/63
Epoch 40/63
Epoch 41/63
Epoch 42/63
Epoch 43/63
Epoch 44/63
Epoch 45/63
Epoch 46/63
Epoch 47/63
Epoch 48/63
Epoch 49/63
Epoch 50/63
Epoch 51/63
Epoch 52/63
Epoch 53/63
Epoch 54/63
Epoch 55/63
Epoch 56/63
Epoch 57/63
Epoch 58/63
Epoch 59/63
Epoch 60/63
Epoch 61/63
Epoch 62/63
Epoch 63/63


# Generating text

In [20]:
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints/ckpt_63'

In [21]:
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

model.build(tf.TensorShape([1, None]))

In [22]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            3289088   
_________________________________________________________________
lstm_1 (LSTM)                (1, None, 1024)           5246976   
_________________________________________________________________
batch_normalization_1 (Batch (1, None, 1024)           4096      
_________________________________________________________________
dense_1 (Dense)              (1, None, 12848)          13169200  
Total params: 21,709,360
Trainable params: 21,707,312
Non-trainable params: 2,048
_________________________________________________________________


In [23]:
def generate_text(model, start_string):

  # Number of words to generate
  num_generate = 10

  input_eval = [word2idx[word] for word in text_to_word_sequence(start_string)]
  input_eval = tf.expand_dims(input_eval, 0)

  text_generated = []

  temperature = 2.0

  model.reset_states()
  for i in range(num_generate):
    predictions = model(input_eval)
    predictions = tf.squeeze(predictions, 0)

    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # We pass the predicted character as the next input to the model
    # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2word[predicted_id])

  return (start_string + ' ' + ' '.join(text_generated))

# Result

In [24]:
sample_text = ["Julius", "Thou", "King is", "Death of", "The Princess"]

for pos, word in enumerate(sample_text):
  print(pos+1, generate_text(model, start_string = word))

1 Julius allhating hasty benefactors once again where is my lord with
2 Thou man multiply they mistresslet despised so we enforce received why
3 King is all prithee formerly menenius thats it me skills not much
4 Death of paulinaa shadow hearing nor ope her lap she saintseducing even
5 The Princess my arms no asking my good at the time taen
