In [None]:
import tensorflow as tf

import numpy as np
import os
import time

In [None]:
# Fetch the Shakespeare corpus; get_file returns the local path of the download.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [None]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle as soon as the read finishes
# instead of leaving it open until garbage collection.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# Sorted list of the unique characters in the file
vocab = sorted(set(text))

In [None]:
# Forward lookup layer: translates character strings into integer IDs using
# the corpus vocabulary. No mask token is reserved.
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab),
    mask_token=None,
)
# Inverse lookup layer: translates integer IDs back into character strings.
# It is built from ids_from_chars.get_vocabulary() (the layer's own vocabulary)
# rather than from vocab, so that UNK tokens are set correctly.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    mask_token=None,
    invert=True,
)
# Decode IDs back into text
def text_from_ids(ids):
  """Map IDs to characters with chars_from_ids and join them along the last axis."""
  chars = chars_from_ids(ids)
  return tf.strings.reduce_join(chars, axis=-1)

In [None]:
# Split the corpus into individual UTF-8 characters, then look up the ID of
# each character.
all_ids = ids_from_chars(
    tf.strings.unicode_split(text, input_encoding='UTF-8')
)

In [None]:
# Wrap the tensor of character IDs in a dataset that yields one ID per element
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

# Group the ID stream into chunks of seq_length + 1 IDs; a trailing chunk that
# is shorter than that is dropped.
seq_length = 100
sequences = ids_dataset.batch(batch_size=seq_length + 1, drop_remainder=True)

# Turn one chunk into an (input, target) pair for next-element prediction
def split_input_target(sequence):
    """Split a sequence into an input and a target offset by one position.

    The input is every element except the last; the target is every element
    except the first, so target[i] is the element that follows input[i].

    Returns:
        A tuple (input_text, target_text).
    """
    return sequence[:-1], sequence[1:]

# Convert every chunk into an (input, target) pair
dataset = sequences.map(map_func=split_input_target)

# Sanity check: decode the first pair back to text and print it
for input_example, target_example in dataset.take(count=1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target: b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [None]:
# Shuffle the (input, target) pairs and pack them into batches before feeding
# them into the model.
# NOTE: this cell rebinds `dataset` to its batched form, so run it only once
# per kernel session; running it again would batch the already-batched data.

# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    # tf.data.AUTOTUNE is the stable name of the deprecated
    # tf.data.experimental.AUTOTUNE constant.
    .prefetch(tf.data.AUTOTUNE))

In [None]:
def create_model(vocab_size, embed_dim, rnn_neurons, batch_size):
  """Build and compile the character model: Embedding -> stateful LSTM -> Dense.

  Args:
    vocab_size: Input dimension of the embedding and number of units in the
      final Dense layer.
    embed_dim: Output dimension of the embedding layer.
    rnn_neurons: Number of units in the LSTM layer.
    batch_size: Batch size fixed into the model's input shape
      ([batch_size, None]).

  Returns:
    A tf.keras.Sequential model compiled with the 'adam' optimizer and a
    sparse categorical cross-entropy loss computed from logits.
  """
  layers = tf.keras.layers
  model = tf.keras.Sequential([
      layers.Embedding(
          vocab_size,
          embed_dim,
          batch_input_shape=[batch_size, None],
      ),
      layers.LSTM(
          rnn_neurons,
          return_sequences=True,
          stateful=True,
          recurrent_initializer='glorot_uniform',
      ),
      # Final Dense layer: one output per vocabulary entry at every step
      layers.Dense(vocab_size),
  ])
  model.compile(
      optimizer='adam',
      loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  )
  return model

In [None]:
# Vocabulary size, taken from the lookup layer's own vocabulary
vocab_size = len(ids_from_chars.get_vocabulary())
# Embedding width (passed to create_model as embed_dim)
embedding_dim = 256
lstm_units = 1024 # number of LSTM units (passed to create_model as rnn_neurons)

In [None]:
# Training model, built for batches of BATCH_SIZE sequences.
# Arguments in order: vocab_size, embed_dim, rnn_neurons, batch_size.
model = create_model(vocab_size, embedding_dim, lstm_units, BATCH_SIZE)

In [None]:
# Train for 20 passes over the batched dataset and keep the returned History
history = model.fit(x=dataset, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Persist the trained model to disk
model.save('shakespeare_gen.h5')

# Rebuild the same architecture with a batch size of 1 for text generation and
# load the trained weights into it. This rebinds `model` to the new model.
model = create_model(
    vocab_size=vocab_size,
    embed_dim=embedding_dim,
    rnn_neurons=lstm_units,
    batch_size=1,
)
model.load_weights('shakespeare_gen.h5')
model.build(tf.TensorShape([1, None]))

def generate_text(model, start_seed,gen_size=100,temp=1.0):
  """Sample text from the model one character at a time, primed with a seed.

  The seed is fed to the model first; after that, each sampled character is
  fed back in as the next input.

  Args:
    model: Model that maps a [1, length] batch of character IDs to per-step
      logits. Its states are reset before sampling starts, and it is called
      with a batch size of 1.
    start_seed: Seed text. Either a string tensor with a leading batch axis
      (for example tf.constant(['Juliet:'])) or a scalar string.
    gen_size: Number of characters to generate. Values <= 0 return ''.
    temp: Sampling temperature; the logits are divided by it before sampling.
      Lower temperature sharpens the distribution (more predictable text),
      higher temperature flattens it (more surprising text).

  Returns:
    The generated characters as a str. The seed text itself is not included.

  Raises:
    ValueError: If temp is not strictly positive.
  """
  # Validate first: dividing logits by zero or a negative temperature would
  # produce infinite or sign-flipped logits and meaningless samples.
  if temp <= 0:
    raise ValueError(f"temp must be > 0, got {temp!r}")
  # Nothing to sample
  if gen_size <= 0:
    return ''
  # Vectorize the starting seed text into character IDs
  input_chars = tf.strings.unicode_split(start_seed, 'UTF-8')
  input_ids = ids_from_chars(input_chars)
  if isinstance(input_ids, tf.RaggedTensor):
    # Seed with a batch axis: the split is ragged, so densify it
    input_eval = input_ids.to_tensor()
  else:
    # Scalar seed: the split is a dense 1-D tensor, so add the batch axis
    input_eval = tf.expand_dims(input_ids, 0)
  # IDs of the characters sampled so far
  text_generated = []
  # Here batch size == 1
  model.reset_states()
  for _ in range(gen_size):
    # Keep only the logits of the last time step and apply the temperature
    logits = model(input_eval)[:, -1, :] / temp
    # Use a categorical distribution to select the next character
    predicted_id = tf.random.categorical(logits, num_samples=1)[0, 0].numpy()
    # Pass the predicted character in as the next input
    input_eval = tf.expand_dims([predicted_id], 0)
    text_generated.append(predicted_id)
  # Transform the sampled IDs back into a Python string
  return text_from_ids(text_generated).numpy().decode('utf-8')

In [None]:
# Smoke test: generate 1000 characters from the seed 'Juliet:' and print them
print(generate_text(model, start_seed=tf.constant(['Juliet:']), gen_size=1000))

Juliet:
But in your cries in 's fledier.

GREOMANDER:
Good faith, yet give you me a father, speak.

LEONTES:
Force her to be king'?

SICINIUS:
He that love me me, or else your highness sat's them?

RAMNLIO:
How! wear I mean i' the Duke of York?

YORTHAM:
What, masters, I do smile? 'twill be alliested
Must ed the intemptual daggers. Farewell is guilty of heart
Ago to our lord, 'tis now we meet him: let
a puppet in blood and height wit,
While I awhile with dight happy ripents. Once, or of our love?
Alack, for me, I trust me, gentle lier.

JULIET:
I dare be infected
This grief that seems unsatisfied! My words die,
Rich me more stripps Provost.

GRUMIO:
Hark, in bawd; for powers to die.

PERDIT:
Therefore was it.

RICHMOND:
What must be back?

Second Citizen:
An ship in you: any thing, one gire;
I'll to-day unbudge-by whipp'd the fre?
What such the queen mean to lives unstanding innincess
Made people and to issue out hath flower out me a present, as the drum,
Are as so best becomes for; an