In [6]:
import tensorflow as tf


In [3]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional,GRU
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
import tensorflow.keras.utils as ku 
import numpy as np 
import os

In [5]:
# Download the Shakespeare corpus (cached locally by Keras after the first call).
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

# Read the raw bytes and decode them explicitly as UTF-8. The context manager
# guarantees the file handle is closed even if reading or decoding fails.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

# The vocabulary is the sorted set of distinct characters in the corpus.
vocab = sorted(set(text))
print ('{} unique characters'.format(len(vocab)))

# Creating a mapping from unique characters to indices (and back via idx2char)
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

# The whole corpus encoded as one integer id per character.
text_as_int = np.array([char2idx[c] for c in text])


# The maximum length sentence we want for a single input in characters
seq_length = 100
# Each example needs seq_length + 1 characters: the input plus the target
# shifted by one position.
examples_per_epoch = len(text)//(seq_length+1)

# Separate copy of the encoded corpus that later cells slice into
# fixed-length chunks.
char_dataset = np.array(text_as_int)


Length of text: 1115394 characters
65 unique characters


In [8]:
# Cut the encoded corpus into consecutive, non-overlapping chunks of
# seq_length + 1 ids each. A trailing remainder shorter than a full chunk is
# dropped, so every entry of `sequences` has the same length.
chunk_length = seq_length + 1
sequences = [
    char_dataset[start:start + chunk_length]
    # The stop bound admits exactly those starts with start + chunk_length <= len(char_dataset).
    for start in range(0, len(char_dataset) - seq_length, chunk_length)
]


    
    

In [9]:
# Preview the first three chunks: the raw ids, then the text they decode to.
for encoded_chunk in sequences[:3]:
    print(encoded_chunk)
    decoded_chunk = ''.join(idx2char[encoded_chunk])
    print(repr(decoded_chunk))

[18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43
  1 54 56 53 41 43 43 42  1 39 52 63  1 44 59 56 58 46 43 56  6  1 46 43
 39 56  1 51 43  1 57 54 43 39 49  8  0  0 13 50 50 10  0 31 54 43 39 49
  6  1 57 54 43 39 49  8  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10
  0 37 53 59  1]
'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
[39 56 43  1 39 50 50  1 56 43 57 53 50 60 43 42  1 56 39 58 46 43 56  1
 58 53  1 42 47 43  1 58 46 39 52  1 58 53  1 44 39 51 47 57 46 12  0  0
 13 50 50 10  0 30 43 57 53 50 60 43 42  8  1 56 43 57 53 50 60 43 42  8
  0  0 18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 18 47 56 57 58  6  1
 63 53 59  1 49]
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
[52 53 61  1 15 39 47 59 57  1 25 39 56 41 47 59 57  1 47 57  1 41 46 47
 43 44  1 43 52 43 51 63  1 58 53  1 58 46 43  1 54 43 53 54 50 43  8  0
  0 13 50 50 10

In [14]:
# Each chunk yields one training pair: the input is the chunk without its last
# id, the target is the chunk without its first id (i.e. shifted by one).
X_train = [chunk[:-1] for chunk in sequences]
y_train = [chunk[1:] for chunk in sequences]
    
    


In [15]:
# --- Model / training hyperparameters ---

# Number of sequences per mini-batch.
BATCH_SIZE = 64

# Width of the recurrent layer.
rnn_units = 1024

# Size of the vector each character id is embedded into.
embedding_dim = 256

# Number of distinct characters the model can read and emit.
vocab_size = len(vocab)

In [16]:
def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    `from_logits=True` tells Keras that `logits` are unnormalized scores
    rather than probabilities.
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy
    return crossentropy(labels, logits, from_logits=True)

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the Embedding -> GRU -> Dense character model.

    Args:
        vocab_size: Number of distinct character ids; also the width of the
            final Dense layer, which has no activation.
        embedding_dim: Dimension of the embedding vectors.
        rnn_units: Number of units in the GRU layer.
        batch_size: Fixed batch dimension of the input; the sequence
            dimension is left as None.

    Returns:
        An uncompiled Sequential model.
    """
    layers = [
        Embedding(vocab_size, embedding_dim,
                  batch_input_shape=[batch_size, None]),
        # The GRU emits an output for every timestep and is created stateful.
        GRU(rnn_units,
            return_sequences=True,
            stateful=True,
            recurrent_initializer='glorot_uniform'),
        Dense(vocab_size),
    ]
    return Sequential(layers)



In [17]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [18]:
# Training model: fixed batch dimension of BATCH_SIZE.
model = build_model(
    batch_size=BATCH_SIZE,
    rnn_units=rnn_units,
    embedding_dim=embedding_dim,
    vocab_size=len(vocab),
)
# Adam optimizer with the logits-based cross-entropy defined in `loss`.
model.compile(loss=loss, optimizer='adam')

In [19]:
# Print the per-layer table of output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
unified_gru (UnifiedGRU)     (64, None, 1024)          3938304   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Convert the inputs and targets to NumPy arrays.
X_train, y_train = np.array(X_train), np.array(y_train)

# Number of complete batches of BATCH_SIZE examples available.
STEPS = X_train.shape[0] // BATCH_SIZE



In [21]:
# Keep only as many examples as fill whole batches; the model was built with a
# fixed batch dimension.
X_train, y_train = X_train[:STEPS * BATCH_SIZE], y_train[:STEPS * BATCH_SIZE]

In [23]:

# Number of passes over the training data.
EPOCHS = 10

# Train, writing a weights checkpoint through checkpoint_callback.
history = model.fit(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[checkpoint_callback],
)

In [None]:
# Look up the most recent checkpoint under checkpoint_dir; as the last
# expression in the cell, its value is displayed.
tf.train.latest_checkpoint(checkpoint_dir)

In [None]:
# Rebuild the same architecture with a batch dimension of 1 so that a single
# sequence can be fed at generation time, then restore the trained weights.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# latest_checkpoint returns None when the directory holds no checkpoint; fail
# with a clear message instead of passing None on to load_weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
    raise FileNotFoundError(
        'No checkpoint found in {!r}; run the training cell first.'.format(checkpoint_dir))

model.load_weights(latest_checkpoint)

model.build(tf.TensorShape([1, None]))

In [None]:
# Print the per-layer table of output shapes and parameter counts for the
# current `model`.
model.summary()

Model: "sequential_26"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_25 (Embedding)     (1, None, 256)            16640     
_________________________________________________________________
gru_17 (GRU)                 (1, None, 1024)           3938304   
_________________________________________________________________
dense_23 (Dense)             (1, None, 65)             66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [None]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
    """Generate text by sampling one character at a time from the model.

    The seed is encoded with the notebook-level `char2idx` mapping and fed to
    the model as a batch of one sequence. After that, each sampled character
    id is fed back as the next single-step input.

    Args:
        model: Model that is called on a batch containing a single sequence
            and returns per-timestep logits; its recurrent state is reset
            before generation starts.
        start_string: Non-empty seed text. Every character must be a key of
            `char2idx`.
        num_generate: Number of characters to sample after the seed.
        temperature: Positive value the logits are divided by before
            sampling. Low temperatures result in more predictable text,
            higher temperatures in more surprising text.

    Returns:
        `start_string` followed by the generated characters.

    Raises:
        ValueError: If `start_string` is empty or `temperature` is not
            positive.
        KeyError: If `start_string` contains a character missing from
            `char2idx`.
    """
    # Validate up front: an empty seed would give the model a zero-length
    # sequence, and a non-positive temperature makes the logit scaling invalid.
    if not start_string:
        raise ValueError('start_string must contain at least one character')
    if temperature <= 0:
        raise ValueError('temperature must be positive, got {}'.format(temperature))

    # Converting our start string to numbers (vectorizing) and adding the
    # leading batch dimension.
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Characters sampled so far.
    text_generated = []

    # Here batch size == 1
    model.reset_states()
    for _ in range(num_generate):
        input_eval = tf.cast(input_eval, dtype=tf.float32)
        predictions = model(input_eval)

        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # Scale the logits, then draw from the resulting categorical
        # distribution; only the sample for the last timestep is used.
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # We pass the predicted character as the next input to the model;
        # no reset happens inside the loop.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

In [None]:
# Generate text seeded with "ROMEO: " and print it.
print(generate_text(model, start_string=u"ROMEO: "))

ROMEO: Iffit of; often but salutly
And these shatterer for children.

First Servingman:
And I the woful think; the offentisemance,
That govethem to her, Edbained;
They had them accepary here; he are untimely to temple
When I should temper to my blood
Hath give up the normed lord; 'Duch yet your Volsciance and make in Forces and hip;
Thou hast soon shamewell applice; he heard thy
gawn:
Stay whereons wherein God and do that
half not one to barded,
He die in wistable. Truth, Rithan go and pull tell you?
This we will have clue feel an hour of like
By heavours and both stains: as either
Bestridet not men: it will not gubtly PaQULIZABETH:
And say I; with any charge the time to call upon our lands.

KING LEWIS XI:
And Sto Slander's line? Nay, as their prudeness of my jest.
My bloody danger, look, daughter, his business
sings.

QUEEN MARGARET:
Forward, your love dely fly, and yet again with honours,
So many obstruct delay:
Our triam of executioner,
I am not queen.

JULIET:
Goy, knot! how here'