In [141]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
import numpy as np
import os
import time

In [142]:
text = open('shakespeare.txt', 'rb').read().decode(encoding='utf-8')  

In [143]:
len(text)

1115394

In [144]:
unique = sorted(set(text))
len(unique)

65

In [145]:
# Vectorizing the text (Mapping characters to numbers)

char2idx = {u:i for i, u in enumerate(unique)}
idx2char = np.array(unique)

text_as_int = np.array([char2idx[c] for c in text])

In [146]:
print('{')
for char,_ in zip(char2idx, range(65)):
    print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('  ...\n}')

{
  '\n':   0,
  ' ' :   1,
  '!' :   2,
  '$' :   3,
  '&' :   4,
  "'" :   5,
  ',' :   6,
  '-' :   7,
  '.' :   8,
  '3' :   9,
  ':' :  10,
  ';' :  11,
  '?' :  12,
  'A' :  13,
  'B' :  14,
  'C' :  15,
  'D' :  16,
  'E' :  17,
  'F' :  18,
  'G' :  19,
  'H' :  20,
  'I' :  21,
  'J' :  22,
  'K' :  23,
  'L' :  24,
  'M' :  25,
  'N' :  26,
  'O' :  27,
  'P' :  28,
  'Q' :  29,
  'R' :  30,
  'S' :  31,
  'T' :  32,
  'U' :  33,
  'V' :  34,
  'W' :  35,
  'X' :  36,
  'Y' :  37,
  'Z' :  38,
  'a' :  39,
  'b' :  40,
  'c' :  41,
  'd' :  42,
  'e' :  43,
  'f' :  44,
  'g' :  45,
  'h' :  46,
  'i' :  47,
  'j' :  48,
  'k' :  49,
  'l' :  50,
  'm' :  51,
  'n' :  52,
  'o' :  53,
  'p' :  54,
  'q' :  55,
  'r' :  56,
  's' :  57,
  't' :  58,
  'u' :  59,
  'v' :  60,
  'w' :  61,
  'x' :  62,
  'y' :  63,
  'z' :  64,
  ...
}


In [147]:
#  creating training examples and targets
seq_length = 100
examples_per_epoch = len(text)//(seq_length)

char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

for i in char_dataset.take(5):
    print(idx2char[i.numpy()])
    

F
i
r
s
t


In [148]:
sequences = char_dataset.batch(seq_length, drop_remainder=True)

for item in sequences.take(5):
    print(repr(''.join(idx2char[item.numpy()])))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
' are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you'
" know Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us"
" kill him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it "
'be done: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor'


In [149]:
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = sequences.map(split_input_target)

In [150]:
# making batches
batch_size = 64
buffer_size = 10000
dataset = dataset.shuffle(buffer_size).batch(batch_size, drop_remainder=True)
dataset

<_BatchDataset element_spec=(TensorSpec(shape=(64, 99), dtype=tf.int64, name=None), TensorSpec(shape=(64, 99), dtype=tf.int64, name=None))>

In [151]:
vocab_size = len(unique)
embedding_dim = 256
rnn_units = 1024

In [152]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, output_dim=256, input_length=None),
        tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model


In [153]:
model = build_model(
    vocab_size = len(unique),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=batch_size
)

In [154]:
# Trying
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(input_example_batch.shape, target_example_batch.shape)


(64, 99) (64, 99)


In [155]:
model.summary()

In [156]:
sample_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sample_indices = tf.squeeze(sample_indices,axis= -1).numpy()

In [157]:
sample_indices

array([63, 30,  0, 52, 10, 12, 12, 44,  4, 51, 46, 27, 50, 17, 45, 37, 44,
       54, 45, 20, 26, 54, 34, 14, 61, 55, 44,  1, 56, 54, 57, 29, 52, 21,
       54,  6, 17,  0,  0, 57, 27, 17, 37, 42, 42, 22, 53, 62, 47, 11, 33,
        6, 39, 21, 51, 24, 56, 52, 61, 54, 33, 38, 16,  2, 53, 26, 12, 20,
       16, 15, 33, 35, 43, 51, 42, 63, 60, 31, 29, 33, 16,  0, 55, 26, 42,
       13, 12, 22, 45, 53,  2,  3,  8,  0, 24, 45, 24, 33, 41])

In [158]:
print("Input: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print("Next character prediction: ", repr("".join(idx2char[sample_indices])))

Input: 
 'the point of death\nHave they been merry! which their keepers call\nA lightning before death: O, how '

Next character prediction:  'yR\nn:??f&mhOlEgYfpgHNpVBwqf rpsQnIp,E\n\nsOEYddJoxi;U,aImLrnwpUZD!oN?HDCUWemdyvSQUD\nqNdA?Jgo!$.\nLgLUc'


In [159]:
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)



In [160]:
example_batch_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (64, 99, 65)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.173333


In [161]:
model.compile(optimizer='adam', loss=loss)

In [162]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True
)


In [163]:
EPOCHS = 10


In [164]:
HISTORY = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/10
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 583ms/step - loss: 2.8796
Epoch 2/10
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 582ms/step - loss: 1.8654
Epoch 3/10
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 599ms/step - loss: 1.6119
Epoch 4/10
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 603ms/step - loss: 1.4896
Epoch 5/10
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 608ms/step - loss: 1.4110
Epoch 6/10
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 608ms/step - loss: 1.3571
Epoch 7/10
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m325s[0m 2s/step - loss: 1.3171
Epoch 8/10
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 587ms/step - loss: 1.2804
Epoch 9/10
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 594ms/step - loss: 1.2439
Epoch 10/10
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [165]:
tf.train.latest_checkpoint(checkpoint_dir)

In [167]:
print("Available checkpoints:", os.listdir(checkpoint_dir))

Available checkpoints: ['ckpt_7.weights.h5', 'ckpt_2.weights.h5', 'ckpt_8.weights.h5', 'ckpt_5.weights.h5', 'ckpt_3.weights.h5', 'ckpt_6.weights.h5', 'ckpt_1.weights.h5', 'ckpt_10.weights.h5', 'ckpt_9.weights.h5', 'ckpt_4.weights.h5']


In [169]:
# Finding the latest checkpoint
checkpoint_files = sorted(os.listdir(checkpoint_dir))
latest_checkpoint = os.path.join(checkpoint_dir, checkpoint_files[-1])  # Picking the latest one

# Building and loading model
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
model.build(tf.TensorShape([1, None]))
model.load_weights(latest_checkpoint)  # Load weights

print("Loaded checkpoint:", latest_checkpoint)


Loaded checkpoint: ./training_checkpoints/ckpt_9.weights.h5


In [172]:
def generate_text(model, start_string, num_generate=1000):
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    text_generated = []

    for _ in range(num_generate):
        predictions = model(input_eval)  

        predicted_id = tf.random.categorical(predictions[:, -1, :], num_samples=1).numpy()[0, 0]
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)


In [173]:
print(generate_text(model, start_string=u"ROMEO: "))

ROMEO: Aft, princely Counteduse;
The churchy may be a fellow-roood; am
taken my will.

First Soloved'd,
Hath shator for your hands. Good Hants,
Nor cannot need make me? there be in
commond, either than things eye
That manhules are fourteen; what fair daughter?
Thus, call the other.

CURTIS:
And silent no lands she's set more prosper maning bold?
We seel our son'stain'd flose thing or now,
For he thought in seeming bolds,
Which thou spoke point to all the bonds and feel pause?

TRANIO:
'Tis a true and not hard malls amongst I die.
Why dost thou art a sister, a movinio should rail issue, who crots
but in my imagine cannot stur and wed; not of Salisbury, who, trunks it strength,
Tybalt spoils, I here,
And will me resolve as death:
Having thee ne'er then changed up, here brings,
And let us see no to his hands.

EIRIA:
True Hermione, in York, that man stand, boy
Are old mady crowns, bleasted down'd have each ter-in-plant;
The kingly day shalt look up
In banished all poor wars in degree,
Tha