In [24]:
from keras.preprocessing import sequence
import keras 
import tensorflow as tf
import os
import numpy as np

In [25]:
# Download the Shakespeare corpus through Keras and keep the path of the local copy.
path_to_file = keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

In [26]:
# Show where the downloaded corpus was saved on disk.
path_to_file

'C:\\Users\\Admin\\.keras\\datasets\\shakespeare.txt'

In [27]:
# Read the corpus as raw bytes and decode it as UTF-8. The `with` block
# closes the file handle even if reading or decoding raises, instead of
# leaving the handle open until garbage collection.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
print('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


In [28]:
# Preview the first 150 characters of the corpus.
print(text[:150])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

A


In [29]:
# Preprocess the text: collect the distinct characters (sorted) and build
# the character -> index and index -> character lookup tables.

vocab = sorted(set(text))
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

def text_to_int(text):
    """Encode ``text`` as a NumPy array of indices looked up in ``char2idx``.

    Raises KeyError if a character is not a key of ``char2idx``.
    """
    encoded = []
    for character in text:
        encoded.append(char2idx[character])
    return np.array(encoded)

# Encode the whole corpus as an array of integer character ids.
text_as_int = text_to_int(text)

In [30]:
# Inspect the character -> index mapping.
char2idx 

{'\n': 0,
 ' ': 1,
 '!': 2,
 '$': 3,
 '&': 4,
 "'": 5,
 ',': 6,
 '-': 7,
 '.': 8,
 '3': 9,
 ':': 10,
 ';': 11,
 '?': 12,
 'A': 13,
 'B': 14,
 'C': 15,
 'D': 16,
 'E': 17,
 'F': 18,
 'G': 19,
 'H': 20,
 'I': 21,
 'J': 22,
 'K': 23,
 'L': 24,
 'M': 25,
 'N': 26,
 'O': 27,
 'P': 28,
 'Q': 29,
 'R': 30,
 'S': 31,
 'T': 32,
 'U': 33,
 'V': 34,
 'W': 35,
 'X': 36,
 'Y': 37,
 'Z': 38,
 'a': 39,
 'b': 40,
 'c': 41,
 'd': 42,
 'e': 43,
 'f': 44,
 'g': 45,
 'h': 46,
 'i': 47,
 'j': 48,
 'k': 49,
 'l': 50,
 'm': 51,
 'n': 52,
 'o': 53,
 'p': 54,
 'q': 55,
 'r': 56,
 's': 57,
 't': 58,
 'u': 59,
 'v': 60,
 'w': 61,
 'x': 62,
 'y': 63,
 'z': 64}

In [31]:
# Inspect the encoded corpus.
text_as_int

array([18, 47, 56, ..., 45,  8,  0])

In [32]:
# Compare the first 13 characters with their integer encoding.
print(text[:13])
print(text_as_int[:13])

First Citizen
[18 47 56 57 58  1 15 47 58 47 64 43 52]


In [33]:
def int_to_text(ints):
    """Decode integer character ids back into a string using ``idx2char``.

    Args:
        ints: Integer ids as a NumPy array, a list of ints, or any object
            exposing a ``numpy()`` method (such as an eager tensor), which
            is converted first.

    Returns:
        The characters that ``idx2char`` maps the ids to, joined into one string.
    """
    # Convert only when the object offers a conversion. An explicit check
    # replaces the former bare ``except``, which hid every failure raised
    # during conversion (including KeyboardInterrupt).
    to_numpy = getattr(ints, "numpy", None)
    if callable(to_numpy):
        ints = to_numpy()
    return ''.join(idx2char[ints])

In [34]:
# Training examples

# Number of input characters in each training sequence.
seq_length = 100
# Each example uses seq_length+1 characters, because the input and target
# sequences are cut from the same chunk, offset by one character.
examples_per_epoch = len(text)//(seq_length+1)

# Dataset built from the encoded corpus (one slice per character id).
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [35]:
# Group the character stream into chunks of seq_length+1 ids; a final
# shorter chunk is dropped (drop_remainder=True).
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

In [36]:
def split_input_target(chunk):
    """Turn one chunk into an (input, target) pair offset by a single step.

    The input is the chunk without its last element and the target is the
    chunk without its first element, so ``target[i]`` is the element that
    follows ``input[i]`` in the chunk.
    """
    return chunk[:-1], chunk[1:]

# Convert every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [37]:
# Number of sequences per training batch.
BATCH_SIZE = 64
# Number of distinct characters found in the corpus.
VOCAB_SIZE = len(vocab)
# Passed to build_model as the embedding dimension.
EMBEDDING_DIM = 256
# Passed to build_model as the number of LSTM units.
RNN_UNITS = 1024

# Size of the buffer used when shuffling the dataset.
BUFFER_SIZE = 10000

# Shuffle the (input, target) pairs and group them into batches. The last
# incomplete batch is dropped because the model is built with a fixed batch size.
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [38]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character-level model: Embedding -> stateful LSTM -> Dense.

    Args:
        vocab_size: Number of distinct character ids; used as the embedding
            input size and as the width of the final Dense layer.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of units in the LSTM layer.
        batch_size: Fixed batch size baked into the model's input shape.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model. The final Dense layer has
        no activation, so it emits one raw score per vocabulary entry for
        every timestep.
    """
    layers = tf.keras.layers
    embedding = layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None],
    )
    recurrent = layers.LSTM(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )
    projection = layers.Dense(vocab_size)
    return tf.keras.Sequential([embedding, recurrent, projection])

# Build the model with the training batch size and print its layer summary.
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, BATCH_SIZE)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (64, None, 256)           16640     
                                                                 
 lstm_1 (LSTM)               (64, None, 1024)          5246976   
                                                                 
 dense_1 (Dense)             (64, None, 65)            66625     
                                                                 
Total params: 5,330,241
Trainable params: 5,330,241
Non-trainable params: 0
_________________________________________________________________


In [39]:
# Sanity check before training: run a single batch through the model and
# confirm the shape of its output.
for input_example_batch, target_example_batch in data.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [46]:
# Number of sequences in the batch, followed by the full prediction tensor.
print(len(example_batch_predictions))
print(example_batch_predictions)

64
tf.Tensor(
[[[ 1.1706926e-03  5.9754925e-04  1.3404323e-03 ...  3.7092376e-03
   -4.1774549e-03 -1.3708094e-03]
  [ 1.2203853e-03  6.8749231e-04  1.7706987e-03 ...  6.9231335e-03
   -7.2856308e-03 -1.9497500e-03]
  [-3.3207773e-03 -4.0201158e-03  5.5524078e-03 ...  1.2288874e-02
   -7.7592903e-03  1.7145313e-03]
  ...
  [-6.7947004e-03  7.8829071e-03 -4.8345039e-03 ...  1.8479566e-03
   -2.2826833e-03 -6.8860436e-03]
  [-5.3630392e-03  6.8287123e-03 -3.2999911e-03 ...  6.1771362e-03
   -6.0767895e-03 -5.5540032e-03]
  [-4.9173227e-03  5.5857329e-03 -2.4854916e-03 ...  9.8390821e-03
   -8.6639421e-03 -4.1386993e-03]]

 [[-1.1176897e-03  5.8236020e-03 -1.6999191e-03 ...  3.3471285e-04
    1.7674230e-03  2.3527243e-03]
  [ 1.1965964e-04  1.0118575e-02 -1.8520231e-03 ...  1.1857441e-03
    3.2447975e-03  1.9804449e-03]
  [ 1.9060748e-03  3.9203735e-03  4.2733550e-04 ...  4.1353598e-04
   -1.1757341e-03  7.1871653e-03]
  ...
  [ 4.5197816e-03  9.3256738e-03 -4.9903961e-03 ...  1.3071320e

In [47]:
# Predictions for the first sequence in the batch: one row per timestep.
pred = example_batch_predictions[0]
print(len(pred))
print(pred)

100
tf.Tensor(
[[ 0.00117069  0.00059755  0.00134043 ...  0.00370924 -0.00417745
  -0.00137081]
 [ 0.00122039  0.00068749  0.0017707  ...  0.00692313 -0.00728563
  -0.00194975]
 [-0.00332078 -0.00402012  0.00555241 ...  0.01228887 -0.00775929
   0.00171453]
 ...
 [-0.0067947   0.00788291 -0.0048345  ...  0.00184796 -0.00228268
  -0.00688604]
 [-0.00536304  0.00682871 -0.00329999 ...  0.00617714 -0.00607679
  -0.005554  ]
 [-0.00491732  0.00558573 -0.00248549 ...  0.00983908 -0.00866394
  -0.0041387 ]], shape=(100, 65), dtype=float32)


In [48]:
# Predictions at the first timestep: one score per vocabulary entry.
time_pred = pred[0]
print(len(time_pred))
print(time_pred)

65
tf.Tensor(
[ 1.1706926e-03  5.9754925e-04  1.3404323e-03 -3.8472763e-03
 -2.1261976e-03  1.9158999e-04  4.5450067e-04 -1.4889080e-03
  2.7511453e-03  4.7153952e-03  6.0821562e-03  3.8726334e-03
 -2.0176386e-03 -6.8115856e-04  1.0302619e-03 -2.6095053e-04
  5.5080338e-04  1.3788808e-03  6.2241000e-03 -1.7562407e-03
  2.6553281e-04  3.2422662e-04  6.6543235e-03 -2.8236161e-03
  4.0387744e-03 -1.6343321e-03  1.3794593e-03 -9.1939885e-03
 -9.9842460e-04 -4.8296489e-03 -5.1499583e-04  5.0242627e-03
 -1.0674355e-02 -9.4260182e-04 -2.2620952e-07  9.1047148e-04
  3.8723070e-03  8.7725883e-04  2.2982650e-03 -1.3800992e-03
 -1.0938349e-03  4.3051150e-03  2.5837666e-03 -3.3336948e-03
  5.2596806e-03 -5.2909637e-03  6.2028947e-04 -1.0877289e-03
 -1.6594389e-03  7.4518169e-03  4.3768166e-03  6.8549695e-04
  3.7286682e-03 -4.2764778e-04  5.2372785e-03  2.0592588e-03
  2.4246825e-03  3.0692695e-03 -1.6415652e-03 -8.2075493e-03
  7.2231237e-04  1.9534610e-03  3.7092376e-03 -4.1774549e-03
 -1.370809

In [49]:
# Sample one character id per timestep, treating each row of `pred` as
# unnormalized scores over the vocabulary.
sampled_indices = tf.random.categorical(pred, num_samples=1)
# Flatten the per-timestep samples into a 1-D array of ids.
sampled_indices = np.reshape(sampled_indices, (1, -1))[0]
predicted_chars = int_to_text(sampled_indices)

# The model has not been trained yet at this point, so the decoded text is
# expected to look random.
predicted_chars

"MpblturWGpn&tMHZQNB.nT,-dlNCNKia\nNvY,kJybG&:UwHeKSWuoyqfXp'Z ,.htl:yq$AQ;VsSrMBaY\n,vvGDj sOxUVkwl;m-"

In [50]:
def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    ``from_logits=True`` tells Keras that ``logits`` are unnormalized scores
    rather than probabilities.
    """
    crossentropy = tf.keras.losses.sparse_categorical_crossentropy(
        labels,
        logits,
        from_logits=True,
    )
    return crossentropy

In [51]:
# Configure training with the Adam optimizer and the custom `loss` function.
model.compile(optimizer='adam', loss=loss)

In [52]:
# Directory where the training checkpoints are written.
checkpoint_dir = './training_checkpoints'
# Checkpoint path template; "{epoch}" is a placeholder left for the
# callback to fill in.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback that saves only the model weights (not the full model) to the
# path template above.
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)
    

In [None]:
# Train for 10 epochs, with the checkpoint callback attached.
history = model.fit(data, epochs=10, callbacks=[checkpoint_callback])

In [61]:
# Rebuild the same architecture with a batch size of 1, replacing the
# training model bound to `model`.
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)

In [74]:
# Load the weights of the most recent checkpoint found in checkpoint_dir.
# NOTE(review): this presumably fails if no checkpoint has been written yet
# (i.e. training has not been run) - confirm before running on a fresh clone.
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
# Build the model for inputs of batch size 1 and variable sequence length.
model.build(tf.TensorShape([1, None]))

In [77]:
def generate_text(model, start_string, num_generate=500, temperature=1.0):
    """Generate text by sampling characters from ``model`` one at a time.

    Args:
        model: Object called as ``model(input_ids)`` that returns scores over
            the vocabulary for every timestep; it must also provide
            ``reset_states()``.
        start_string: Non-empty seed text. Every character must be a key of
            ``char2idx``.
        num_generate: Number of characters to generate after the seed.
        temperature: Positive divisor applied to the scores before sampling.
            Values below 1.0 make the output more predictable, values above
            1.0 make it more surprising.

    Returns:
        ``start_string`` followed by the generated characters.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
        KeyError: If ``start_string`` contains a character that is not in
            ``char2idx``.
    """
    # Validate up front so that bad input fails with a clear message instead
    # of an opaque error from inside the model or the sampler.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")
    unknown_chars = sorted(set(start_string) - set(char2idx))
    if unknown_chars:
        raise KeyError(
            "start_string contains characters missing from the vocabulary: {!r}".format(unknown_chars)
        )

    # Encode the seed and add a leading batch dimension of size 1.
    input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

    text_generated = []

    # Start from a clean recurrent state so earlier calls do not leak in.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # Drop the batch dimension, leaving one row of scores per timestep.
        predictions = tf.squeeze(predictions, 0)

        predictions = predictions / temperature
        # Sample one id per timestep and keep only the draw for the last one.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Feed only the sampled character back in; this relies on the model
        # keeping its recurrent state between calls.
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

In [81]:
# Ask for a seed string interactively and print the generated continuation.
inp = input("Enter the starting string: ")
print(generate_text(model, inp))

romeon botn. You will, my rost
It is it bothers: see, you matchat together
Not by my guest and hoir to make, and would better strace
consul, that we make Romeo's Capitol;
Unless the reye know his sport,
And hasterful uncleofful veigiance,
Not our both to are conceive.
Dear not a teal'd of rejour have been,-a king
That you shall know it will cure his world. 'Tis true:
The kind! how were, a Iset a loving grave?

ROMEO:
Why stand I was too brgizenge?

FORDELEN:
She shall, master out of a crown,
They do 
