In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
import random

In [2]:
# step 1:
# input text , character set
text = "this is a simple paragraph that is meant to be nice and easy to type which is why there will be mommas"
chars = sorted(list(set(text)))
char_to_index = {char: i for i , char in enumerate(chars)}
index_to_char = {i: char for i , char in enumerate(chars)}
vocab_size = len(chars)
seq_length = 10

In [3]:
# step 2: preparing training seq
# seq, label
sequences = []
labels = []

for i in range(len(text) - seq_length):
    seq = text[i:i+seq_length]
    label = text[i+seq_length]
    sequences.append([char_to_index[char] for char in seq])
    labels.append(char_to_index[label])
x = np.array(sequences)
y = np.array(labels)
# seq, label => one hot encoding
x_one_hot = tf.one_hot(x, vocab_size)
y_one_hot = tf.one_hot(y, vocab_size)

In [4]:
# step 3: build Multi layer LSTM
model = Sequential([
    LSTM(128, return_sequences=True, input_shape = (seq_length, vocab_size)),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),Dense(vocab_size, activation='softmax')])
# compile and train the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_one_hot, y_one_hot, epochs=300, batch_size=32)

print("training Completed")


  super().__init__(**kwargs)


Epoch 1/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 35ms/step - accuracy: 0.0825 - loss: 2.9444
Epoch 2/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.1744 - loss: 2.9205 
Epoch 3/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.1955 - loss: 2.8970
Epoch 4/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.1916 - loss: 2.8690
Epoch 5/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.1916 - loss: 2.8177
Epoch 6/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.1994 - loss: 2.7499
Epoch 7/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.1877 - loss: 2.7676
Epoch 8/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.1838 - loss: 2.7426
Epoch 9/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [5]:
# step 4: temperature based sampling
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds+ 1e-9) / temperature # avoid log(0)
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    return np.random.choice(range(len(chars)), p=preds)

In [6]:
# Step 5: gene train=> model => new text
start_seq  = "this is a s"
generated_text = start_seq
for _ in range(100):
  input_seq = generated_text[-seq_length:]
  input_indices = [char_to_index.get(c,0) for c in input_seq]
  input_one_hot = tf.one_hot([input_indices], vocab_size)
  preds = model.predict(input_one_hot, verbose=0)[0]
  next_index = sample(preds,temperature=0.8)
  next_char = index_to_char[next_index]
  generated_text += next_char

print("Generated Text:")
print(generated_text)

Generated Text:
this is a simple paragraph that is meant to be nicc and e tytto tehwilii lbmommmimsgrggggrrr aattaawilttt oaoom
