In [9]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
import random

In [10]:
# step 1:
# input text , character set
text = "this is a simple paragraph that is meant to be nice and easy to type which is why there will be mommas"
chars = sorted(list(set(text)))
char_to_index = {char: i for i , char in enumerate(chars)}
index_to_char = {i: char for i , char in enumerate(chars)}
vocab_size = len(chars)
seq_length = 10

In [11]:
# step 2: preparing training seq
# seq, label
sequences = []
labels = []

for i in range(len(text) - seq_length):
    seq = text[i:i+seq_length]
    label = text[i+seq_length]
    sequences.append([char_to_index[char] for char in seq])
    labels.append(char_to_index[label])
x = np.array(sequences)
y = np.array(labels)
# seq, label => one hot encoding
x_one_hot = tf.one_hot(x, vocab_size)
y_one_hot = tf.one_hot(y, vocab_size)

In [12]:
# step 3: build LSTM
model = Sequential([LSTM(128, input_shape = (seq_length, vocab_size),return_sequences=False),
                    Dense(vocab_size, activation='softmax')])
# compile and train the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_one_hot, y_one_hot, epochs=300, batch_size=32)

print("training Completed")


Epoch 1/300


  super().__init__(**kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - accuracy: 0.0365 - loss: 2.9474  
Epoch 2/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.1924 - loss: 2.9190
Epoch 3/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.2033 - loss: 2.8908
Epoch 4/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.2150 - loss: 2.8580
Epoch 5/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.1955 - loss: 2.8214
Epoch 6/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.1955 - loss: 2.7697
Epoch 7/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.2189 - loss: 2.6885
Epoch 8/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.1955 - loss: 2.6746
Epoch 9/300
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [13]:
# step 4: temperature based sampling
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds+ 1e-9) / temperature # avoid log(0)
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    return np.random.choice(range(len(chars)), p=preds)

In [14]:
# Step 5: gene train=> model => new text
start_seq  = "this is a s"
generated_text = start_seq
for _ in range(100):
  input_seq = generated_text[-seq_length:]
  input_indices = [char_to_index.get(c,0) for c in input_seq]
  input_one_hot = tf.one_hot([input_indices], vocab_size)
  preds = model.predict(input_one_hot, verbose=0)[0]
  next_index = sample(preds,temperature=0.8)
  next_char = index_to_char[next_index]
  generated_text += next_char

print("Generated Text:")
print(generated_text)

Generated Text:
this is a simple paragraph that is meant to be nice and easy to type which is why there will be mommasnd  ssaoa
