In [13]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

# Read the entire training corpus into memory as one string.
with open('test.txt', mode='r', encoding='utf-8') as corpus_file:
    corpus = corpus_file.read()

# Learn the word -> integer-id vocabulary from the full corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([corpus])

# Number of classes for the embedding and softmax layers. The extra slot is
# for id 0, which Keras never assigns to a word (it is used for padding).
total_words = 1 + len(tokenizer.word_index)

# Encode every corpus line in one call; entry k holds the word ids of line k.
encoded_lines = tokenizer.texts_to_sequences(corpus.split('\n'))

# Each prefix of a line that is at least two tokens long becomes one training
# example: all tokens but the last are the context, the last is the target.
input_sequences = [
    encoded[:end]
    for encoded in encoded_lines
    for end in range(2, len(encoded) + 1)
]

# Left-pad every prefix to the length of the longest one so the examples stack
# into a single 2-D array with the target always in the final column.
max_sequence_length = max(len(prefix) for prefix in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')

# Context columns feed the model; the final column is the next-word label.
X, y = input_sequences[:, :-1], input_sequences[:, -1]

# Seed the Python, NumPy and TensorFlow RNGs before any weights are created so
# that initialisation and batch shuffling repeat on Restart & Run All.
# NOTE(review): some GPU kernels may still be nondeterministic; confirm if
# bit-for-bit reproducibility is required.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

embedding_dim = 64
lstm_units = 64

# Next-word classifier: word ids -> dense vectors -> a single LSTM summary of
# the context -> a softmax over every vocabulary id.
model = Sequential([
    Embedding(total_words, embedding_dim, input_length=max_sequence_length-1),
    LSTM(lstm_units),
    Dense(total_words, activation='softmax')
])

# The labels in `y` are integer word ids (not one-hot vectors), hence the
# sparse variant of categorical cross-entropy.
optimizer = Adam(learning_rate=0.01)
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

history = model.fit(X, y, epochs=30, verbose=1, batch_size=64)

# Persist both artifacts: the model alone is unusable without the exact
# word <-> id mapping it was trained with.
model.save('text_generation_lstm_model_v2.h5')
with open('tokenizer_v2.pickle', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [19]:
# Reload the trained artifacts from disk so generation can run without
# re-training.
model = load_model('text_generation_lstm_model_v2.h5')

# SECURITY: unpickling executes arbitrary code. Only load a tokenizer file
# that this notebook (or another trusted source) produced.
with open('tokenizer_v2.pickle', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# Recover the padded sequence length from the model itself rather than relying
# on a variable left behind by the training cell: the network consumes
# `max_sequence_length - 1` context tokens per example. If the loaded model
# reports no fixed input length, any value already in the kernel is kept.
_, context_length = model.input_shape
if context_length is not None:
    max_sequence_length = context_length + 1

def generate_text(seed_text, next_words, tokenizer, max_sequence_length, model):
    """Greedily extend ``seed_text`` by up to ``next_words`` predicted words.

    On every step the text generated so far is re-tokenised, left-padded to
    ``max_sequence_length - 1`` ids, and passed to ``model.predict``; the
    highest-scoring id is mapped back to a word and appended after a space.

    Args:
        seed_text: Prompt to continue.
        next_words: Maximum number of words to append; values <= 0 append
            nothing.
        tokenizer: Object providing ``texts_to_sequences`` and an
            ``index_word`` mapping from integer id to word.
        max_sequence_length: Padded sequence length used when the training
            examples were built (context length plus the target token).
        model: Object whose ``predict`` returns one row of scores per input
            row, one score per vocabulary id.

    Returns:
        ``seed_text`` followed by the generated words. Fewer than
        ``next_words`` words are appended if the model predicts an id that
        has no word (for example the padding id 0).
    """
    context_length = max_sequence_length - 1

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        padded = pad_sequences([token_list], maxlen=context_length, padding='pre')
        predicted = model.predict(padded, verbose=0)

        # Exactly one sequence was submitted, so read the scores of row 0.
        predicted_word_index = int(np.argmax(predicted[0]))

        # The output layer also has a slot for id 0 (padding), which maps to
        # no word. Indexing would raise KeyError; and since the input would
        # not change, every later step would predict the same id, so stop.
        predicted_word = tokenizer.index_word.get(predicted_word_index)
        if predicted_word is None:
            break

        seed_text += " " + predicted_word

    return seed_text

# Prompt to continue and the number of words to append to it.
seed_text = "as the sun gently nudged awoke it's captives"
next_words = 10

# Greedy continuation of the prompt using the reloaded model and tokenizer.
generated_text = generate_text(
    seed_text=seed_text,
    next_words=next_words,
    tokenizer=tokenizer,
    max_sequence_length=max_sequence_length,
    model=model,
)
print(generated_text)


as the sun gently nudged awoke it's captives in the hall quickly rushed over to pay their respects
