Poem Generator

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Embedding
from keras.optimizers import RMSprop
from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import EarlyStopping

Preprocessing the training data

In [None]:
# Load the corpus. The encoding is stated explicitly so decoding does not
# depend on the platform's default locale encoding.
with open('/content/drive/MyDrive/Colab Notebooks/modified_poems.txt', 'r', encoding='utf-8') as file:
    text = file.read().lower()

# Fit a word-level tokenizer on the whole corpus
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])

# Convert the corpus into one flat list of integer word ids
sequences = tokenizer.texts_to_sequences([text])[0]

# Build sliding windows: every run of `seq_length` consecutive word ids is an
# input, and the word id that immediately follows it is the target.
seq_length = 40
if len(sequences) <= seq_length:
    # Without this check the dataset would be empty and training would fail
    # later with a much less helpful error.
    raise ValueError(
        f"Corpus has only {len(sequences)} tokens; more than seq_length="
        f"{seq_length} are required to build training pairs."
    )
dataX = [sequences[i:i + seq_length] for i in range(len(sequences) - seq_length)]
dataY = sequences[seq_length:]  # dataY[i] is the word that follows window dataX[i]

# Vocabulary size; +1 because tokenizer word ids start at 1 and id 0 is left
# for padding.
num_words = len(tokenizer.word_index) + 1

# Convert to arrays: x holds the integer windows, y the one-hot encoded targets
x = np.array(dataX)
y = to_categorical(dataY, num_classes=num_words)


Building and training the model

In [None]:
# Architecture: word embedding -> two stacked LSTMs -> softmax over the vocabulary
model = Sequential([
    Embedding(input_dim=num_words, output_dim=256, input_length=seq_length),
    LSTM(400, return_sequences=True),  # return the full sequence so the next LSTM can consume it
    LSTM(400),
    Dense(num_words, activation='softmax'),
])

# Compile with RMSprop and categorical cross-entropy (targets are one-hot)
optimizer = RMSprop(learning_rate=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

# Early stopping: halt once the training loss has not improved for 4 consecutive epochs
early_stopping = EarlyStopping(monitor='loss', patience=4)

# Train, with a fraction of 0.1 of the samples set aside for validation
model.fit(
    x,
    y,
    batch_size=50,
    epochs=75,
    validation_split=0.1,
    callbacks=[early_stopping],
)

# Persist the trained model to disk
model.save('modelword3.h5')


Using the model     

Please use the model that I have selected (model name: "modelword3.h5").

In [None]:
from keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences


def generate_seq(model, tokenizer, seed_text, seq_length, n_words):
    """Greedily generate ``n_words`` words that continue ``seed_text``.

    At each step the text produced so far is encoded with ``tokenizer``,
    padded/truncated to ``seq_length`` ids, fed to ``model``, and the word
    with the highest predicted score is appended to the text.

    Args:
        model: object whose ``predict(encoded, verbose=0)`` returns scores
            over word ids for a batch of one sequence.
        tokenizer: object providing ``texts_to_sequences`` and a
            ``word_index`` mapping of word -> integer id.
        seed_text: text used to start the generation.
        seq_length: number of word ids fed to the model per step.
        n_words: number of words to generate.

    Returns:
        The generated words joined by single spaces (the seed text is not
        included). A predicted id that has no entry in ``word_index``
        contributes an empty string.
    """
    # Invert the vocabulary once so each step is a dictionary lookup instead
    # of a scan over every word in the vocabulary.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    result = []
    in_text = seed_text
    for _ in range(n_words):
        # encode the text generated so far as integer word ids
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        # fix the length to seq_length, dropping the oldest ids when too long
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # id of the highest-scoring word; the batch holds a single sequence,
        # so take element 0 and convert it to a plain int for the lookup
        predicted = model.predict(encoded, verbose=0).argmax(axis=-1)
        word_id = int(predicted[0])
        out_word = index_to_word.get(word_id, '')
        # feed the chosen word back in as context for the next step
        in_text += ' ' + out_word
        result.append(out_word)
    return ' '.join(result)

# Load the trained model. 'modelword3.h5' is the file written by
# model.save(...) in the training cell; adjust the path if the file was moved.
model = load_model('modelword3.h5')

# Seed text and generation settings
seed_text = 'India '
seq_length = 40  # should be the same as seq_length during training
n_words = 22  # number of words to generate

# Generate new text. `tokenizer` must be the tokenizer fitted in the
# preprocessing cell, because the model's word ids come from its vocabulary.
generated = generate_seq(model, tokenizer, seed_text, seq_length, n_words)
print(generated)
