In [1]:
# Toy training corpus: fifteen short English sentences, one string per
# sentence. Later cells fit the tokenizer on this list and expand each
# sentence into n-gram training examples.
# NOTE(review): sentence order presumably influences the word ids the
# tokenizer assigns to equally frequent words -- avoid reordering casually.
corpus = [
    "Democracy gives power to the people",
    "Modern man seeks freedom and equality",
    "A true democracy protects every citizen",
    "Modern society depends on technology",
    "Democracy requires participation and awareness",
    "Modern man values comfort and convenience",
    "A democracy fails when people remain silent",
    "Education strengthens democracy",
    "Modern man is connected through the internet",
    "Freedom of speech is the foundation of democracy",
    "Modern life moves faster than ever before",
    "A democratic nation respects diversity and justice",
    "Technology shapes the modern human experience",
    "True democracy promotes peace and progress",
    "Modern challenges require global cooperation",
]



In [11]:
from tensorflow.keras.preprocessing.text import Tokenizer

# Learn the word -> integer-id vocabulary from the training sentences.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

# The Keras tokenizer never assigns id 0 to a word (it is what pad_sequences
# fills with by default), so the model needs one more class than there are
# distinct words.
total_words = 1 + len(tokenizer.word_index)
print("Total unique words:", total_words)


Total unique words: 62


In [3]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Turn every sentence into all of its leading n-grams of length >= 2:
# [w1, w2], [w1, w2, w3], ... so each row ends with the word to be predicted.
input_sequences = [
    token_ids[:end]
    for token_ids in tokenizer.texts_to_sequences(corpus)
    for end in range(2, len(token_ids) + 1)
]

# Left-pad with zeros up to the longest n-gram so the rows form a rectangular
# array and the target word always sits in the last column.
max_sequence_len = max(len(sequence) for sequence in input_sequences)
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)


In [4]:
from tensorflow.keras.utils import to_categorical

# Each padded row is an n-gram: every column but the last is the model input,
# and the last column is the id of the word to predict.
X = input_sequences[:, :-1]

# One-hot encode the target ids to match the softmax output trained with
# categorical cross-entropy.
y = to_categorical(input_sequences[:, -1], num_classes=total_words)


In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Input, LSTM
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable on "Restart & Run All" (op-level GPU
# nondeterminism aside).
set_random_seed(42)

# Next-word model: word ids -> 100-d embeddings -> LSTM summary of the
# prefix -> softmax over the vocabulary.
# The input length is declared with an explicit Input layer rather than
# Embedding(input_length=...): that argument is deprecated in Keras 3, and
# without a declared input shape the model stays unbuilt, so model.summary()
# cannot report output shapes or parameter counts.
model = Sequential([
    Input(shape=(max_sequence_len - 1,)),
    Embedding(total_words, 100),
    LSTM(100),
    Dense(total_words, activation='softmax'),
])


In [6]:
# Categorical cross-entropy pairs with the one-hot targets and the softmax
# output layer; accuracy is tracked only as a readable progress indicator.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()


In [7]:
# Train on the full set of n-gram examples; the returned History object keeps
# the per-epoch loss/accuracy for later inspection.
history = model.fit(
    x=X,
    y=y,
    epochs=200,
    verbose=1,
)


Epoch 1/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.0000e+00 - loss: 4.1294 
Epoch 2/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.0800 - loss: 4.1171
Epoch 3/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.0933 - loss: 4.1073
Epoch 4/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0933 - loss: 4.0957 
Epoch 5/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0933 - loss: 4.0839 
Epoch 6/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.0933 - loss: 4.0670
Epoch 7/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0933 - loss: 4.0459 
Epoch 8/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.0933 - loss: 4.0142
Epoch 9/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [8]:
def predict_next_words(seed_text, num_words):
    """Greedily extend ``seed_text`` with up to ``num_words`` predicted words.

    On every step the running text is tokenized with the notebook-level
    ``tokenizer``, left-padded to ``max_sequence_len - 1`` ids, and passed to
    ``model``; the word whose id has the highest predicted score is appended.

    Args:
        seed_text: Text to continue.
        num_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by the predicted words, separated by single
        spaces. Fewer than ``num_words`` words are appended when the model
        predicts an id that maps to no word (for example the padding id 0).
    """
    for _ in range(num_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        probabilities = model.predict(token_list, verbose=0)
        # One row per input sequence; the batch holds exactly one sequence.
        predicted = int(np.argmax(probabilities, axis=-1)[0])

        # index_word is the tokenizer's own id -> word table, so no scan over
        # word_index is needed for the reverse lookup.
        output_word = tokenizer.index_word.get(predicted)
        if not output_word:
            # No word for this id: appending nothing would leave the text
            # unchanged and repeat the same prediction, so stop here instead
            # of accumulating stray spaces.
            break
        seed_text += " " + output_word
    return seed_text


In [9]:
# Sanity-check the trained model on a few prefixes taken from the corpus,
# asking for three extra words each time.
for seed_text in ("democracy gives", "modern man", "freedom of"):
    print(predict_next_words(seed_text, 3))


democracy gives power to the
modern man values comfort and
freedom of speech is the
