In [None]:

import re

import numpy as np
from tensorflow.keras.layers import Dense, Dropout, Embedding, Input, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical

# Read the training corpus from disk (update the path as needed).
with open("/content/drive/MyDrive/DL_dataset/LSTM_network.txt", mode="r", encoding="utf-8") as file:
    raw_text = file.read()

# Work on a lowercased copy so the same word in different casing is treated uniformly.
text = raw_text.lower()

print(text)


In [None]:

# Fit a word-level tokenizer on the whole corpus (passed as a single document).
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])

# Vocabulary size is the number of distinct words plus one: Keras word ids
# start at 1, and the extra slot covers id 0, which padding uses as filler.
vocabulary = tokenizer.word_index
total_words = 1 + len(vocabulary)

print("Total words:", total_words)
print(vocabulary)


In [None]:

# Break the corpus into sentences at every period (and any whitespace after it).
sentences = re.split(r'\.\s*', text.strip())

# For each non-empty sentence, emit every prefix of its token ids that is at
# least two tokens long: the last id of a prefix is the word to predict and the
# ids before it are the context.
input_sequences = []
for sentence in map(str.strip, sentences):
    if sentence:
        token_ids = tokenizer.texts_to_sequences([sentence])[0]
        input_sequences.extend(
            token_ids[:end] for end in range(2, len(token_ids) + 1)
        )

print(input_sequences[:5])


In [None]:

# Left-pad every n-gram to the length of the longest one so they stack into a
# single 2-D array.
max_seq_len = max(len(sequence) for sequence in input_sequences)
input_sequences = pad_sequences(input_sequences, padding='pre', maxlen=max_seq_len)

# The final column holds the target word id; all preceding columns are the context.
X, y = input_sequences[:, :-1], input_sequences[:, -1]

# One-hot encode the targets to match the softmax / categorical cross-entropy setup.
y = to_categorical(y, num_classes=total_words)


In [None]:

# Next-word prediction network: embedding -> three stacked LSTMs -> dense head
# with a softmax over the vocabulary.
model = Sequential([
    # Declare the input shape with an explicit Input layer. The previous
    # `input_length=` argument on Embedding is deprecated and ignored by
    # Keras 3, which left the model unbuilt so model.summary() showed no
    # output shapes or parameter counts. Input works on both Keras 2 and 3.
    Input(shape=(max_seq_len - 1,)),
    Embedding(input_dim=total_words, output_dim=128),
    # return_sequences=True hands the full sequence of hidden states to the
    # next LSTM; only the last LSTM collapses the sequence to one vector.
    LSTM(128, return_sequences=True),
    Dropout(0.3),
    LSTM(64, return_sequences=True),
    Dropout(0.3),
    LSTM(32),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    # One probability per vocabulary id (including the padding id 0).
    Dense(total_words, activation='softmax')
])

# Targets are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


In [None]:

# Train silently (verbose=0) for 500 passes over the n-gram dataset.
model.fit(x=X, y=y, epochs=500, verbose=0)


In [None]:

def predict_next_word(model, tokenizer, text_seed, max_sequence_len):
    """Return the word the model considers most likely to follow ``text_seed``.

    Args:
        model: Model whose ``predict`` returns one row of per-word scores for
            each input row.
        tokenizer: Fitted Keras ``Tokenizer`` used to encode the seed and to
            map the predicted id back to a word.
        text_seed: Seed text to continue.
        max_sequence_len: Length of the padded training sequences including
            the target word; the seed is left-padded to
            ``max_sequence_len - 1`` tokens.

    Returns:
        The predicted word, or ``None`` when the highest-scoring id has no
        word attached (for example the padding id 0).
    """
    token_list = tokenizer.texts_to_sequences([text_seed])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
    predicted = model.predict(token_list, verbose=0)
    # Convert the NumPy integer to a plain int before using it as a dict key.
    predicted_index = int(np.argmax(predicted, axis=1)[0])
    # The tokenizer already keeps the id -> word mapping, so look it up
    # directly instead of scanning every (word, id) pair of word_index.
    return tokenizer.index_word.get(predicted_index)

# Try the trained model on a short seed phrase.
seed_text = "deep learning"
next_word = predict_next_word(
    model,
    tokenizer,
    text_seed=seed_text,
    max_sequence_len=max_seq_len,
)

print(f"Input: {seed_text}")
print(f"Predicted next word: {next_word}")
