In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
# Step 1: Load and Prepare Text Data
# Small in-notebook training corpus: three sentences, one per line.
# The triple-quoted literal also begins and ends with a newline, so splitting
# it on "\n" yields empty first/last entries in addition to the sentences.
text = """
Deep learning allows machines to solve problems that were once thought to require human intelligence. 
It enables neural networks to learn from vast amounts of data to recognize patterns and make predictions. 
This has revolutionized fields like image recognition, natural language processing, and autonomous vehicles.
"""

In [8]:
# Normalize the corpus to lowercase. Nothing is split into words here; this
# statement only rebinds `text` to its lowercased copy.
text = text.lower()

In [10]:
# Step 2: Tokenize the Text
# Fit a word-level vocabulary (word -> integer id) on the whole corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
# One extra slot on top of the vocabulary size: the reserved padding token.
total_words = 1 + len(tokenizer.word_index)

In [12]:
# Build the training examples: for every line of the corpus, emit each prefix
# of its token ids that holds at least two tokens (context + word to predict).
# Lines that encode to fewer than two tokens contribute nothing.
input_sequences = [
    encoded[:end]
    for sentence in text.split("\n")
    for encoded in tokenizer.texts_to_sequences([sentence])
    for end in range(2, len(encoded) + 1)
]

In [14]:
# Step 3: Pad Sequences
# The longest n-gram fixes the common row length; shorter rows are padded at
# the front ('pre') so the last column always holds the word to predict.
max_sequence_len = max(map(len, input_sequences))
input_sequences = pad_sequences(
    input_sequences,
    maxlen=max_sequence_len,
    padding='pre',
)

In [16]:
# Split every padded row into its context (X: all columns but the last)
# and its next-word label (y: the last column).
X, y = input_sequences[:, :-1], input_sequences[:, -1]

In [18]:
# Convert target to one-hot encoding.
# The labels are taken from the last column of `input_sequences` instead of
# from `y` itself, which makes this cell idempotent: re-running the previous
# form would one-hot encode an already one-hot `y` and silently produce a
# 3-D array that no longer matches the model output.
y = tf.keras.utils.to_categorical(input_sequences[:, -1], num_classes=total_words)

In [20]:
# Step 4: Build the LSTM Model
# Seed Python, NumPy and TensorFlow before the layers are created so that the
# weight initialization is reproducible on Restart & Run All.
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Declare the input shape explicitly. The `input_length` argument of
    # Embedding is deprecated in Keras 3 (it only triggers a warning and leaves
    # the model unbuilt), whereas an Input spec builds the model immediately so
    # that `model.summary()` can report real shapes and parameter counts.
    tf.keras.Input(shape=(max_sequence_len - 1,)),
    Embedding(total_words, 50),  # Token ids -> 50-dimensional vectors
    LSTM(100, return_sequences=False),  # Keep only the final hidden state
    Dense(total_words, activation='softmax'),  # Distribution over the vocabulary
])



In [22]:
# Configure training: softmax output scored against one-hot targets.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# `model.summary()` prints the table itself and returns None, so wrapping it
# in print() only added a stray "None" line to the cell output.
model.summary()

None


In [24]:
# Step 5: Train the Model
# Keep the returned History so the per-epoch loss/accuracy can be inspected or
# plotted afterwards; assigning it also stops the cell from echoing the
# object's repr as its output.
history = model.fit(X, y, epochs=100, verbose=1)

Epoch 1/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - accuracy: 0.0000e+00 - loss: 3.7372
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.1106 - loss: 3.7296 
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1210 - loss: 3.7217 
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.1052 - loss: 3.7127 
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.0843 - loss: 3.7066 
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.0947 - loss: 3.6941 
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1052 - loss: 3.6772 
Epoch 8/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.0947 - loss: 3.6592
Epoch 9/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x24b8671aae0>

In [26]:
# Step 6: Generate Text
def generate_text(seed_text, next_words, max_sequence_len):
    """Greedily extend ``seed_text`` one predicted word at a time.

    Uses the notebook-level ``tokenizer`` and ``model`` objects, which must
    already be fitted/trained when this function is called.

    Args:
        seed_text: Starting phrase. It is re-tokenized on every step together
            with the words generated so far.
        next_words: Maximum number of words to append. A value <= 0 returns
            the seed unchanged.
        max_sequence_len: Length of the padded training sequences; the model
            input is one token shorter (``max_sequence_len - 1``).

    Returns:
        The seed text followed by the generated words, separated by spaces.
        Generation stops early if the model predicts an id that has no word
        in the tokenizer (e.g. the padding id 0).
    """
    generated = seed_text
    for _ in range(next_words):
        token_ids = tokenizer.texts_to_sequences([generated])[0]
        # 'pre' padding matches training; over-long contexts are cut down to
        # ``max_sequence_len - 1`` tokens by pad_sequences.
        model_input = pad_sequences([token_ids], maxlen=max_sequence_len - 1, padding='pre')
        probabilities = model.predict(model_input, verbose=0)
        # Batch of one: take the arg-max over the vocabulary axis explicitly.
        predicted_id = int(np.argmax(probabilities, axis=-1)[0])
        # Id 0 is the padding slot and has no entry in ``index_word``; a plain
        # lookup would raise KeyError, so stop generating instead.
        predicted_word = tokenizer.index_word.get(predicted_id)
        if predicted_word is None:
            break
        generated += " " + predicted_word
    return generated

In [32]:
# Generate new text: continue a short prompt with 20 predicted words and
# show the result under a heading line.
seed_text = "deep learning allows"
generated_text = generate_text(seed_text, 20, max_sequence_len)
print("Generated Text:", generated_text, sep="\n")

Generated Text:
deep learning allows machines to solve problems that were once thought to require human intelligence intelligence predictions predictions predictions predictions predictions predictions predictions
