## Word Prediction Model
## Training an RNN on a small text dataset,
## modifying the architecture to include LSTMs,
## and then evaluating the model's predictions on unseen text data.


In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense, Embedding

# Toy corpus: four short phrases the model learns next-word statistics from.
text = ["hello world", "hello tensorflow", "tensorflow is powerful", "world of AI"]

# Fit a tokenizer on the corpus and encode every phrase as a list of word ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(text)

# Every proper prefix of a phrase becomes an input; the word right after the
# prefix is its target.
pairs = [
    (encoded[:split], encoded[split])
    for encoded in sequences
    for split in range(1, len(encoded))
]

# Left-pad the prefixes with zeros so they all share one length.
X = pad_sequences([prefix for prefix, _ in pairs], padding="pre")
y = np.array([target for _, target in pairs])

# Word ids start at 1, so one extra slot is needed for the padding id 0.
vocab_size = len(word_index) + 1

# Embedding -> LSTM -> softmax over the vocabulary.
model = Sequential()
model.add(Embedding(vocab_size, 10))
# NOTE: "relu" replaces the LSTM layer's default "tanh" activation here.
model.add(LSTM(10, activation="relu"))
model.add(Dense(vocab_size, activation="softmax"))

# Targets are integer word ids, hence the sparse categorical loss.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.fit(X, y, epochs=50, verbose=0)

# Function to predict the next word
def predict_next_word(model, tokenizer, text_seq, maxlen=None):
    """Return the word the model rates most likely to follow ``text_seq``.

    Args:
        model: Trained Keras model; ``model.predict`` must return scores over
            token indices for a batch of padded id sequences.
        tokenizer: Fitted Keras ``Tokenizer`` used both to encode ``text_seq``
            and to decode the predicted index back into a word.
        text_seq: Input text whose continuation is wanted.
        maxlen: Length the encoded input is padded/truncated to. When omitted,
            the width of the module-level training matrix ``X`` is used.

    Returns:
        The predicted word, or ``None`` when the predicted index maps to no
        word in the tokenizer (for example the padding index 0).
    """
    if maxlen is None:
        # Fall back to the width the model was trained with.
        maxlen = X.shape[1]

    encoded = tokenizer.texts_to_sequences([text_seq])  # Text -> word ids
    padded = pad_sequences(encoded, maxlen=maxlen, padding="pre")
    scores = model.predict(padded)

    # A single input is passed, so the flat argmax is the winning token index.
    pred_index = int(np.argmax(scores))

    # index_word is the tokenizer's own id -> word map; a direct lookup
    # replaces the linear scan over word_index.
    return tokenizer.index_word.get(pred_index)

# Demo: ask the trained model which word it expects after "hello".
seed_text = "hello"
next_word = predict_next_word(model, tokenizer, seed_text)
print(f"Predicted next word: {next_word}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step
Predicted next word: tensorflow
