In [2]:
# Step 1: Import necessary libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np

# Step 2: Load the IMDB review dataset (reviews are passed straight to pad_sequences below)
from tensorflow.keras.datasets import imdb
max_features = 10000  # Vocabulary cap handed to imdb.load_data(num_words=...)
max_len = 200  # Fixed token length every review is padded/truncated to

train_data, test_data = imdb.load_data(num_words=max_features)
x_train, y_train = train_data
x_test, y_test = test_data

# Step 3: Preprocess the Data
# "pre" is the pad_sequences default for both options; it is spelled out so it
# is visible that short reviews are left-padded and long ones keep their tail.
x_train = pad_sequences(x_train, maxlen=max_len, padding="pre", truncating="pre")
x_test = pad_sequences(x_test, maxlen=max_len, padding="pre", truncating="pre")

# Step 4: Build the GRU Model
def create_gru_model():
    """Build and compile a stacked-GRU binary classifier.

    Layers: Embedding (128-dimensional) -> GRU(64, returns every timestep)
    -> GRU(32, returns the final state) -> Dense(1, sigmoid).
    Vocabulary size and sequence length are read from the module-level
    ``max_features`` and ``max_len``.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    model = Sequential([
        # The sequence length is declared with an explicit Input layer; the
        # Embedding ``input_length`` argument is deprecated in Keras 3.
        tf.keras.Input(shape=(max_len,)),
        Embedding(input_dim=max_features, output_dim=128),
        # return_sequences=True so the second GRU receives the full sequence.
        GRU(64, return_sequences=True),
        GRU(32),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

gru_model = create_gru_model()

# Step 5: Train the GRU Model
# The last 20% of the training arrays is held out for per-epoch validation.
gru_history = gru_model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=3,
    validation_split=0.2,
)

# Step 6: Evaluate the GRU Model
# The model is compiled with one metric, so evaluate() yields [loss, accuracy].
gru_eval = gru_model.evaluate(x_test, y_test)
print(f"GRU Model Accuracy: {gru_eval[1]:.4f}")

# Step 7: Build the LSTM Model
def create_lstm_model():
    """Build and compile a stacked-LSTM binary classifier.

    Layers: Embedding (128-dimensional) -> LSTM(64, returns every timestep)
    -> LSTM(32, returns the final state) -> Dense(1, sigmoid).
    Vocabulary size and sequence length are read from the module-level
    ``max_features`` and ``max_len``.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    model = Sequential([
        # The sequence length is declared with an explicit Input layer; the
        # Embedding ``input_length`` argument is deprecated in Keras 3.
        tf.keras.Input(shape=(max_len,)),
        Embedding(input_dim=max_features, output_dim=128),
        # return_sequences=True so the second LSTM receives the full sequence.
        LSTM(64, return_sequences=True),
        LSTM(32),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

lstm_model = create_lstm_model()

# Step 8: Train the LSTM Model
# Same training setup as the GRU run: 3 epochs, batches of 64, 20% validation.
lstm_history = lstm_model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=3,
    validation_split=0.2,
)

# Step 9: Evaluate the LSTM Model
# The model is compiled with one metric, so evaluate() yields [loss, accuracy].
lstm_eval = lstm_model.evaluate(x_test, y_test)
print(f"LSTM Model Accuracy: {lstm_eval[1]:.4f}")

# Step 10: Score a custom review with the trained GRU model
# NOTE: gru_model ends in Dense(1, sigmoid), i.e. it is a binary sentiment
# classifier. It has no distribution over the vocabulary, so it cannot predict
# a "next word": argmax over its single output is always 0, which made the
# previous generation loop emit "<unk>" on every step. The seed text is
# therefore classified instead of being extended.
seed_text = "The movie was fantastic"

# Encode the seed exactly like imdb.load_data() encodes the training reviews
# (its defaults): word ids are the frequency ranks from get_word_index()
# shifted by index_from=3, id 1 marks the start of a review, and id 2 stands
# for any word that is unknown or falls outside the num_words cap.
word_index = imdb.get_word_index()
index_from, start_char, oov_char = 3, 1, 2

seed_ids = [start_char]
for word in seed_text.lower().split():  # plain whitespace split; punctuation is not stripped
    rank = word_index.get(word)
    token_id = oov_char if rank is None else rank + index_from
    seed_ids.append(token_id if token_id < max_features else oov_char)

seed_seq = pad_sequences([seed_ids], maxlen=max_len)

# predict() returns an array of shape (1, 1): one sigmoid probability.
gru_score = float(gru_model.predict(seed_seq, verbose=0)[0][0])
gru_label = "positive" if gru_score >= 0.5 else "negative"
print(f"GRU sentiment for {seed_text!r}: {gru_label} (score={gru_score:.4f})")

# Step 11: Compare Accuracy
# Index 1 of each evaluate() result is the accuracy metric; both lines are
# emitted by a single print call, one per line.
print(
    f"GRU Accuracy: {gru_eval[1]:.4f}",
    f"LSTM Accuracy: {lstm_eval[1]:.4f}",
    sep="\n",
)


Epoch 1/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 448ms/step - accuracy: 0.6784 - loss: 0.5628 - val_accuracy: 0.8512 - val_loss: 0.3497
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 452ms/step - accuracy: 0.8970 - loss: 0.2592 - val_accuracy: 0.8534 - val_loss: 0.3344
Epoch 3/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 456ms/step - accuracy: 0.9451 - loss: 0.1510 - val_accuracy: 0.8752 - val_loss: 0.3586
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 58ms/step - accuracy: 0.8628 - loss: 0.3798
GRU Model Accuracy: 0.8617
Epoch 1/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 396ms/step - accuracy: 0.6840 - loss: 0.5702 - val_accuracy: 0.8400 - val_loss: 0.3780
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 394ms/step - accuracy: 0.8917 - loss: 0.2849 - val_accuracy: 0.8528 - val_loss: 0.3462
Epoch 3/3
[1m313/313[0m [32m━━━━━━