<a href="https://colab.research.google.com/github/arunpentkar/NLP-Assignments/blob/main/Assingnment_06.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Vocabulary and sequence-length settings shared by every model below
max_features = 20000  # Vocabulary cap passed to the loader as num_words
maxlen = 100  # Fixed number of tokens per review after padding/truncation

# Load the IMDB reviews (downloaded on first use) as sequences of word indices
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Bring every review to exactly `maxlen` tokens. The pad_sequences defaults are
# spelled out: short reviews are zero-padded on the left, and long reviews keep
# only their LAST `maxlen` tokens (truncation also happens at the front).
x_train, x_test = (
    pad_sequences(reviews, maxlen=maxlen, padding='pre', truncating='pre')
    for reviews in (x_train, x_test)
)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense

# Hyperparameters shared by the recurrent models in this notebook
embedding_dim = 128  # Size of each learned word vector
gru_units = 64  # Number of units in the recurrent layer

# GRU classifier: token ids -> embeddings -> GRU -> single sigmoid probability.
# The deprecated `input_length` argument is intentionally not passed to
# Embedding: Keras 3 ignores it and emits a warning, and the sequence length
# is inferred from the data the first time the model is called.
gru_model = Sequential([
    Embedding(input_dim=max_features, output_dim=embedding_dim),
    GRU(gru_units, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy pairs with the single sigmoid output unit
gru_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])




In [None]:
# Fit the GRU classifier, holding out 20% of the training data for validation
gru_history = gru_model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_split=0.2,
)


Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 95ms/step - accuracy: 0.6544 - loss: 24.7721 - val_accuracy: 0.6892 - val_loss: 0.5781
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 91ms/step - accuracy: 0.7777 - loss: 594.5159 - val_accuracy: 0.6904 - val_loss: 0.5825
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 93ms/step - accuracy: 0.8000 - loss: 0.4383 - val_accuracy: 0.6880 - val_loss: 0.5917
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 93ms/step - accuracy: 0.8353 - loss: 0.3843 - val_accuracy: 0.6886 - val_loss: 0.6084
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 98ms/step - accuracy: 0.8522 - loss: 0.3452 - val_accuracy: 0.6996 - val_loss: 0.6249


In [None]:
import numpy as np

def generate_text(seed_text, next_words=20):
    """Extend ``seed_text`` with up to ``next_words`` words predicted by ``gru_model``.

    The seed is encoded with the same conventions ``imdb.load_data`` applies to
    the training reviews (start marker, out-of-vocabulary id, index offset and
    the ``max_features`` vocabulary cap), so the model sees token ids from the
    same index space it was trained on.

    Generating words requires a model whose output is a probability
    distribution over the vocabulary. ``gru_model`` as built in this notebook
    ends in ``Dense(1, activation='sigmoid')`` -- a binary classifier -- so it
    cannot propose a next word. In that case a ``UserWarning`` is emitted and
    the seed text is returned unchanged instead of emitting meaningless tokens.

    Args:
        seed_text: Whitespace-separated words to start from (lower-cased
            before lookup).
        next_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Predicted ids with no vocabulary entry are rendered as ``'?'``.
    """
    import warnings

    # Defaults used by imdb.load_data: id 0 is padding, 1 marks the start of a
    # review, 2 replaces out-of-vocabulary words, and every rank returned by
    # get_word_index() is shifted up by 3 to make room for those reserved ids.
    start_token, oov_token, index_from = 1, 2, 3

    word_index = imdb.get_word_index()
    reverse_word_index = {rank + index_from: word for word, rank in word_index.items()}

    def encode(word):
        """Map a word to its training-time token id, or to the OOV id."""
        rank = word_index.get(word)
        if rank is None:
            return oov_token
        token = rank + index_from
        # Ids at or above max_features fall outside the Embedding table.
        return token if token < max_features else oov_token

    tokens = [start_token] + [encode(word) for word in seed_text.lower().split()]
    tokenized_seq = pad_sequences([tokens], maxlen=maxlen, padding='pre')

    generated_words = []
    for _ in range(next_words):
        predicted_probs = np.asarray(gru_model.predict(tokenized_seq, verbose=0))[0]

        if predicted_probs.shape[-1] == 1:
            # A single output value is a class probability, not a word choice.
            warnings.warn(
                "generate_text: the model returns a single value per input "
                "(a binary classifier), not a distribution over the vocabulary, "
                "so no words can be generated; returning the seed text unchanged.",
                stacklevel=2,
            )
            break

        # Greedy decoding; assumes output position i corresponds to token id i
        # (i.e. the model was trained to predict ids from the same index space).
        predicted_index = int(np.argmax(predicted_probs))
        generated_words.append(reverse_word_index.get(predicted_index, '?'))

        # Slide the window one step left along the time axis and append the
        # new token at the end.
        tokenized_seq = np.roll(tokenized_seq, -1, axis=1)
        tokenized_seq[0, -1] = predicted_index

    return ' '.join([seed_text] + generated_words)

# Demo: try to continue a short seed phrase and show the result
seed_text = "the movie was"
generated_text = generate_text(seed_text=seed_text, next_words=20)
print("Generated text:", generated_text)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Generated text: the movie was the the ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?


In [None]:
# Score the trained GRU classifier on the test split; evaluate() returns the
# loss followed by the metrics requested at compile time (accuracy)
gru_loss, gru_accuracy = gru_model.evaluate(x=x_test, y=y_test)
print(f"GRU Model Accuracy: {gru_accuracy:.4f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 19ms/step - accuracy: 0.6894 - loss: 0.6399
GRU Model Accuracy: 0.6894


In [None]:
from tensorflow.keras.layers import LSTM

# LSTM counterpart of the GRU classifier. It deliberately reuses the same
# embedding size, recurrent width (`gru_units`) and dropout rates so that the
# recurrent cell type is the only architectural difference between the models.
# The deprecated `input_length` argument is intentionally not passed to
# Embedding: Keras 3 ignores it and emits a warning, and the sequence length
# is inferred from the data the first time the model is called.
lstm_model = Sequential([
    Embedding(input_dim=max_features, output_dim=embedding_dim),
    LSTM(gru_units, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

# Same optimizer, loss and metric as the GRU model
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Same training schedule as the GRU model (20% of the training data held out)
lstm_history = lstm_model.fit(x_train, y_train, batch_size=32, epochs=5, validation_split=0.2)

# Score the trained LSTM classifier on the test split
lstm_loss, lstm_accuracy = lstm_model.evaluate(x_test, y_test)
print(f"LSTM Model Accuracy: {lstm_accuracy:.4f}")


Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 95ms/step - accuracy: 0.7058 - loss: 0.5517 - val_accuracy: 0.8064 - val_loss: 0.4269
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 94ms/step - accuracy: 0.8801 - loss: 0.3011 - val_accuracy: 0.8380 - val_loss: 0.3797
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 96ms/step - accuracy: 0.9193 - loss: 0.2158 - val_accuracy: 0.8370 - val_loss: 0.4243
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 97ms/step - accuracy: 0.9367 - loss: 0.1691 - val_accuracy: 0.8246 - val_loss: 0.4258
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 93ms/step - accuracy: 0.9561 - loss: 0.1237 - val_accuracy: 0.8256 - val_loss: 0.5126
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step - accuracy: 0.8219 - loss: 0.5154
LSTM Model Accuracy: 0.8243


In [None]:
# Side-by-side test accuracy of the two recurrent classifiers
print(
    f"GRU Model Accuracy: {gru_accuracy:.4f}",
    f"LSTM Model Accuracy: {lstm_accuracy:.4f}",
    sep="\n",
)


GRU Model Accuracy: 0.6894
LSTM Model Accuracy: 0.8243
