<a href="https://colab.research.google.com/github/ashwanthpolusani/3rd_year_nlp/blob/main/Assignment6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import keras
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, GRU, Dense
from keras.datasets import imdb
from sklearn.model_selection import train_test_split

# (a) Preprocessing of the Data
max_features = 10000  # Vocabulary size: only the most common words are kept as features
maxlen = 100  # Fixed number of tokens per review fed to the network

# Load the IMDb dataset as integer-encoded reviews, limited to `max_features` words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Pad/truncate every review to exactly `maxlen` tokens so batches are rectangular.
# With the default settings of pad_sequences both padding and truncation happen
# at the START of a sequence, i.e. the last `maxlen` tokens of long reviews are kept.
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# (b) Divide data into training and testing dataset
# The IMDb dataset already ships with separate training and test splits.

# (c) Build the GRU Model
model = Sequential()
# `input_length` is deprecated in Keras 3 and only produces a warning there;
# the sequence length is picked up from the padded input on the first call.
model.add(Embedding(max_features, 128))
model.add(GRU(128, dropout=0.2, recurrent_dropout=0.2))
# Single sigmoid unit: probability that the review is positive
model.add(Dense(1, activation='sigmoid'))

# (d) Training the GRU Model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): the run recorded with this notebook shows the training loss
# spiking in several epochs while the validation loss stays around 0.5-0.6.
# Worth investigating (e.g. gradient clipping, or dropping recurrent_dropout)
# before relying on these numbers -- TODO confirm the cause.
model.fit(x_train, y_train, batch_size=32, epochs=5, validation_data=(x_test, y_test))

# (e) Text Generation Using the Trained Model
# Text generation needs a different setup: a language model trained on a text
# corpus to predict the next word. This model is a binary sentiment classifier,
# so text generation is out of scope here.

# (f) Evaluate Model’s Accuracy
# With metrics=['accuracy'], evaluate() returns [loss, accuracy]
score, acc = model.evaluate(x_test, y_test, batch_size=32)
print(f'Test score: {score}')
print(f'Test accuracy: {acc}')


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step




Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 185ms/step - accuracy: 0.6831 - loss: 5.3389 - val_accuracy: 0.7359 - val_loss: 0.5305
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 180ms/step - accuracy: 0.8590 - loss: 0.3438 - val_accuracy: 0.7380 - val_loss: 0.5222
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 206ms/step - accuracy: 0.7667 - loss: 2.2960 - val_accuracy: 0.6875 - val_loss: 0.5766
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 181ms/step - accuracy: 0.7066 - loss: 27.7581 - val_accuracy: 0.6866 - val_loss: 0.5784
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 181ms/step - accuracy: 0.7636 - loss: 13.5534 - val_accuracy: 0.6693 - val_loss: 0.6047
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 31ms/step - accuracy: 0.6678 - loss: 0.6110
Test score: 0.6046609878540039
Test accuracy: 0.66927999258041

In [None]:
pip install tensorflow



In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, GRU, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb
from sklearn.model_selection import train_test_split

# Hyperparameters
max_features = 10000  # number of distinct words kept in the vocabulary
maxlen = 100  # token count every review is padded/truncated to
batch_size = 32
epochs = 5

# Fetch the IMDb reviews, restricted to a vocabulary of `max_features` words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Bring both splits to the same fixed sequence length
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)

# Function to build the LSTM model
def build_lstm_model():
    """Build and compile the LSTM sentiment classifier.

    Architecture: Embedding (128-dim) -> LSTM (128 units, input and recurrent
    dropout of 0.2) -> Dense sigmoid unit for binary classification.

    The vocabulary size is read from the module-level ``max_features``.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, the
        Adam optimizer and accuracy as the reported metric.
    """
    model = Sequential()
    # `input_length` is deprecated in Keras 3 and only produces a warning there;
    # the sequence length is taken from the padded input on the first call.
    model.add(Embedding(max_features, 128))
    model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Function to build the GRU model
def build_gru_model():
    """Build and compile the GRU sentiment classifier.

    Architecture: Embedding (128-dim) -> GRU (128 units, input and recurrent
    dropout of 0.2) -> Dense sigmoid unit for binary classification.

    The vocabulary size is read from the module-level ``max_features``.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, the
        Adam optimizer and accuracy as the reported metric.
    """
    model = Sequential()
    # `input_length` is deprecated in Keras 3 and only produces a warning there;
    # the sequence length is taken from the padded input on the first call.
    model.add(Embedding(max_features, 128))
    model.add(GRU(128, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# (a) LSTM: build the model, then fit it while tracking metrics on the test split
print("Training LSTM model...")
lstm_model = build_lstm_model()
lstm_history = lstm_model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
)

# (b) GRU: identical training setup so the two runs are comparable
print("Training GRU model...")
gru_model = build_gru_model()
gru_history = gru_model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
)

# Final comparison: evaluate() yields (loss, accuracy) for each trained model
lstm_score, lstm_acc = lstm_model.evaluate(
    x_test, y_test, batch_size=batch_size
)
gru_score, gru_acc = gru_model.evaluate(
    x_test, y_test, batch_size=batch_size
)

print(f"\nLSTM Model - Test Accuracy: {lstm_acc}")
print(f"GRU Model - Test Accuracy: {gru_acc}")


Training LSTM model...
Epoch 1/5




[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 230ms/step - accuracy: 0.7296 - loss: 0.5282 - val_accuracy: 0.8341 - val_loss: 0.3831
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 230ms/step - accuracy: 0.8615 - loss: 0.3319 - val_accuracy: 0.8414 - val_loss: 0.3649
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 231ms/step - accuracy: 0.9063 - loss: 0.2422 - val_accuracy: 0.8455 - val_loss: 0.3758
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 232ms/step - accuracy: 0.9239 - loss: 0.1934 - val_accuracy: 0.8421 - val_loss: 0.4101
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 228ms/step - accuracy: 0.9440 - loss: 0.1486 - val_accuracy: 0.8430 - val_loss: 0.4438
Training GRU model...
Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 180ms/step - accuracy: 0.6641 - loss: 47.9940 - val_accuracy: 0.6811 - val_loss: 0.58