<a href="https://colab.research.google.com/github/YaraEmad208/NLP-Task-/blob/main/NLP_Task.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout
from tensorflow.keras.utils import to_categorical


1. Load the built-in IMDB dataset with the top 10,000 most frequent words.

In [2]:
num_words = 10000
maxlen = 100  # Each review will be cut off or padded to 100 words.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


2. Preprocess the data: Pad sequences to ensure uniform length.

In [3]:
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

In [4]:
y_train_cat = to_categorical(y_train, num_classes=2)
y_test_cat = to_categorical(y_test, num_classes=2)

3. Build the RNN model.

In [6]:
model = Sequential([
    # Embedding layer: Converts word indices into dense vectors.
    Embedding(input_dim=num_words, output_dim=32, input_length=maxlen),
    # RNN layer (using SimpleRNN here; you could also try GRU or LSTM)
    SimpleRNN(32, dropout=0.2, recurrent_dropout=0.2),
    Dense(128, activation='relu'),
    Dropout(0.2),
    # Dense output layer with softmax activation for binary classification.
    Dense(2, activation='softmax')
])

4. Compile the model with categorical_crossentropy loss.

In [7]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

5. Train the model.

In [8]:
model.fit(x_train, y_train_cat, epochs=25, batch_size=64, validation_split=0.2)


Epoch 1/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 20ms/step - accuracy: 0.4980 - loss: 0.7015 - val_accuracy: 0.4970 - val_loss: 0.6925
Epoch 2/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.5124 - loss: 0.6948 - val_accuracy: 0.5570 - val_loss: 0.6858
Epoch 3/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.5397 - loss: 0.6884 - val_accuracy: 0.5766 - val_loss: 0.6658
Epoch 4/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.6036 - loss: 0.6556 - val_accuracy: 0.6418 - val_loss: 0.6114
Epoch 5/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.6855 - loss: 0.5827 - val_accuracy: 0.6758 - val_loss: 0.5890
Epoch 6/25
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.7488 - loss: 0.5007 - val_accuracy: 0.7200 - val_loss: 0.5454
Epoch 7/25
[1m313/31

<keras.src.callbacks.history.History at 0x7da59eedbb10>

6. Evaluate the model on the test dataset.

In [9]:
loss, accuracy = model.evaluate(x_test, y_test_cat)
print("Test Accuracy:", accuracy)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.7132 - loss: 0.8961
Test Accuracy: 0.7164000272750854
