<a href="https://colab.research.google.com/github/habiba186/ML_project/blob/main/NLP_TASK.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Sentiment Analysis using Recurrent Neural Networks (RNN)**

**Load and Preprocess Data**

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Settings shared by the cells below: vocabulary cap and fixed review length
num_words = 10000
max_len = 200

# Fetch the IMDB reviews, restricted to the `num_words` most frequent words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

# Bring every review to exactly `max_len` ids; short reviews get padding appended at the end
x_train = pad_sequences(x_train, padding="post", maxlen=max_len)
x_test = pad_sequences(x_test, padding="post", maxlen=max_len)

# Sanity check: both arrays should be (number of reviews, max_len)
print(f"Training data shape: {x_train.shape}")
print(f"Testing data shape: {x_test.shape}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training data shape: (25000, 200)
Testing data shape: (25000, 200)


**Create a Simple RNN Model**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

# Define the model
model = Sequential([
    # Declare the input shape up front instead of the deprecated `input_length` argument,
    # so the model is built and summary() can report shapes and parameter counts.
    # (`tf` is imported in the first code cell.)
    tf.keras.Input(shape=(max_len,)),
    # Word embedding layer. The reviews are zero-padded at the END (padding="post"), so
    # without masking the RNN's final state is computed after a long run of padding steps.
    # mask_zero=True makes the recurrent layer skip those padded positions.
    Embedding(input_dim=num_words, output_dim=128, mask_zero=True),
    SimpleRNN(64, return_sequences=False),  # RNN layer; only the last state is returned
    Dense(1, activation='sigmoid'),  # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()




**Training the Model**

In [None]:
# Train for 5 passes over the training set in mini-batches of 64. The test split is
# supplied as validation data so val_loss / val_accuracy are reported after each epoch.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    batch_size=64,
    epochs=5,
)


Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 116ms/step - accuracy: 0.5091 - loss: 0.6918 - val_accuracy: 0.5451 - val_loss: 0.6782
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 108ms/step - accuracy: 0.6038 - loss: 0.6457 - val_accuracy: 0.5407 - val_loss: 0.6954
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 115ms/step - accuracy: 0.6422 - loss: 0.5868 - val_accuracy: 0.5812 - val_loss: 0.6627
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 116ms/step - accuracy: 0.6734 - loss: 0.5247 - val_accuracy: 0.5555 - val_loss: 0.7288
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 112ms/step - accuracy: 0.6948 - loss: 0.5018 - val_accuracy: 0.5515 - val_loss: 0.7076


**Evaluating the Model**

In [None]:
# Score the trained model on the test split. evaluate() returns the loss followed by the
# metric requested at compile time (accuracy); the accuracy is printed with two decimals.
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 16ms/step - accuracy: 0.5500 - loss: 0.7101
Test Accuracy: 0.55


# **Sentiment Analysis using LSTM**

**Load and Preprocess Data**

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Same loading and padding steps as in the SimpleRNN section, repeated here so that
# x_train / x_test are freshly loaded before the LSTM model is built.
num_words = 10000  # keep only the 10,000 most frequent words
max_len = 200      # fixed number of ids per review after padding

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

# Reviews shorter than `max_len` are filled with trailing zeros
x_train = pad_sequences(x_train, padding="post", maxlen=max_len)
x_test = pad_sequences(x_test, padding="post", maxlen=max_len)

# Report the resulting array shapes
print(f"Training data shape: {x_train.shape}")
print(f"Testing data shape: {x_test.shape}")


Training data shape: (25000, 200)
Testing data shape: (25000, 200)


**Build an LSTM Model**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Define the model
model = Sequential([
    # Explicit input shape replaces the deprecated `input_length` argument of Embedding and
    # lets summary() show shapes and parameter counts. (`tf` is imported in the cell above.)
    tf.keras.Input(shape=(max_len,)),
    # Embedding layer. mask_zero=True flags the trailing zero padding (padding="post") so
    # the LSTM ignores padded positions instead of updating its state on them.
    Embedding(input_dim=num_words, output_dim=128, mask_zero=True),
    # LSTM layer with dropout on both the inputs and the recurrent connections
    LSTM(128, return_sequences=False, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),  # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()




**Train the Model**

In [None]:
# Fit the LSTM with the same schedule as the SimpleRNN run: 5 epochs, batches of 64,
# with the test split passed as validation data for per-epoch monitoring.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    batch_size=64,
    epochs=5,
)


Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m344s[0m 869ms/step - accuracy: 0.5378 - loss: 0.6827 - val_accuracy: 0.7858 - val_loss: 0.5514
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m378s[0m 860ms/step - accuracy: 0.6852 - loss: 0.5772 - val_accuracy: 0.7979 - val_loss: 0.5241
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m348s[0m 774ms/step - accuracy: 0.6791 - loss: 0.5814 - val_accuracy: 0.6034 - val_loss: 0.6195
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m320s[0m 769ms/step - accuracy: 0.7150 - loss: 0.5128 - val_accuracy: 0.8588 - val_loss: 0.3379
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m339s[0m 867ms/step - accuracy: 0.9093 - loss: 0.2490 - val_accuracy: 0.8661 - val_loss: 0.3366


**Evaluate the Model**

In [None]:
# Evaluate the trained LSTM on the test split. evaluate() yields the loss and then the
# accuracy metric configured in compile(); only the accuracy is printed (two decimals).
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 86ms/step - accuracy: 0.8666 - loss: 0.3387
Test Accuracy: 0.87


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# NOTE(review): this Tokenizer is only constructed here. It is never fitted on any text and
# is not used by the code in this cell; encode_text() below relies on `word_index` instead.
tokenizer = Tokenizer(num_words=num_words)
word_index = imdb.get_word_index()  # Word-to-index mapping shipped with the IMDB dataset

# Function to convert words to numerical sequence
def encode_text(text):
    """Convert raw review text into a padded id sequence the trained model can consume.

    Mirrors the encoding that `imdb.load_data(num_words=num_words)` applies by default:
    id 0 is padding, 1 marks the start of a review, 2 stands for out-of-vocabulary
    words, and every word rank from `word_index` is shifted up by 3.

    Args:
        text: Review text as a plain string.

    Returns:
        The result of `pad_sequences` for a single sequence: one row of `max_len` ids,
        zero-padded at the end.
    """
    index_offset, start_id, oov_id = 3, 1, 2

    # Replace punctuation with spaces so that "amazing!" matches the vocabulary entry
    # "amazing" instead of falling through to the out-of-vocabulary id.
    punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
    cleaned = text.lower().translate(str.maketrans(punctuation, " " * len(punctuation)))

    # Training sequences begin with the start-of-review marker, so do the same here.
    encoded = [start_id]
    for word in cleaned.split():
        rank = word_index.get(word)
        token_id = oov_id if rank is None else rank + index_offset
        # Ids at or beyond `num_words` were replaced by the OOV id in the training data and
        # would index past the Embedding table (input_dim=num_words), so clamp them too.
        encoded.append(token_id if token_id < num_words else oov_id)

    return pad_sequences([encoded], maxlen=max_len, padding="post")  # Pad sequence

# Function to predict sentiment
def predict_sentiment(text, model):
    """Classify a review as positive or negative using `model`.

    Args:
        text: Raw review text; it is encoded with `encode_text` before prediction.
        model: Object whose `predict` method returns a nested result from which the
            score for the single input is read at position [0][0].

    Returns:
        "Positive 😊" when the score is strictly greater than 0.5, otherwise "Negative 😞".
    """
    batch = encode_text(text)
    score = model.predict(batch)[0][0]
    if score > 0.5:
        return "Positive 😊"
    return "Negative 😞"

# Example usage: classify one hand-written review with whichever `model` is currently
# bound (the LSTM when the notebook is run top to bottom) and print the resulting label.
new_review = "The movie was amazing! I loved the plot and the characters."
print(predict_sentiment(new_review, model))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 474ms/step
Positive 😊
