In [71]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense

# Load IMDB dataset
num_words = 10000
max_len = 200
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

# Pad sequences
x_train = pad_sequences(x_train, maxlen=max_len, padding='post')
x_test = pad_sequences(x_test, maxlen=max_len, padding='post')

# Build GRU model
model = Sequential([
    Embedding(input_dim=num_words, output_dim=64),
    GRU(64),
    Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


# Train model
model.fit(x_train, y_train, epochs=5, batch_size=128, validation_split=0.2)
model.summary()
# Function to encode review
word_index = imdb.get_word_index()
def encode_review(review):
    encoded = []
    for word in review.lower().split():
        idx = word_index.get(word, 2) + 3  # 2 = OOV
        if idx >= num_words:
            idx = 2
        encoded.append(idx)
    return pad_sequences([encoded], maxlen=max_len, padding='post')

# Function to predict sentiment
def predict_sentiment(review_text):
    encoded = encode_review(review_text)
    pred = model.predict(encoded)
    return "Positive" if pred[0][0] > 0.5 else "Negative"

# Test reviews
review1 = "I loved the movie, it was fantastic and amazing"
review2 = "The movie was terrible and I hated it"

print(f"Review: {review1}\nPredicted Sentiment: {predict_sentiment(review1)}")
print(f"Review: {review2}\nPredicted Sentiment: {predict_sentiment(review2)}")


Epoch 1/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.5306 - loss: 0.6878 - val_accuracy: 0.5774 - val_loss: 0.6552
Epoch 2/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.6492 - loss: 0.6137 - val_accuracy: 0.8170 - val_loss: 0.4389
Epoch 3/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.8636 - loss: 0.3464 - val_accuracy: 0.8742 - val_loss: 0.3140
Epoch 4/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.9200 - loss: 0.2209 - val_accuracy: 0.8876 - val_loss: 0.2962
Epoch 5/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.9514 - loss: 0.1432 - val_accuracy: 0.8876 - val_loss: 0.3015


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step
Review: I loved the movie, it was fantastic and amazing
Predicted Sentiment: Positive
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
Review: The movie was terrible and I hated it
Predicted Sentiment: Negative


In [72]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import re



In [73]:
num_words = 20000   # top 10k words
max_len = 200       # max sequence length
embedding_dim = 64

In [74]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

# Pad sequences to the same length
x_train = pad_sequences(x_train, maxlen=max_len, padding='post')
x_test = pad_sequences(x_test, maxlen=max_len, padding='post')

print(f"Training samples: {x_train.shape}")
print(f"Test samples: {x_test.shape}")

Training samples: (25000, 200)
Test samples: (25000, 200)


In [75]:
model = Sequential([
    Embedding(input_dim=num_words, output_dim=embedding_dim),
    GRU(128, dropout=0.3, recurrent_dropout=0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])


In [76]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [77]:
early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)


In [79]:
history = model.fit(
    x_train, y_train,
    validation_split=0.2,
    epochs=3,
    batch_size=128,
    callbacks=[early_stop],
    verbose=1
)


Epoch 1/3
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 584ms/step - accuracy: 0.5852 - loss: 0.6538 - val_accuracy: 0.5984 - val_loss: 0.6244
Epoch 2/3
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 586ms/step - accuracy: 0.6542 - loss: 0.5578 - val_accuracy: 0.6382 - val_loss: 0.5986
Epoch 3/3
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 575ms/step - accuracy: 0.7626 - loss: 0.4649 - val_accuracy: 0.8314 - val_loss: 0.3934


In [80]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f"\nTest Accuracy: {accuracy*100:.2f}%")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 116ms/step - accuracy: 0.8279 - loss: 0.4039

Test Accuracy: 82.99%


In [81]:
model.summary()

In [82]:
word_index = imdb.get_word_index()

def encode_review(review):
    review = re.sub(r"[^\w\s]", "", review.lower())
    tokens = review.split()
    seq = [word_index.get(word, 2)+3 for word in tokens]  # +3 offset for Keras
    seq = [idx if idx < num_words else 2 for idx in seq]  # handle OOV
    return pad_sequences([seq], maxlen=max_len, padding='post')

def predict_sentiment(review_text):
    encoded = encode_review(review_text)
    pred = model.predict(encoded)[0][0]
    sentiment = "Positive" if pred > 0.5 else "Negative"
    return sentiment, pred


In [83]:

review_pos = "I loved the movie, it was fantastic and amazing"
review_neg = "The movie was terrible and I hated it"

print("\nPositive Review Test:", predict_sentiment(review_pos))
print("Negative Review Test:", predict_sentiment(review_neg))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 448ms/step

Positive Review Test: ('Positive', np.float32(0.8940293))
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step
Negative Review Test: ('Negative', np.float32(0.32887846))
