# Loading Libraries

In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
import numpy as np

In [3]:
vocab_size = 10000

In [4]:
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = vocab_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [5]:
max_len = 200

In [6]:
X_train = pad_sequences(X_train, maxlen=max_len, padding='post', truncating='post')
X_test = pad_sequences(X_test, maxlen=max_len, padding='post', truncating='post')

In [7]:
X_train.shape, X_test.shape

((25000, 200), (25000, 200))

# Importing Glove Vector

In [8]:
!wget http://nlp.stanford.edu/data/glove.6B.zip

--2025-05-02 07:16:28--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2025-05-02 07:16:28--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2025-05-02 07:16:28--  https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


202

In [9]:
!unzip glove.6B.zip

Archive:  glove.6B.zip
  inflating: glove.6B.50d.txt        
  inflating: glove.6B.100d.txt       
  inflating: glove.6B.200d.txt       
  inflating: glove.6B.300d.txt       


In [10]:
embedding_index ={}
with open('glove.6B.100d.txt', encoding='utf-8') as f:
  for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embedding_index[word] = coefs

In [11]:
word_index = imdb.get_word_index()
embedding_dim = 100
embedding_matrix = np.zeros((vocab_size,embedding_dim))
for word, i in imdb.get_word_index().items():
  if i < vocab_size:
    embedding_vector = embedding_index.get(word)
    if embedding_vector is not None:
      embedding_matrix[i] = embedding_vector

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [16]:
model = Sequential([
    Embedding(vocab_size,embedding_dim,input_length=max_len, weights=[embedding_matrix], trainable = False),
    LSTM(32, dropout=0.2, recurrent_dropout=0.2, return_sequences=False),
    Dense(16, activation = 'relu'),
    Dense(1, activation='sigmoid')
])



In [17]:
model.compile(
    optimizer = 'rmsprop',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

In [18]:
history = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2
)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 313ms/step - accuracy: 0.4987 - loss: 0.6948 - val_accuracy: 0.5010 - val_loss: 0.6934
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 324ms/step - accuracy: 0.5110 - loss: 0.6929 - val_accuracy: 0.5058 - val_loss: 0.6930
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 320ms/step - accuracy: 0.5073 - loss: 0.6922 - val_accuracy: 0.5166 - val_loss: 0.6916
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 319ms/step - accuracy: 0.5175 - loss: 0.6899 - val_accuracy: 0.5086 - val_loss: 0.6948
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 312ms/step - accuracy: 0.5331 - loss: 0.6886 - val_accuracy: 0.5376 - val_loss: 0.6878
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 318ms/step - accuracy: 0.5402 - loss: 0.6857 - val_accuracy: 0.5842 - val_loss: 0.6799
Epoc

In [19]:
history2 = model.fit(
    X_train, y_train,
    batch_size=32,
    initial_epoch = 10,
    epochs=20,
    validation_split=0.2
)

Epoch 11/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 323ms/step - accuracy: 0.6128 - loss: 0.6591 - val_accuracy: 0.6302 - val_loss: 0.6531
Epoch 12/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 323ms/step - accuracy: 0.6145 - loss: 0.6591 - val_accuracy: 0.6370 - val_loss: 0.6519
Epoch 13/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 320ms/step - accuracy: 0.6266 - loss: 0.6512 - val_accuracy: 0.6300 - val_loss: 0.6525
Epoch 14/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 315ms/step - accuracy: 0.6354 - loss: 0.6422 - val_accuracy: 0.5916 - val_loss: 0.6582
Epoch 15/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 313ms/step - accuracy: 0.6245 - loss: 0.6464 - val_accuracy: 0.6362 - val_loss: 0.6553
Epoch 16/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 315ms/step - accuracy: 0.6396 - loss: 0.6398 - val_accuracy: 0.6468 - val_loss: 0.644

# Final Loss And Accuracy

In [20]:
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy*100:.2f}%")

Test Accuracy: 65.18%


# Prediction On Unseen Data

In [21]:
word_index = imdb.get_word_index()
reverse_word_index = {v: k for k, v in word_index.items()}

def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i, '?') for i in encoded_review])

def predict_sentiment(text):
    encoded = [word_index.get(word.lower(), 0) for word in text.split()]
    padded = pad_sequences([encoded], maxlen=max_len)
    prob = model.predict(padded)[0][0]
    return "Positive" if prob > 0.5 else "Negative", prob

In [22]:
sample_reviews = [
    "This movie was fantastic! The acting was brilliant.",
    "Terrible plot and awful acting. Waste of time.",
    "It was okay, not great but not bad either."
]

In [23]:
for review in sample_reviews:
    sentiment, confidence = predict_sentiment(review)
    print(f"Review: '{review[:50]}...'")
    print(f"Prediction: {sentiment} ({confidence:.4f})\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 758ms/step
Review: 'This movie was fantastic! The acting was brilliant...'
Prediction: Positive (0.6415)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
Review: 'Terrible plot and awful acting. Waste of time....'
Prediction: Positive (0.6464)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step
Review: 'It was okay, not great but not bad either....'
Prediction: Positive (0.5487)

