In [1]:
from tensorflow.keras.datasets import imdb
# We'll only use the top 10,000 most frequent words to keep it simple
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)

#  Print the number of training and test samples
print("Number of training samples:", len(X_train))
print("Number of test samples:", len(X_test))

print("Sample review (word indices):", X_train[0])
print("Label (0 = Negative, 1 = Positive):", y_train[0])


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Number of training samples: 25000
Number of test samples: 25000
Sample review (word indices): [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 2

In [2]:
# Step 2: Padding the sequences so all reviews are of the same length

from tensorflow.keras.preprocessing.sequence import pad_sequences

# We will set the maximum review length to 200
maxlen = 200

X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)

# Print shape after padding
print("Shape of training data:", X_train.shape)
print("Shape of test data:", X_test.shape)


Shape of training data: (25000, 200)
Shape of test data: (25000, 200)


In [3]:
# Step 3: Creating the LSTM model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

model = Sequential()

# Embedding layer to learn word meanings
model.add(Embedding(input_dim=10000, output_dim=128, input_length=maxlen))

# LSTM layer to learn patterns in text
model.add(LSTM(units=64))

# Final layer to predict sentiment (0 or 1)
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Show the model summary
model.summary()




In [4]:
# Step 4: Training the model

history = model.fit(X_train, y_train,
                    epochs=3,
                    batch_size=64,
                    validation_data=(X_test, y_test))


Epoch 1/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 338ms/step - accuracy: 0.7305 - loss: 0.5053 - val_accuracy: 0.8667 - val_loss: 0.3138
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 303ms/step - accuracy: 0.9090 - loss: 0.2392 - val_accuracy: 0.8662 - val_loss: 0.3147
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 342ms/step - accuracy: 0.9297 - loss: 0.1932 - val_accuracy: 0.8637 - val_loss: 0.3415


In [5]:
# Step 5: Evaluating the model

loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy: {:.2f}%".format(accuracy * 100))


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 33ms/step - accuracy: 0.8607 - loss: 0.3450
Test Accuracy: 86.37%


In [6]:
from tensorflow.keras.datasets import imdb
word_index = imdb.get_word_index()
#to convert text into word Ids


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [7]:
# Function to convert review text to padded sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences

def review_to_sequence(review, word_index, maxlen=200):
    # Convert to lowercase and split into words
    review = review.lower().split()

    # Convert words to their index in IMDb word_index
    encoded = [word_index.get(word, 2) for word in review]  # 2 = unknown word

    # Pad the sequence so it's same length as training data
    padded = pad_sequences([encoded], maxlen=maxlen)
    return padded


In [11]:
# 👇 Replace the review text with anything you like
my_review = "acting and direction were excellent."

# Convert text to sequence
my_sequence = review_to_sequence(my_review, word_index)

# Predict sentiment using your trained model
prediction = model.predict(my_sequence)

# Print result
print("Prediction Score:", prediction[0][0])
print("Predicted Sentiment:", "Positive 😀" if prediction[0][0] > 0.5 else "Negative 😠")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
Prediction Score: 0.53678006
Predicted Sentiment: Positive 😀
