In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models


In [None]:
# Fetch the IMDB review dataset. Reviews arrive already encoded as lists of
# integer word ids; num_words=10000 restricts the vocabulary to 10,000 ids.
train_split, test_split = tf.keras.datasets.imdb.load_data(num_words=10000)
X_train, y_train = train_split
X_test, y_test = test_split

# Quick sanity check on the size of each split.
print("Training samples:", len(X_train))
print("Testing samples:", len(X_test))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training samples: 25000
Testing samples: 25000


In [None]:
# Reviews have different lengths; force every one to exactly 200 token ids
# (shorter reviews are padded, longer ones truncated) so they stack into a
# rectangular array the model can consume.
pad_sequences = tf.keras.preprocessing.sequence.pad_sequences
X_train = pad_sequences(X_train, maxlen=200)
X_test = pad_sequences(X_test, maxlen=200)


In [None]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation (and other seeded randomness further down) is repeatable
# when the notebook is re-run from a fresh kernel.
tf.keras.utils.set_random_seed(42)

# Small sentiment classifier: embed each word id, average the embeddings over
# the review, then classify with a tiny dense head.
model = models.Sequential([
    # Declare the input shape explicitly instead of passing `input_length` to
    # Embedding: that argument is deprecated in recent Keras releases (it only
    # triggers a warning). 200 matches the maxlen used when padding the reviews.
    layers.Input(shape=(200,)),
    # 10000 matches num_words used when loading the dataset.
    layers.Embedding(input_dim=10000, output_dim=32),
    layers.GlobalAveragePooling1D(),
    layers.Dense(16, activation='relu'),
    # Single sigmoid unit: output is read as the probability of a positive review.
    layers.Dense(1, activation='sigmoid'),
])

model.summary()




In [None]:
# Binary labels + sigmoid output -> binary cross-entropy loss; accuracy is
# tracked as the reporting metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train for 5 epochs in batches of 512 reviews. validation_split=0.2 asks
# Keras to set aside 20% of the training data for per-epoch validation.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=512,
    epochs=5,
    validation_split=0.2,
)


Epoch 1/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step - accuracy: 0.5582 - loss: 0.6911 - val_accuracy: 0.6770 - val_loss: 0.6773
Epoch 2/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.7105 - loss: 0.6692 - val_accuracy: 0.7408 - val_loss: 0.6295
Epoch 3/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - accuracy: 0.7678 - loss: 0.6116 - val_accuracy: 0.7894 - val_loss: 0.5489
Epoch 4/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.8098 - loss: 0.5224 - val_accuracy: 0.8262 - val_loss: 0.4666
Epoch 5/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - accuracy: 0.8422 - loss: 0.4384 - val_accuracy: 0.8398 - val_loss: 0.4091


In [None]:
# Score the trained model on the held-out test split; evaluate() yields the
# loss followed by the accuracy metric configured in compile().
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {test_acc * 100:.2f}%")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8324 - loss: 0.4120
Test Accuracy: 83.39%


In [None]:
# Load the word -> frequency-rank mapping that ships with the IMDB dataset.
# NOTE: these are the *raw* ranks; `imdb.load_data` shifts them before use
# (see encode_review below).
word_index = tf.keras.datasets.imdb.get_word_index()


def encode_review(text, num_words=10000, maxlen=200, index_from=3):
    """Encode a raw review string the same way `imdb.load_data` encoded the training data.

    `imdb.load_data` reserves id 0 for padding, 1 for the start-of-review
    marker and 2 for out-of-vocabulary words, and therefore offsets every
    rank from `get_word_index()` by `index_from` (3 by default). Ids that
    fall outside the `num_words` vocabulary are replaced by the OOV id.
    Skipping these steps feeds the model ids that stand for different words
    than the ones it was trained on, and can produce ids outside the
    embedding table.

    Args:
        text: Review text; it is lower-cased and split on whitespace.
        num_words: Vocabulary size used when loading the dataset.
        maxlen: Sequence length the model expects.
        index_from: Offset `load_data` applied to the raw word ranks.

    Returns:
        Integer array of shape (1, maxlen) ready for `model.predict`.
    """
    start_id, oov_id = 1, 2  # load_data defaults: start_char=1, oov_char=2

    # Every training review begins with the start marker.
    encoded = [start_id]
    for word in text.lower().split():
        rank = word_index.get(word)
        token_id = oov_id if rank is None else rank + index_from
        # Words rarer than the vocabulary cut-off were mapped to OOV during
        # training; they would also index past the end of the Embedding table.
        if token_id >= num_words:
            token_id = oov_id
        encoded.append(token_id)

    padded = tf.keras.preprocessing.sequence.pad_sequences([encoded], maxlen=maxlen)
    return padded

# Try the classifier on a hand-written review.
review = "this movie was very interesting and amazing"
encoded_review = encode_review(review)
prediction = model.predict(encoded_review)

# The model emits one sigmoid score per review; above 0.5 is read as positive.
if prediction[0][0] > 0.5:
    print("Positive")
else:
    print("Negative")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
Positive
