<a href="https://colab.research.google.com/github/bhargav23/AIML-DL-Lab/blob/main/IMDB_Movie_Review_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**2. Design a neural network for classifying movie reviews (Binary Classification)using IMDB dataset.**

In [1]:
import tensorflow as tf
from tensorflow import keras
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, GlobalAveragePooling1D, Dense, Dropout

In [2]:
# --- 1. Define Parameters ---
# The vocabulary size. We will only consider the top 10,000 most common words.
VOCAB_SIZE = 10000
# The maximum length of a review (in words). Reviews longer than this will be truncated.
MAX_LEN = 256
# The dimension of the word embedding vector.
EMBEDDING_DIM = 16
# The size of the batches for training.
BATCH_SIZE = 512
# The number of epochs to train for.
EPOCHS = 10

In [3]:
# --- 2. Load and Preprocess the Data ---
print("Loading IMDB dataset...")
# Load the dataset, keeping only the top `VOCAB_SIZE` most frequent words.
# The data is already pre-tokenized, with each word represented by an integer.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=VOCAB_SIZE)

print(f"Number of training sequences: {len(train_data)}")
print(f"Number of testing sequences: {len(test_data)}")

# Pad the sequences to ensure they all have the same length (`MAX_LEN`).
# Shorter reviews will be padded with 0s, and longer ones will be truncated.
print("Padding sequences...")
train_data = pad_sequences(train_data, maxlen=MAX_LEN, padding='post', truncating='post')
test_data = pad_sequences(test_data, maxlen=MAX_LEN, padding='post', truncating='post')

print(f"Shape of training data after padding: {train_data.shape}")
print(f"Shape of testing data after padding: {test_data.shape}")

Loading IMDB dataset...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Number of training sequences: 25000
Number of testing sequences: 25000
Padding sequences...
Shape of training data after padding: (25000, 256)
Shape of testing data after padding: (25000, 256)


In [4]:
# --- 3. Build the Neural Network Model ---
print("Building the model...")
model = Sequential([
    # 1. Embedding Layer: This layer takes the integer-encoded vocabulary
    # and looks up the embedding vector for each word-index. These vectors are
    # learned as the model trains.
    Embedding(input_dim=VOCAB_SIZE, output_dim=EMBEDDING_DIM, input_length=MAX_LEN),

    # 2. GlobalAveragePooling1D: This layer returns a fixed-length output vector
    # for each example by averaging over the sequence dimension. This is a simple
    # way to handle variable-length inputs.
    GlobalAveragePooling1D(),

    # 3. Hidden Dense Layer: A standard fully-connected layer with ReLU activation.
    # It learns higher-level patterns from the features extracted by previous layers.
    Dense(16, activation='relu'),

    # 4. Dropout Layer: Helps prevent overfitting by randomly setting a fraction
    # of input units to 0 at each update during training time.
    Dropout(0.5),

    # 5. Output Layer: A single neuron with a sigmoid activation function.
    # The output is a float between 0 and 1, representing the probability
    # that the review is positive.
    Dense(1, activation='sigmoid')
])

Building the model...




In [5]:
# --- 4. Compile the Model ---
# We need to specify the optimizer, loss function, and metrics for the model.
print("Compiling the model...")
model.compile(
    optimizer='adam',
    loss='binary_crossentropy', # Ideal for binary (0 or 1) classification problems.
    metrics=['accuracy']
)

# Print a summary of the model's architecture
model.summary()

Compiling the model...


In [6]:
# --- 5. Train the Model ---
print("\n--- Training the model ---")
# Create a validation set from the training data to monitor performance.
x_val = train_data[:10000]
partial_x_train = train_data[10000:]

y_val = train_labels[:10000]
partial_y_train = train_labels[10000:]

# Train the model on the training data.
history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(x_val, y_val),
    verbose=1
)


--- Training the model ---
Epoch 1/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - accuracy: 0.5177 - loss: 0.6925 - val_accuracy: 0.6856 - val_loss: 0.6880
Epoch 2/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.5948 - loss: 0.6855 - val_accuracy: 0.6506 - val_loss: 0.6774
Epoch 3/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.6317 - loss: 0.6728 - val_accuracy: 0.6611 - val_loss: 0.6584
Epoch 4/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.6820 - loss: 0.6508 - val_accuracy: 0.7154 - val_loss: 0.6303
Epoch 5/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.7227 - loss: 0.6197 - val_accuracy: 0.7744 - val_loss: 0.5933
Epoch 6/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.7558 - loss: 0.5850 - val_accuracy: 0.8077 - val_loss: 0.5535
Epoch 

In [7]:
# --- 6. Evaluate the Model ---
print("\n--- Evaluating the model ---")
# Evaluate the trained model on the test set.
results = model.evaluate(test_data, test_labels, verbose=2)

print(f"\nTest Loss: {results[0]:.4f}")
print(f"Test Accuracy: {results[1]:.4f}")

# --- 7. Example Prediction ---
# You can use the trained model to make predictions on new data.
# Let's see the prediction for the first test review.
prediction = model.predict(test_data[0:1])
print(f"\nPrediction for first test review: {prediction[0][0]:.4f}")
print(f"Actual label for first test review: {test_labels[0]}")
print("A prediction > 0.5 is considered positive, and <= 0.5 is negative.")


--- Evaluating the model ---
782/782 - 1s - 2ms/step - accuracy: 0.8382 - loss: 0.4324

Test Loss: 0.4324
Test Accuracy: 0.8382
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step

Prediction for first test review: 0.3738
Actual label for first test review: 0
A prediction > 0.5 is considered positive, and <= 0.5 is negative.
