In [1]:
# Binary classification using Deep Neural Networks. Example: classify movie
#  reviews into "positive" and "negative" reviews, based only on the text content of the reviews. Uses
#  the IMDB dataset.

In [6]:
import numpy as np  # Importing NumPy, a package for numerical computing in Python (used for arrays, matrices, etc.)
from keras.datasets import imdb  # Importing the IMDB dataset from Keras, which is pre-tokenized and used for sentiment analysis
from keras import models, layers  # Importing Keras model and layers modules for defining and building the neural network


In [None]:
# NOTE(review): none of the cells shown here import keras_preprocessing — confirm this install is still needed.
pip install Keras-Preprocessing

In [8]:
# Fetch the Keras IMDB sentiment dataset, already encoded as integer word indices.
# num_words caps the vocabulary at the 10,000 most frequent words; per the Keras
# docs, rarer words are replaced by the out-of-vocabulary index rather than dropped.
# x_* hold the encoded reviews, y_* the labels (0 = negative, 1 = positive).
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10_000)


In [9]:
# Convert sequences of word indices into binary vector form
def vectorize(sequences, dim=10000, dtype=np.float64):
    """Multi-hot encode integer index sequences into a dense 2-D array.

    Args:
        sequences: Sized iterable of integer index sequences, one per sample.
            Every index must be smaller than ``dim``.
        dim: Width of each output row.
        dtype: NumPy dtype of the result. Defaults to float64, which is what
            ``np.zeros`` produced before this parameter existed; pass
            ``np.float32`` to halve the memory footprint.

    Returns:
        Array of shape ``(len(sequences), dim)`` where ``out[i, j]`` is 1 if
        index ``j`` occurs in ``sequences[i]`` and 0 otherwise. Repeated
        indices still yield 1; occurrence counts are not kept.
    """
    result = np.zeros((len(sequences), dim), dtype=dtype)
    for row, indices in enumerate(sequences):
        # Fancy-index assignment sets every listed column of this row at once.
        result[row, indices] = 1
    return result

# Swap the raw index sequences for their multi-hot matrices under the same names.
# Not re-runnable on its own: once these names hold float matrices, vectorizing
# them again would index with floats (expected to fail) — reload the dataset first.
x_train, x_test = vectorize(x_train), vectorize(x_test)

In [13]:
from tensorflow.keras import layers, models

# Two-layer fully connected binary classifier, assembled layer by layer.
model = models.Sequential()
# Each sample is a 10,000-wide vector — the row width vectorize() produces by default.
model.add(layers.Input(shape=(10000,)))
# Hidden layer: 16 units, with ReLU supplying the non-linearity.
model.add(layers.Dense(16, activation='relu'))
# Output layer: a single sigmoid unit, so the prediction is a value in (0, 1)
# that reads as the probability of the positive class.
model.add(layers.Dense(1, activation='sigmoid'))


In [11]:
# Configure training: Adam updates the weights, binary cross-entropy is the loss
# for the 0/1 labels, and accuracy is reported alongside it.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [12]:
# Train for 5 passes over the data in mini-batches of 512 samples.
# validation_split=0.2 holds out the last 20% of the training arrays (Keras takes
# the split before any shuffling) and reports val_* metrics on it after every epoch.
# The returned History is kept so the per-epoch metrics in history.history stay
# available for inspection or plotting, instead of only echoing the object's repr.
history = model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=512,
    validation_split=0.2,
)


Epoch 1/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.7083 - loss: 0.5897 - val_accuracy: 0.8748 - val_loss: 0.3636
Epoch 2/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9019 - loss: 0.3049 - val_accuracy: 0.8850 - val_loss: 0.2997
Epoch 3/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9266 - loss: 0.2323 - val_accuracy: 0.8882 - val_loss: 0.2804
Epoch 4/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9382 - loss: 0.1936 - val_accuracy: 0.8920 - val_loss: 0.2746
Epoch 5/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.9491 - loss: 0.1661 - val_accuracy: 0.8876 - val_loss: 0.2783


<keras.src.callbacks.history.History at 0x1fb1a4d38b0>

In [20]:
# Score the trained model on the held-out test set. With the single 'accuracy'
# metric configured at compile time, Keras returns [loss, accuracy].
results = model.evaluate(x=x_test, y=y_test)



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8833 - loss: 0.2899


In [None]:
# The model achieved an accuracy of 88.33% on the test set, meaning it correctly predicted the
# class (positive or negative sentiment) for 88.33% of the held-out test reviews.
# The loss of 0.2899 is the binary cross-entropy, which measures how far the model's predicted
# probabilities are from the actual labels; lower loss indicates better performance.