In [28]:
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, GlobalAveragePooling1D, Dense, LSTM, Dropout
from keras.datasets import imdb
import numpy as np
from keras.models import load_model


In [29]:
# Constants for data preprocessing
# Constants for data preprocessing
max_length = 256  # Maximum length of the sequences
padding_type = 'post'  # Padding type for sequences shorter than the maximum length
vocab_size = 1000000  # Size of the vocabulary used in the Embedding layer
embedding_dim = 64  # Dimensionality of the embedding layer (increased)
hidden_units = 64  # Number of hidden units in the LSTM layer (increased)
dropout_rate = 0.5  # Dropout rate for regularization

# Load the IMDB dataset
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=vocab_size)

# Helper function to preprocess data
def preprocess_data(data):
    return pad_sequences(data, maxlen=max_length, padding=padding_type)

# Preprocess the data
train_data = preprocess_data(train_data)
test_data = preprocess_data(test_data)

# Define the model architecture
def build_model(vocab_size, embedding_dim, hidden_units, dropout_rate):
    model = Sequential([
        Embedding(vocab_size, embedding_dim, input_length=max_length),
        LSTM(hidden_units, dropout=dropout_rate, recurrent_dropout=dropout_rate),  # LSTM layer with dropout
        Dense(hidden_units, activation='relu'),
        Dropout(dropout_rate),  # Dropout layer for regularization
        Dense(1, activation='sigmoid')
    ])
    return model



In [31]:
# Build and compile the model
model = build_model(vocab_size, embedding_dim, hidden_units, dropout_rate)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()



In [32]:
# Train and evaluate the model
history = model.fit(train_data, train_labels, epochs=10, batch_size=32, validation_data=(test_data, test_labels), verbose=2)
test_loss, test_acc = model.evaluate(test_data, test_labels, verbose=2)
print(f"Test Accuracy: {test_acc}, Test Loss: {test_loss}")

Epoch 1/10
782/782 - 446s - 570ms/step - accuracy: 0.5309 - loss: 0.6825 - val_accuracy: 0.5619 - val_loss: 0.6534
Epoch 2/10
782/782 - 426s - 545ms/step - accuracy: 0.5861 - loss: 0.6323 - val_accuracy: 0.5778 - val_loss: 0.6392
Epoch 3/10
782/782 - 442s - 565ms/step - accuracy: 0.6060 - loss: 0.5924 - val_accuracy: 0.5724 - val_loss: 0.6459
Epoch 4/10
782/782 - 423s - 541ms/step - accuracy: 0.6232 - loss: 0.5643 - val_accuracy: 0.5841 - val_loss: 0.6525
Epoch 5/10
782/782 - 442s - 565ms/step - accuracy: 0.6370 - loss: 0.5437 - val_accuracy: 0.6613 - val_loss: 0.6558
Epoch 6/10
782/782 - 444s - 567ms/step - accuracy: 0.8089 - loss: 0.4375 - val_accuracy: 0.8048 - val_loss: 0.4835
Epoch 7/10
782/782 - 440s - 562ms/step - accuracy: 0.8550 - loss: 0.3681 - val_accuracy: 0.8146 - val_loss: 0.4552
Epoch 8/10
782/782 - 441s - 564ms/step - accuracy: 0.8889 - loss: 0.2903 - val_accuracy: 0.8153 - val_loss: 0.4719
Epoch 9/10
782/782 - 442s - 565ms/step - accuracy: 0.9170 - loss: 0.2288 - val_a

In [33]:
#save the model
# Save the model to a file
model.save('sentiment_model.keras')

In [34]:
# Decode review function
word_index = imdb.get_word_index()
def decode_review(encoded_review):
    reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
    return ' '.join([reverse_word_index.get(i - 3, '?') for i in encoded_review if i >= 3])

# Display incorrect predictions
def display_incorrect_predictions(test_data, test_labels, predictions, num_examples=3):
    predicted_classes = (predictions > 0.5).astype(int)
    incorrect_indices = np.where(predicted_classes.flatten() != test_labels)[0]
    for i, idx in enumerate(incorrect_indices[:num_examples]):
        print(f"Incorrect Prediction {i+1}:")
        print(f"Review: {decode_review(test_data[idx])}")
        print(f"Actual Sentiment: {'Positive' if test_labels[idx] == 1 else 'Negative'}")
        print(f"Predicted Sentiment: {'Positive' if predicted_classes[idx][0] == 1 else 'Negative'}")
        print("--------------------------------------------------------------------------------\n")

predictions = model.predict(test_data)
display_incorrect_predictions(test_data, test_labels, predictions)

# Predict sentiments for sample reviews and display them
def predict_and_display_reviews(reviews,model_gen):
    sequences = [[word_index.get(word, 2) for word in review.lower().split()] for review in reviews]
    padded_sequences = preprocess_data(sequences)
    sample_predictions = model_gen.predict(padded_sequences)
    sample_predicted_classes = (sample_predictions > 0.5).astype(int)
    for i, review in enumerate(reviews):
        print(f"Review {i+1}: {review}")
        print(f'Predicted Score: {sample_predictions[i]}')
        print(f"Predicted Sentiment: {'Positive' if sample_predicted_classes[i][0] == 1 else 'Negative'}")
        print("--------------------------------------------------------------------------------\n")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 103ms/step
Incorrect Prediction 1:
Review: i'm absolutely disgusted this movie isn't being sold all who love this movie should email disney and increase the demand for it they'd eventually have to sell it then i'd buy copies for everybody i know everything and everybody in this movie did a good job and i haven't figured out why disney hasn't put this movie on dvd or on vhs in rental stores at least i haven't seen any copies this is a wicked good movie and should be seen by all the kids in the new generation don't get to see it and i think they should it should at least be put back on the channel this movie doesn't deserve a cheap download it deserves the real thing i'm them now this movie will be on dvd
Actual Sentiment: Positive
Predicted Sentiment: Negative
--------------------------------------------------------------------------------

Incorrect Prediction 2:
Review: odessa steps the broad expanse of the steps are f

In [37]:
# Sample movie reviews
reviews = [
    "nothing was right about the movie. Pathetic story, slow pace, bad dialogues.",
    "This movie was an excellent portrayal of character development and had stellar acting.",
    "I found the movie to be predictable with a lackluster script.",
    "The cinematography was magnificent, and the pacing was perfect. Highly recommend watching.",
    "It was a terrible movie that wasted two hours of my life. The plot made no sense.",
    "An absolute masterpiece, with a gripping story and profound performances."
]
model_gen = load_model('sentiment_model.keras')
predict_and_display_reviews(reviews,model_gen)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 279ms/step
Review 1: nothing was right about the movie. Pathetic story, slow pace, bad dialogues.
Predicted Score: [0.9015933]
Predicted Sentiment: Positive
--------------------------------------------------------------------------------

Review 2: This movie was an excellent portrayal of character development and had stellar acting.
Predicted Score: [0.9141325]
Predicted Sentiment: Positive
--------------------------------------------------------------------------------

Review 3: I found the movie to be predictable with a lackluster script.
Predicted Score: [0.5276026]
Predicted Sentiment: Positive
--------------------------------------------------------------------------------

Review 4: The cinematography was magnificent, and the pacing was perfect. Highly recommend watching.
Predicted Score: [0.6266946]
Predicted Sentiment: Positive
--------------------------------------------------------------------------------

Review