Importing the modules and suppressing the warnings

In [115]:
import warnings
warnings.filterwarnings('ignore')

import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import tensorflow as tf

Loading the Dataset from keras library itself

In [116]:
from tensorflow.keras.datasets import imdb #type: ignore
vocab_size=10000
max_len=100

(x_train, y_train),(x_test, y_test) = imdb.load_data(num_words=vocab_size)

padding words these to make them of equal sizes

In [117]:

from tensorflow.keras.preprocessing.sequence import pad_sequences #type: ignore
x_train=pad_sequences(x_train,maxlen=max_len,padding='post')
x_test=pad_sequences(x_test,maxlen=max_len,padding='post')

Creating the model

In [118]:

from tensorflow.keras.models import Sequential  #type: ignore
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense #type: ignore
model=Sequential([
    Embedding(input_dim=vocab_size, output_dim=128,input_length=max_len),
    LSTM(32,dropout=0.5, recurrent_dropout=0.5),
    Dense(16,activation='relu'),
    Dropout(0.6),
    Dense(1,activation="sigmoid")
])

Compiling the model

In [119]:
model.compile(loss="binary_crossentropy",optimizer="adam",metrics=['accuracy'])


Training the model and loading the history

In [None]:
import pickle
from keras.models import load_model
if (os.path.exists('sentiment_analysis_model.keras')):
    model=load_model('sentiment_analysis_model.keras')
else:
    history = model.fit(x_train, y_train, epochs=5, batch_size=64, validation_data=(x_test, y_test))
    with open('training_history.pkl', 'wb') as f:
        pickle.dump(history.history, f)

with open('training_history.pkl', 'rb') as f:
    history = pickle.load(f)

Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 76ms/step - accuracy: 0.5913 - loss: 0.6569 - val_accuracy: 0.7685 - val_loss: 0.4845
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 72ms/step - accuracy: 0.7746 - loss: 0.4864 - val_accuracy: 0.7706 - val_loss: 0.4600
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 74ms/step - accuracy: 0.8209 - loss: 0.4310 - val_accuracy: 0.8111 - val_loss: 0.4214
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 72ms/step - accuracy: 0.8530 - loss: 0.3741 - val_accuracy: 0.8264 - val_loss: 0.4022
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 73ms/step - accuracy: 0.8687 - loss: 0.3384 - val_accuracy: 0.8252 - val_loss: 0.4087


Accuracy and Loss

In [122]:
loss,accuracy=model.evaluate(x_test, y_test)
print(f"Test Loss: {loss}\nTest Accuracy: {accuracy*100:.2f}%")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - accuracy: 0.8265 - loss: 0.4139
Test Loss: 0.40866273641586304
Test Accuracy: 82.52%


since the first 3 indexes are already reserved in keras dataset. We are updating the index to +3 index

In [None]:
word_index = imdb.get_word_index()
index_word = {v + 3: k for k, v in word_index.items()} 
word_index = {k: (v + 3) for k, v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2
word_index["<UNUSED>"] = 3

Function to preprocessing the review

In [None]:
import nltk
import string
from textblob import TextBlob

def correct_spelling(text):
    return str(TextBlob(text).correct())

nltk.download("stopwords")
from nltk.corpus import stopwords
stop_words=set(stopwords.words('english'))
def preprocess_review(review,max_len=max_len):
    tokens=review.lower()
    text=tokens.translate(str.maketrans('','',string.punctuation))
    text=' '.join([word for word in text.split() if word not in stop_words])
    text=[word for word in text.split() if word.isalpha()]
    text = [correct_spelling(word) for word in text]
    sequence = [min(word_index.get(word, 2), vocab_size - 1) for word in text]
    padded=pad_sequences([sequence],maxlen=max_len,padding='post')
    return padded

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Rangra\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Input review

In [134]:
review=input("Enter your review: ")

preprocessed the review

In [135]:

padded_review=preprocess_review(review)

['now', 'cinematic', 'masterpiece', 'especially', 'loved', 'plot', 'went', 'absolutely', 'nowhere', 'two', 'hours', 'true', 'innovation', 'way', 'characters', 'made', 'worst', 'possible', 'decisions', 'every', 'five', 'minutes', 'genius', 'rare', 'see', 'film', 'boldly', 'commit', 'completely', 'forgettable', 'oscarworthy', 'truly']


Predicted the model. It gives a probability as we have used sigmoid as activation function in output layer. The output is a 2D array.

In [136]:

prediction=model.predict(padded_review)[0][0]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step


Printing the sentiment of the review

In [137]:
if prediction >= 0.5:
    print(f"\n{review}\nSentiment : Positive ({prediction*100:.2f}%)")
else:
    print(f"\n{review}\nSentiment : Negative ({100- prediction*100:.2f}%)")


Wow, what a cinematic masterpiece! I especially loved how the plot went absolutely nowhere for two hours — true innovation! And the way the characters made the worst possible decisions every five minutes? Genius. It's rare to see a film so boldly commit to being completely forgettable. Oscar-worthy, truly.
Sentiment : Negative (92.85%)


This is all there is to it