In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model




In [2]:
# Load the IMDB dateset word index
word_index=imdb.get_word_index()
reverse_word_index={value:key for key,value in  word_index.items()}

In [3]:
# load the pre-trained model with RelU activation
model=load_model('simple_rnn_imdb.h5')




In [4]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 128)          1280000   
                                                                 
 simple_rnn (SimpleRNN)      (None, 128)               32896     
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 1313025 (5.01 MB)
Trainable params: 1313025 (5.01 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [5]:
# Gives us all the weights
model.get_weights()

[array([[ 0.00771596, -0.02975669, -0.01434987, ...,  0.03141345,
         -0.02214464,  0.0306344 ],
        [-0.00466375, -0.03576496,  0.04118659, ..., -0.03700478,
          0.00859951,  0.04937341],
        [-0.01632436, -0.00241411, -0.05339053, ...,  0.03326719,
         -0.00181999, -0.01436651],
        ...,
        [-0.04183976,  0.01830151,  0.03514445, ...,  0.05844026,
          0.04775357, -0.02452803],
        [-0.02177731,  0.02831649, -0.03127552, ..., -0.05348991,
         -0.0603427 , -0.03502457],
        [-0.01937093, -0.08628052, -0.00919435, ...,  0.00664616,
          0.08389847,  0.03017515]], dtype=float32),
 array([[ 0.02786785, -0.1537148 ,  0.0516375 , ..., -0.08859527,
         -0.04186559,  0.08194309],
        [ 0.11441379, -0.1200168 ,  0.09530655, ..., -0.05223194,
         -0.06732467,  0.05866151],
        [-0.12512746,  0.05287352, -0.04569966, ..., -0.10698143,
          0.08099233, -0.14301921],
        ...,
        [ 0.14230652, -0.11248031,  0.1

In [6]:
# step2:Helper functions
# Function to decode reviews
def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i-3,'?') for i in encoded_review])

# Function to preprocess user input
def preprocess_text(text):
    words=text.lower().split()
    encoded_review = []
    for word in words:
        index = word_index.get(word, 2) + 3  # +3 for IMDB offset
        # +3 is used because the first 3 indices (0, 1, 2) are reserved for special tokens in the IMDB dataset.

# So, the IMDB word indices start from 1, but when you actually train or predict,
# the model expects all real words to start from index 3.
        if index >= 10000:  # outside vocabulary
            index = 2  # map to <UNK> token
        encoded_review.append(index)
    padded_review=sequence.pad_sequences([encoded_review],maxlen=500)
    return padded_review

In [7]:
# Prediction function
def predict_sentiment(review):
    preprocessed_input=preprocess_text(review)
    prediction=model.predict(preprocessed_input)
    sentiment='Positive' if prediction[0][0] >0.5 else 'Negative'
    return sentiment,prediction[0][0]

In [8]:
# Example Review for prediction
example_review="The movie was fantastic! The acting wa great and the plot was thrilling"
sentiment,score=predict_sentiment(example_review)
print(f"Review :{example_review}")
print(f"Sentiment :{sentiment}")
print(f"Prediction score: {score}")

Review :The movie was fantastic! The acting wa great and the plot was thrilling
Sentiment :Positive
Prediction score: 0.7990087270736694
