In [9]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model

In [10]:
# Load the IMDB dateset word index
word_index=imdb.get_word_index()
reverse_word_index={value:key for key,value in  word_index.items()}

In [11]:
# load the pre-trained model with RelU activation
model=load_model('simple_rnn_imdb.h5')

In [12]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 500, 128)          1280000   
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 128)               32896     
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 1313025 (5.01 MB)
Trainable params: 1313025 (5.01 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
# Gives us all the weights
model.get_weights()

[array([[-0.05151509, -0.00078408,  0.04292239, ...,  0.03423623,
         -0.03954397, -0.00294153],
        [-0.05283682,  0.01704437,  0.00226647, ..., -0.04097271,
         -0.01475801,  0.02370729],
        [-0.00931496,  0.01929478, -0.03846863, ...,  0.00806098,
         -0.0057681 ,  0.01348312],
        ...,
        [-0.01125321, -0.0101937 , -0.00945954, ...,  0.0021883 ,
          0.01830335, -0.02355617],
        [-0.01005434,  0.03216004,  0.02260523, ...,  0.03903852,
          0.00799843, -0.03987047],
        [-0.06271159,  0.00674492, -0.05280048, ..., -0.02208824,
         -0.07128551,  0.00280789]], dtype=float32),
 array([[ 0.06118941, -0.04959775, -0.04515604, ..., -0.03550302,
         -0.0097693 , -0.00619145],
        [-0.06538703,  0.12388486,  0.09988923, ...,  0.11024152,
          0.15029883, -0.11052712],
        [-0.00592877,  0.10916686,  0.10680263, ..., -0.15585843,
         -0.03173389, -0.12157616],
        ...,
        [ 0.17275369, -0.03241911, -0.1

In [14]:
# step2:Helper functions
# Function to decode reviews
def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i-3,'?') for i in encoded_review])

# Function to preprocess user input
def preprocess_text(text):
    words=text.lower().split()
    encoded_review = []
    for word in words:
        index = word_index.get(word, 2) + 3  # +3 for IMDB offset
        # +3 is used because the first 3 indices (0, 1, 2) are reserved for special tokens in the IMDB dataset.

# So, the IMDB word indices start from 1, but when you actually train or predict,
# the model expects all real words to start from index 3.
        if index >= 10000:  # outside vocabulary
            index = 2  # map to <UNK> token
        encoded_review.append(index)
    padded_review=sequence.pad_sequences([encoded_review],maxlen=500)
    return padded_review

In [15]:
# Prediction function
def predict_sentiment(review):
    preprocessed_input=preprocess_text(review)
    prediction=model.predict(preprocessed_input)
    sentiment='Positive' if prediction[0][0] >0.5 else 'Negative'
    return sentiment,prediction[0][0]

In [16]:
# Example Review for prediction
example_review="The movie was fantastic! The acting wa great and the plot was thrilling"
sentiment,score=predict_sentiment(example_review)
print(f"Review :{example_review}")
print(f"Sentiment :{sentiment}")
print(f"Prediction score: {score}")

Review :The movie was fantastic! The acting wa great and the plot was thrilling
Sentiment :Negative
Prediction score: 0.48328328132629395
