In [88]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model

In [89]:
# Load the IMDB dateset word index
word_index=imdb.get_word_index()
reverse_word_index={value:key for key,value in  word_index.items()}

In [90]:
# load the pre-trained model with RelU activation
model=load_model('simple_rnn_imdb.h5')



In [91]:
model.summary()

In [92]:
# Gives us all the weights
model.get_weights()

[array([[ 0.00360293, -0.04724076, -0.00636177, ...,  0.0501289 ,
          0.03515252,  0.00402705],
        [ 0.02371501, -0.02856575, -0.01439023, ...,  0.00484169,
         -0.01945996,  0.04501966],
        [-0.02243872,  0.01674434,  0.03295131, ..., -0.0109915 ,
          0.00106252, -0.02532815],
        ...,
        [-0.06485824, -0.01780481, -0.01711809, ..., -0.02957753,
         -0.02542059,  0.04370107],
        [-0.01419292,  0.0243709 , -0.0093105 , ...,  0.02032012,
          0.02811681, -0.02920331],
        [ 0.01037328,  0.00382223, -0.04239489, ...,  0.03727791,
         -0.04088555,  0.00827561]], shape=(10000, 128), dtype=float32),
 array([[-0.09289397, -0.02975043, -0.09221038, ..., -0.07679912,
          0.01291739, -0.10522214],
        [-0.04457122, -0.06225269,  0.04541303, ..., -0.10383682,
         -0.01106694, -0.04931725],
        [ 0.03112775,  0.08282963,  0.11165565, ..., -0.12608567,
         -0.16389875,  0.0194709 ],
        ...,
        [-0.1497659

In [93]:
# step2:Helper functions
# Function to decode reviews
def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i-3,'?') for i in encoded_review])

# Function to preprocess user input
def preprocess_text(text):
    words=text.lower().split()
    encoded_review = []
    for word in words:
        index = word_index.get(word, 2) + 3  # +3 for IMDB offset
        # +3 is used because the first 3 indices (0, 1, 2) are reserved for special tokens in the IMDB dataset.

# So, the IMDB word indices start from 1, but when you actually train or predict,
# the model expects all real words to start from index 3.
        if index >= 10000:  # outside vocabulary
            index = 2  # map to <UNK> token
        encoded_review.append(index)
    padded_review=sequence.pad_sequences([encoded_review],maxlen=500)
    return padded_review

In [94]:
# Prediction function
def predict_sentiment(review):
    preprocessed_input=preprocess_text(review)
    prediction=model.predict(preprocessed_input)
    sentiment='Positive' if prediction[0][0] >0.5 else 'Negative'
    return sentiment,prediction[0][0]

In [95]:
# Example Review for prediction
example_review="The movie was fantastic! The acting wa great and the plot was thrilling"
sentiment,score=predict_sentiment(example_review)
print(f"Review :{example_review}")
print(f"Sentiment :{sentiment}")
print(f"Prediction score: {score}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 211ms/step
Review :The movie was fantastic! The acting wa great and the plot was thrilling
Sentiment :Positive
Prediction score: 0.5120401382446289
