In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb  ## Preloaded dataset in keras
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model


In [2]:
# imdb.get_word_index() returns a dict mapping each word (str) to a plain
# integer index -- the values are NOT one-hot vectors.
word_index = imdb.get_word_index()

# Inverted mapping (integer index -> word), used to turn encoded reviews
# back into readable text.
rev_keyval = {index: word for word, index in word_index.items()}

In [3]:
# Restore the previously saved network from disk, then print its layer summary.
model = load_model(
    'simple_rnn_imdb.h5'
)
model.summary()



In [4]:
# Fetch every weight array of the loaded model as a list of NumPy arrays.
# As the last expression in the cell, the whole list is echoed to the output.
# NOTE(review): this prints full arrays; showing only each array's shape would
# keep the notebook output much smaller -- consider changing on the next re-run.
model.get_weights()

[array([[-0.12524994, -0.04188933,  0.41708514, ...,  0.1626418 ,
         -0.46420884, -0.47645944],
        [-0.00599103, -0.02858121,  0.02579241, ...,  0.00198766,
          0.05796218, -0.00167121],
        [ 0.00289431,  0.08114494,  0.03157181, ...,  0.04527317,
         -0.07705634,  0.01563216],
        ...,
        [-0.0192723 , -0.0453442 ,  0.03562754, ..., -0.05548868,
          0.03581031, -0.03707061],
        [ 0.01329407,  0.02799908,  0.02714697, ..., -0.05190631,
         -0.12977153, -0.03568354],
        [-0.13583264, -0.08374428,  0.07257245, ..., -0.01684491,
          0.10556005,  0.15735564]], shape=(10000, 128), dtype=float32),
 array([[-8.69439468e-02,  7.79515356e-02, -1.28100738e-01, ...,
         -4.97887954e-02,  1.01028726e-01, -9.00678411e-02],
        [-1.13259271e-01,  2.60152332e-02, -1.45949095e-01, ...,
         -4.36700732e-02,  1.40518183e-02,  2.87930910e-02],
        [-9.21980366e-02,  1.47219792e-01, -1.04888938e-01, ...,
          5.41835129e

In [5]:
# Function to decode reviews (numbers → words)
def decode_review(encoded_review):
    """Turn a sequence of encoded token IDs back into a space-separated string.

    Encoded reviews store each word as its ``word_index`` value shifted by +3,
    because IDs 0, 1 and 2 are reserved (padding, start, unknown). Each ID is
    therefore shifted back by 3 before being looked up in ``rev_keyval``.
    IDs with no entry in ``rev_keyval`` (including the reserved ones) are
    rendered as '?'.
    """
    decoded_words = []
    for token_id in encoded_review:
        # Undo the +3 offset, falling back to the placeholder when unmapped.
        decoded_words.append(rev_keyval.get(token_id - 3, '?'))
    return ' '.join(decoded_words)


# Function to preprocess user input (words → numbers)
def preprocess_text(text, max_features=10000, maxlen=500):
    """Convert raw review text into a padded ID sequence ready for the model.

    Args:
        text: Raw review string. It is lower-cased and split on whitespace.
        max_features: Vocabulary size the model was built with. The loaded
            embedding matrix has 10000 rows, so any ID >= 10000 would be out
            of range; such IDs are replaced by the UNKNOWN token.
        maxlen: Fixed sequence length to pad/truncate to.

    Returns:
        The result of ``sequence.pad_sequences`` for the single encoded
        review, i.e. one row of length ``maxlen``.

    Encoding rules:
        - IDs 0, 1, 2 are reserved (0=padding, 1=start, 2=unknown).
        - A known word is stored as ``word_index[word] + 3``.
        - A word missing from ``word_index`` becomes 2 (UNKNOWN). The offset
          must NOT be added to it: 2 + 3 = 5 is the ID of a real word.
        - A known word whose offset ID is >= ``max_features`` also becomes 2.

    NOTE(review): reviews returned by ``imdb.load_data`` start with the start
    token (1) by default; this function does not prepend it -- confirm
    whether that matters for the trained model.
    """
    unknown_id = 2    # reserved UNKNOWN token
    index_offset = 3  # shift applied to word_index values in encoded reviews

    encoded_review = []
    for word in text.lower().split():
        base_index = word_index.get(word)
        if base_index is None:
            token_id = unknown_id
        else:
            token_id = base_index + index_offset
            # Keep every ID inside the model's vocabulary.
            if token_id >= max_features:
                token_id = unknown_id
        encoded_review.append(token_id)

    # The network needs fixed-size input: pad (or truncate) to `maxlen` tokens.
    padded_review = sequence.pad_sequences([encoded_review], maxlen=maxlen)
    return padded_review


In [7]:
## Prediction function

def pred_sent(review):
    """Predict the sentiment of a raw review string with the loaded model.

    The text is encoded via ``preprocess_text`` and passed to
    ``model.predict``; the first value of the first prediction row is the
    score.

    Returns:
        A ``(label, score)`` tuple where ``label`` is 'positive' when the
        score is strictly greater than 0.5 and 'Negative' otherwise.
    """
    model_input = preprocess_text(review)
    score = model.predict(model_input)[0][0]
    if score > 0.5:
        label = 'positive'
    else:
        label = 'Negative'
    return label, score

In [14]:
## Step 4: User Input and Prediction
## Classify one sample review and report the label together with the raw score.

example = 'This movie was worse'
sentiment, score = pred_sent(example)

print(
    f'Review: {example}',
    f'Sentiment: {sentiment}',
    f'Prediction Score: {score}',
    sep='\n',
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Review: This movie was worse
Sentiment: Negative
Prediction Score: 0.4533403217792511
