In [2]:
!pip install tensorflow keras --quiet
print("✓ TensorFlow and Keras installed!")

✓ TensorFlow and Keras installed!


In [1]:
# Import Libraries
import numpy as np
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras.models import load_model
from keras.layers import Embedding, SimpleRNN, Dense

print(f"✓ TensorFlow version: {tf.__version__}")
print("✓ All imports successful!")

  if not hasattr(np, "object"):


✓ TensorFlow version: 2.16.1
✓ All imports successful!


In [2]:
# Load IMDB word index and create reverse mapping
word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}

print(f"✓ Word index loaded! Vocabulary size: {len(word_index)}")
print(f"Sample word mapping: {list(word_index.items())[:5]}")

✓ Word index loaded! Vocabulary size: 88584
Sample word mapping: [('fawn', 34701), ('tsukino', 52006), ('nunnery', 52007), ('sonja', 16816), ('vani', 63951)]


In [20]:
# Model Configuration
model_path = 'simple_rnn_imdb(1).h5'
max_features = 10000
max_len = 500

# Check current working directory and available files
print(f"Current working directory: {os.getcwd()}")
print(f"Looking for model at: {model_path}")
print(f"Files in current directory:")
for f in os.listdir('.'):
    if f.endswith('.h5'):
        print(f"  - {f}")

# Load the pre-trained model
if os.path.exists(model_path):
    try:
        model = load_model(model_path)
        print(f"✓ Model loaded from {model_path}")
    except Exception as e:
        print(f"⚠ Error loading model: {str(e)[:100]}")
        # Build model from scratch if loading fails
        model = keras.Sequential([
            keras.layers.Embedding(max_features, 128, input_length=max_len),
            keras.layers.SimpleRNN(128, activation='relu'),
            keras.layers.Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        print("Using freshly initialized model")
else:
    print(f"⚠ Model file not found: {model_path}")
    print("Available .h5 files in current directory and subdirectories:")
    for root, dirs, files in os.walk('.'):
        for f in files:
            if f.endswith('.h5'):
                print(f"  - {os.path.join(root, f)}")
    # Build model from scratch
    model = keras.Sequential([
        keras.layers.Embedding(max_features, 128, input_length=max_len),
        keras.layers.SimpleRNN(128, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    print("Using freshly initialized model")

print("✓ Model ready!")
print("\nModel Architecture:")
model.summary()



Current working directory: g:\simpleRNN
Looking for model at: simple_rnn_imdb(1).h5
Files in current directory:
  - simple_rnn_imdb(1).h5
✓ Model loaded from simple_rnn_imdb(1).h5
✓ Model ready!

Model Architecture:


In [26]:
# Helper Functions
def decode_review(encoded_review):
    """Decode an encoded review back to words"""
    return ' '.join([reverse_word_index.get(i - 3, '?') for i in encoded_review])

def preprocess_text(text):
    """
    Preprocess text using IMDB's word index.
    Word indices 0-3 are reserved: 0=padding, 1=start, 2=OOV, 3=unused
    """
    words = text.lower().split()
    
    encoded_review = []
    for word in words:
        # Check if word exists in dictionary
        if word in word_index:
            idx = word_index[word]
            # Only use indices within training range
            if idx < max_features:
                # Add 3 to match Keras' imdb preprocessing (indices 0-3 are reserved)
                encoded_review.append(idx + 3)
            else:
                # Word index out of range, use OOV token
                encoded_review.append(2)
        else:
            # Word not found, use OOV token
            encoded_review.append(2)
    
    # Pad to max_len
    padded_review = sequence.pad_sequences([encoded_review], maxlen=max_len)
    return padded_review

def predict_review(review_text, confidence_threshold=0.5):
    """
    Predict sentiment for a given review text.
    Returns: (sentiment, confidence_score, is_uncertain)
    """
    preprocessed = preprocess_text(review_text)
    prediction = model.predict(preprocessed, verbose=0)
    confidence = float(prediction[0][0])
    
    # Determine sentiment with uncertainty detection
    if abs(confidence - 0.5) < 0.1:  # Within 10% of threshold
        sentiment = 'UNCERTAIN'
        is_uncertain = True
    elif confidence > confidence_threshold:
        sentiment = 'Positive'
        is_uncertain = False
    else:
        sentiment = 'Negative'
        is_uncertain = False
    
    return sentiment, confidence, is_uncertain

print("✓ Helper functions defined!")

✓ Helper functions defined!


In [27]:
# Example 1: Test with positive review
print("="*70)
print("EXAMPLE 1: Positive Review")
print("="*70)
review_1 = "This movie was amazing! Great acting and brilliant plot. Highly recommended!"
sentiment_1, score_1, uncertain_1 = predict_review(review_1)
print(f"Review: {review_1}")
print(f"Sentiment: {sentiment_1}")
print(f"Confidence Score: {score_1:.4f}")
if uncertain_1:
    print("⚠ Model is UNCERTAIN about this prediction")
print()

EXAMPLE 1: Positive Review
Review: This movie was amazing! Great acting and brilliant plot. Highly recommended!
Sentiment: Positive
Confidence Score: 0.9856



In [28]:
# Example 2: Test with negative review
print("="*70)
print("EXAMPLE 2: Negative Review")
print("="*70)
review_2 = "This movie was horrible! Bad acting and terrible plot. Waste of time."
sentiment_2, score_2, uncertain_2 = predict_review(review_2)
print(f"Review: {review_2}")
print(f"Sentiment: {sentiment_2}")
print(f"Confidence Score: {score_2:.4f}")
if uncertain_2:
    print("⚠ Model is UNCERTAIN about this prediction")
print()

EXAMPLE 2: Negative Review
Review: This movie was horrible! Bad acting and terrible plot. Waste of time.
Sentiment: Negative
Confidence Score: 0.2251



In [29]:
# Example 3: Batch predictions on multiple reviews
print("="*70)
print("EXAMPLE 3: Batch Predictions")
print("="*70)

reviews = [
    "Absolutely loved it! Best movie ever made.",
    "Really disappointed with this film. Poor quality.",
    "It was okay, nothing special.",
    "Outstanding performance! A masterpiece!"
]

print(f"\nTesting {len(reviews)} reviews:\n")
for i, review in enumerate(reviews, 1):
    sentiment, score, uncertain = predict_review(review)
    print(f"{i}. Review: {review}")
    print(f"   Sentiment: {sentiment} (Confidence: {score:.4f})")
    if uncertain:
        print(f"   ⚠ Model is UNCERTAIN about this prediction")
    print()

EXAMPLE 3: Batch Predictions

Testing 4 reviews:

1. Review: Absolutely loved it! Best movie ever made.
   Sentiment: Positive (Confidence: 0.9026)

2. Review: Really disappointed with this film. Poor quality.
   Sentiment: Positive (Confidence: 0.6402)

3. Review: It was okay, nothing special.
   Sentiment: Positive (Confidence: 0.6761)

4. Review: Outstanding performance! A masterpiece!
   Sentiment: Positive (Confidence: 0.8166)



In [30]:
# Load IMDB test data and test on real reviews
print("="*70)
print("TESTING ON REAL IMDB DATA")
print("="*70)

# Load test data
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
X_test_padded = sequence.pad_sequences(X_test, maxlen=max_len)

# Evaluate model on test set
test_loss, test_accuracy = model.evaluate(X_test_padded[:100], y_test[:100], verbose=0)
print(f"\nModel Accuracy on 100 IMDB test samples: {test_accuracy:.2%}")
print(f"Model Loss: {test_loss:.4f}")

# Show predictions on actual IMDB reviews
print("\n" + "="*70)
print("Sample Predictions on Real IMDB Reviews:")
print("="*70)

for i in range(3):
    decoded = decode_review(X_test[i])
    actual = "Positive" if y_test[i] == 1 else "Negative"
    
    # Get prediction with raw confidence
    preprocessed = X_test_padded[i:i+1]
    raw_pred = model.predict(preprocessed, verbose=0)[0][0]
    predicted, score, uncertain = predict_review(decoded)
    
    print(f"\nReview {i+1}: {decoded[:150]}...")
    print(f"Actual: {actual}")
    print(f"Predicted: {predicted} (Confidence: {score:.4f})")
    if uncertain:
        print(f"⚠ Model is UNCERTAIN - Confidence is too close to threshold")

TESTING ON REAL IMDB DATA

Model Accuracy on 100 IMDB test samples: 79.00%
Model Loss: 0.4521

Sample Predictions on Real IMDB Reviews:

Review 1: ? please give this one a miss br br ? ? and the rest of the cast rendered terrible performances the show is flat flat flat br br i don't know how mich...
Actual: Negative
Predicted: UNCERTAIN (Confidence: 0.5397)
⚠ Model is UNCERTAIN - Confidence is too close to threshold

Review 2: ? this film requires a lot of patience because it focuses on mood and character development the plot is very simple and many of the scenes take place ...
Actual: Positive
Predicted: Positive (Confidence: 0.9969)

Review 3: ? many animation buffs consider ? ? the great forgotten genius of one special branch of the art puppet animation which he invented almost single ? and...
Actual: Positive
Predicted: Positive (Confidence: 0.9327)
