In [1]:
import pandas as pd
import numpy as np
import joblib
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt

# Load model and vectorizer
try:
    model = joblib.load(r'C:\Users\admin\Desktop\data\models\best_sentiment_model_logistic_regression.pkl')
    tfidf = joblib.load(r'C:\Users\admin\Desktop\data\models\tfidf_vectorizer.pkl')
    print("✅ Model and vectorizer loaded successfully!")
except:
    print("❌ Could not load model. Make sure you've run the training notebook first.")

✅ Model and vectorizer loaded successfully!


In [2]:
def preprocess_text(text):
    """Preprocess text for prediction"""
    # Convert to lowercase
    text = text.lower()

    # Remove HTML tags
    text = re.sub(r'<[^>]+>', '', text)

    # Remove URLs
    text = re.sub(r'http\S+|www\S+', '', text)

    # Remove special characters and digits
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    # Remove extra whitespace
    text = re.sub(r'\s+', ' ', text).strip()

    # Tokenize
    tokens = word_tokenize(text)

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words and len(token) > 2]

    # Lemmatization
    lemmatizer = WordNetLemmatizer()

    tokens = [lemmatizer.lemmatize(token) for token in tokens]

    return ' '.join(tokens)

# Test the preprocessing
test_text = "This movie was absolutely amazing! I loved every minute of it."
print(f"Original: {test_text}")
print(f"Processed: {preprocess_text(test_text)}")    

Original: This movie was absolutely amazing! I loved every minute of it.
Processed: movie absolutely amazing loved every minute


In [3]:
def predict_sentiment(text):
    """Predict sentiment of given text"""
    if not text.strip():
        return "Please enter some text", 0.0, "neutral"

    # Preprocess text
    processed_text = preprocess_text(text)

    # Transform using TF-IDF
    text_tfidf = tfidf.transform([processed_text])

    # Predict
    prediction = model.predict(text_tfidf)[0]
    probabilities = model.predict_proba(text_tfidf)[0]
    confidence = max(probabilities)
                    
    # Get probability for each class
    prob_negative = probabilities[0] if model.classes_[0] == 'negative' else probabilities[1]
    prob_positive = probabilities[1] if model.classes_[1] == 'positive' else probabilities[0]
    return prediction, confidence, prob_negative, prob_positive

# Test prediction
test_result = predict_sentiment("This movie was absolutely amazing!")
print(f"Prediction: {test_result}")

Prediction: ('positive', np.float64(0.9449061404677044), np.float64(0.05509385953229562), np.float64(0.9449061404677044))


In [4]:
# Create interactive widget
text_input = widgets.Textarea(value='Enter your text here...',
placeholder='Type your review or text here',
description='Text:',
layout=widgets.Layout(width='100%', height='100px')
)

predict_button = widgets.Button(
description='🔍 Analyze Sentiment',
button_style='primary',
layout=widgets.Layout(width='200px')
)

output_area = widgets.Output()

def on_predict_button_clicked(b):
    with output_area:
        clear_output()

        text = text_input.value
        if text and text != 'Enter your text here...':
            prediction, confidence, prob_neg, prob_pos = predict_sentiment(text)

            print("🎯 SENTIMENT ANALYSIS RESULTS")
            print("=" * 40)
            print(f"📝 Input Text: {text[:100]}{'...' if len(text) > 100 else ''}")

            print(f"️ Predicted Sentiment: {prediction.upper()}")
            print(f"🎲 Confidence: {confidence:.2%}")
            print(f"📊 Probability Breakdown:")
            print(f" 😊 Positive: {prob_pos:.2%}")
            print(f" 😞 Negative: {prob_neg:.2%}")

            # Create visualization
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

            # Confidence meter
            colors = ['red' if prediction == 'negative' else 'green']
            ax1.barh(['Confidence'], [confidence], color=colors)
            ax1.set_xlim(0, 1)
            ax1.set_title('Prediction Confidence')
            ax1.set_xlabel('Confidence Score')

            # Probability comparison
            sentiments = ['Negative', 'Positive']
            probabilities = [prob_neg, prob_pos]
            colors = ['lightcoral', 'lightgreen']

            bars = ax2.bar(sentiments, probabilities, color=colors)
            ax2.set_ylim(0, 1)
            ax2.set_title('Sentiment Probabilities')
            ax2.set_ylabel('Probability')

            # Add value labels on bars
            for bar, prob in zip(bars, probabilities):
                height = bar.get_height()
                ax2.text(bar.get_x() + bar.get_width()/2., height + 0.01, f'{prob:.2%}', ha='center', va='bottom')

            plt.tight_layout()
            plt.show()
        else:
            print("⚠️ Please enter some text to analyze!")

predict_button.on_click(on_predict_button_clicked) 

# Display the interface
display(widgets.VBox([widgets.HTML("<h2>🤖 Interactive Sentiment Analysis Tool</h2>"), text_input, predict_button, output_area ]))

VBox(children=(HTML(value='<h2>🤖 Interactive Sentiment Analysis Tool</h2>'), Textarea(value='Enter your text h…

In [5]:
# Sample texts for testing
sample_texts = [
    "This movie was absolutely fantastic! Great acting and storyline.",
    "Terrible film, complete waste of time and money.",
    "The movie was okay, nothing special but not bad either.",
    "I loved the cinematography but the plot was confusing.",
    "Worst movie I've ever seen, don't watch it!",
    "Amazing performances by all actors, highly recommended!",
    "The movie had its moments but overall disappointing.",
    "Brilliant direction and excellent screenplay.",
    "Boring and predictable, fell asleep halfway through.",
    "A masterpiece of modern cinema, truly inspiring!"
]

print("🧪 BATCH TESTING WITH SAMPLE TEXTS")
print("=" * 50)

results = []
for i, text in enumerate(sample_texts, 1):
    prediction, confidence, prob_neg, prob_pos = predict_sentiment(text)
    results.append({
        'Text': text[:50] + '...' if len(text) > 50 else text,
        'Prediction': prediction,
        'Confidence': f"{confidence:.2%}",
        'Positive_Prob': f"{prob_pos:.2%}",
        'Negative_Prob': f"{prob_neg:.2%}"
    })

emoji = "😊" if prediction == 'positive' else "😞"
print(f"{i:2d}. {emoji} {prediction.upper()} ({confidence:.2%}) - {text[:60]}{'...' if len(text) > 60 else ''}")

# Create results DataFrame
results_df = pd.DataFrame(results)
print(f"\n📊 DETAILED RESULTS:")
display(results_df)

🧪 BATCH TESTING WITH SAMPLE TEXTS
10. 😊 POSITIVE (93.90%) - A masterpiece of modern cinema, truly inspiring!

📊 DETAILED RESULTS:


Unnamed: 0,Text,Prediction,Confidence,Positive_Prob,Negative_Prob
0,This movie was absolutely fantastic! Great act...,positive,93.50%,93.50%,6.50%
1,"Terrible film, complete waste of time and money.",negative,99.98%,0.02%,99.98%
2,"The movie was okay, nothing special but not ba...",negative,99.75%,0.25%,99.75%
3,I loved the cinematography but the plot was co...,positive,58.86%,58.86%,41.14%
4,"Worst movie I've ever seen, don't watch it!",negative,97.47%,2.53%,97.47%
5,"Amazing performances by all actors, highly rec...",positive,99.47%,99.47%,0.53%
6,The movie had its moments but overall disappoi...,negative,92.60%,7.40%,92.60%
7,Brilliant direction and excellent screenplay.,positive,96.71%,96.71%,3.29%
8,"Boring and predictable, fell asleep halfway th...",negative,99.31%,0.69%,99.31%
9,"A masterpiece of modern cinema, truly inspiring!",positive,93.90%,93.90%,6.10%
