In [8]:
# =========================
# Full Sentiment Analysis Demo (Clean Version)
# =========================

# Install required packages (run once, in a separate cell)
# %pip install pandas nltk scikit-learn

import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.exceptions import UndefinedMetricWarning
import warnings

# =========================
# Suppress warnings for clean output
# =========================
# Silence only scikit-learn's UndefinedMetricWarning; every other warning
# category stays visible.
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

# Fetch the NLTK resources used below: the stopword list and the VADER lexicon.
# quiet=True keeps the "[nltk_data] ..." progress log out of the cell output,
# in line with the "clean output" goal of this section.
for _resource in ('stopwords', 'vader_lexicon'):
    nltk.download(_resource, quiet=True)

# =========================
# Sample dataset (replace with your own if needed)
# =========================
# Toy corpus: five hand-labelled reviews as (text, sentiment) pairs.
# Swap in your own data here if needed.
_labelled_reviews = [
    ('I absolutely loved the service!', 'Positive'),
    ('This was a horrible experience.', 'Negative'),
    ("It's fine, not too bad.", 'Positive'),
    ('Amazing performance, really enjoyed it!', 'Positive'),
    ('Waste of time and money.', 'Negative'),
]

# Unzip the pairs into the column-oriented dict the DataFrame is built from.
_texts, _labels = zip(*_labelled_reviews)
data = {'Text': list(_texts), 'Sentiment': list(_labels)}

df = pd.DataFrame(data)

# =========================
# Preprocessing
# =========================
# Negation carries sentiment, so it must survive stopword removal.
# preprocess() splits on whitespace only, so a bare "n't" token never occurs;
# what does occur are whole contractions such as "isn't" or "don't", which are
# part of the NLTK English stopword list. Keep every "...n't" form as well as
# the plain negation words.
_NEGATION_WORDS = {"not", "no", "never", "nor"}
stop = {
    w for w in stopwords.words('english')
    if w not in _NEGATION_WORDS and not w.endswith("n't")
}

def preprocess(text, stop_words=None):
    """Normalise one document: lowercase, strip punctuation, drop stopwords.

    Args:
        text: Raw input. Non-string values are converted with ``str``;
            ``None`` and float NaN are treated as an empty document.
        stop_words: Optional collection of tokens to drop. When omitted, the
            module-level ``stop`` set is used.

    Returns:
        The surviving tokens joined by single spaces ("" if nothing survives).
    """
    # A missing value would otherwise be stringified into the token
    # "none" / "nan" and end up in the vocabulary.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if stop_words is None:
        stop_words = stop

    text = str(text).lower()                          # lowercase
    # Map typographic apostrophes to "'" so a word like "don’t" stays one
    # token instead of being split apart by the punctuation filter below.
    text = text.replace("\u2019", "'").replace("\u2018", "'")
    text = re.sub(r"[^a-z0-9\s']", " ", text)         # keep apostrophes
    # Trim apostrophes used as quotes ('great' -> great) before the stopword
    # lookup; apostrophes inside a word (it's, don't) are left alone.
    tokens = (w.strip("'") for w in text.split())
    return " ".join(w for w in tokens if w and w not in stop_words)

# Store the normalised form of every review next to the raw text.
df['CleanText'] = [preprocess(raw_text) for raw_text in df['Text']]

# =========================
# TF-IDF vectorization (optional ML model)
# =========================
y = df['Sentiment']

# Train/test split on the cleaned text, *before* vectorising. Fitting TF-IDF
# on the whole corpus would let vocabulary and IDF statistics from the
# held-out rows leak into training and bias the evaluation.
text_train, text_test, y_train, y_test = train_test_split(
    df['CleanText'], y, test_size=0.2, random_state=42
)

# Learn vocabulary and IDF weights from the training rows only, then reuse
# the fitted vectoriser for every other transform.
vec = TfidfVectorizer(ngram_range=(1,2), min_df=1)
X_train = vec.fit_transform(text_train)
X_test = vec.transform(text_test)
X = vec.transform(df['CleanText'])   # full matrix, kept under its original name

# Train Naive Bayes classifier
model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate. With the 5-row sample dataset and test_size=0.2 only a single row
# is held out, so these numbers are illustrative only; use a larger dataset
# (and consider stratify=y) for a meaningful estimate.
y_pred = model.predict(X_test)
print("ML Model Accuracy:", accuracy_score(y_test, y_pred))
# zero_division=0 makes the 0.0 entries for sample-less classes explicit
# instead of depending on a globally silenced UndefinedMetricWarning.
print(classification_report(y_test, y_pred, zero_division=0))

# =========================
# Sample texts for prediction
# =========================
# Texts to score with both approaches.
sample_texts = [
    'I absolutely loved the service!',
    'This was a horrible experience.',
    "It's fine, not too bad.",
    'Amazing performance, really enjoyed it!',
    'Waste of time and money.'
]

# Apply the same cleaning step that produced the model's training text.
sample_clean = list(map(preprocess, sample_texts))

# ML model predictions: vectorise with the already-fitted TF-IDF, predict once,
# then report each text alongside its label.
print("\n--- ML Model Predictions ---")
predicted_labels = model.predict(vec.transform(sample_clean))
for position, t in enumerate(sample_texts):
    p = predicted_labels[position]
    print(f"Text: {t}\nPredicted: {p}\n")

# =========================
# VADER sentiment predictions
# =========================
# One analyzer instance, reused by every call to vader_label().
sia = SentimentIntensityAnalyzer()

def vader_label(text):
    """Turn VADER's compound score for ``text`` into a three-way label.

    Returns "Positive" when the compound score is >= 0.05, "Negative" when it
    is <= -0.05, and "Neutral" for anything in between.
    """
    compound = sia.polarity_scores(text)['compound']
    if compound >= 0.05:
        return "Positive"
    if compound <= -0.05:
        return "Negative"
    return "Neutral"

print("\n--- VADER Predictions ---")
# Score the raw (uncleaned) sample texts, then print each text with its label.
vader_labels = [vader_label(t) for t in sample_texts]
for t, label in zip(sample_texts, vader_labels):
    print(f"Text: {t}\nVADER: {label}\n")


ML Model Accuracy: 0.0
              precision    recall  f1-score   support

    Negative       0.00      0.00      0.00       1.0
    Positive       0.00      0.00      0.00       0.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0


--- ML Model Predictions ---
Text: I absolutely loved the service!
Predicted: Positive

Text: This was a horrible experience.
Predicted: Positive

Text: It's fine, not too bad.
Predicted: Positive

Text: Amazing performance, really enjoyed it!
Predicted: Positive

Text: Waste of time and money.
Predicted: Negative


--- VADER Predictions ---
Text: I absolutely loved the service!
VADER: Positive

Text: This was a horrible experience.
VADER: Negative

Text: It's fine, not too bad.
VADER: Positive

Text: Amazing performance, really enjoyed it!
VADER: Positive

Text: Waste of time and money.
VADER: Negative



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
