In [7]:
# =========================
# Full Sentiment Analysis Fix
# =========================

# 1️⃣ Install required packages (run once)
# !pip install nltk scikit-learn contractions

import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer
import contractions
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Fetch the NLTK resources used below: the stopword corpus (preprocessing)
# and the VADER lexicon (rule-based sentiment scoring).
for resource in ('stopwords', 'vader_lexicon'):
    nltk.download(resource)

# =========================
# 2️⃣ Sample dataset
# Replace this with your real dataset
# =========================
# Each record pairs a review text with its sentiment label.
labelled_rows = [
    ('I absolutely loved the service!', 'Positive'),
    ('This was a horrible experience.', 'Negative'),
    ("It's fine, not too bad.", 'Positive'),
    ('Amazing performance, really enjoyed it!', 'Positive'),
    ('Waste of time and money.', 'Negative'),
]

# Column-oriented view of the records, keyed by column name.
data = {
    'Text': [text for text, _ in labelled_rows],
    'Sentiment': [label for _, label in labelled_rows],
}

df = pd.DataFrame(data)

# =========================
# 3️⃣ Preprocessing
# =========================

# English stopwords minus negation words, so that negations survive
# token filtering and can still influence the classifier.
stop = set(stopwords.words('english')).difference(
    {"not", "no", "never", "nor", "n't"}
)

def preprocess(text):
    """Normalise one raw document before TF-IDF vectorisation.

    Steps: expand contractions, lowercase, replace every character other
    than a-z, 0-9, whitespace and apostrophes with a space, then drop the
    tokens found in the module-level ``stop`` set.

    Args:
        text: Raw text. Non-string values are converted with ``str``.
            ``None``, float NaN and ``pd.NA`` are treated as empty text.

    Returns:
        The remaining tokens joined by single spaces; ``""`` for missing
        input.
    """
    # Missing values would otherwise be stringified ("None", "nan", "<NA>")
    # and end up as bogus tokens in the feature vocabulary.
    if text is None or (isinstance(text, float) and text != text) or text is pd.NA:
        return ""

    cleaned = contractions.fix(str(text)).lower()    # expand contractions, then lowercase
    cleaned = re.sub(r"[^a-z0-9\s']", " ", cleaned)  # keep apostrophes
    return " ".join(word for word in cleaned.split() if word not in stop)

# Run every raw review through the cleaning pipeline and store the result
# alongside the original text.
df['CleanText'] = df['Text'].map(preprocess)

# =========================
# 4️⃣ Train/test split (on cleaned text, before vectorization)
# =========================
# Split first so the held-out rows never influence the TF-IDF vocabulary
# or IDF weights; fitting the vectorizer on the full dataset leaks test
# information into training.
y = df['Sentiment']
text_train, text_test, y_train, y_test = train_test_split(
    df['CleanText'], y, test_size=0.2, random_state=42
)

# =========================
# 5️⃣ Vectorization (TF-IDF with unigrams + bigrams)
# =========================
vec = TfidfVectorizer(ngram_range=(1, 2), min_df=1)  # min_df=1 since small dataset
X_train = vec.fit_transform(text_train)  # learn vocabulary/IDF from training rows only
X_test = vec.transform(text_test)        # project held-out rows onto the fitted vocabulary
X = vec.transform(df['CleanText'])       # full matrix, kept for any later code that uses X

# =========================
# 6️⃣ Train Naive Bayes classifier
# =========================
# Build the multinomial Naive Bayes classifier and fit it on the training
# split in one step (fit returns the estimator itself).
model = MultinomialNB().fit(X_train, y_train)

# =========================
# 7️⃣ Evaluate on test set
# =========================
# Predict labels for the held-out rows and report accuracy plus the
# per-class precision/recall/F1 table.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# zero_division=0 reports 0.0 for classes that have no predicted or no true
# samples (common with a tiny test split) instead of emitting a stream of
# UndefinedMetricWarning messages; the reported numbers are unchanged.
print(classification_report(y_test, y_pred, zero_division=0))

# =========================
# 8️⃣ Compare predictions on sample texts
# =========================
sample_texts = [
    'I absolutely loved the service!',
    'This was a horrible experience.',
    "It's fine, not too bad.",
    'Amazing performance, really enjoyed it!',
    'Waste of time and money.',
]

# Clean the samples with the same pipeline that was applied to the dataset
sample_clean = list(map(preprocess, sample_texts))

# Vectorize with the already-fitted TF-IDF and classify with the trained model
sample_features = vec.transform(sample_clean)
sample_predictions = model.predict(sample_features)

print("\n--- Model Predictions ---")
for original, label in zip(sample_texts, sample_predictions):
    print(f"Text: {original}\nPredicted: {label}\n")

# =========================
# 9️⃣ VADER predictions
# =========================
# Shared VADER analyzer instance used by vader_label below.
sia = SentimentIntensityAnalyzer()


def vader_label(text):
    """Map VADER's compound score for *text* to a sentiment label.

    Args:
        text: The text passed to ``sia.polarity_scores``.

    Returns:
        "Positive" when the compound score is >= 0.05, "Negative" when it
        is <= -0.05, and "Neutral" otherwise.
    """
    compound = sia.polarity_scores(text)['compound']
    if compound >= 0.05:
        return "Positive"
    if compound <= -0.05:
        return "Negative"
    return "Neutral"

# Label each sample text with VADER and print it next to the text.
print("\n--- VADER Predictions ---")
for sample in sample_texts:
    verdict = vader_label(sample)
    print(f"Text: {sample}\nVADER: {verdict}\n")


Accuracy: 0.0
              precision    recall  f1-score   support

    Negative       0.00      0.00      0.00       1.0
    Positive       0.00      0.00      0.00       0.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0


--- Model Predictions ---
Text: I absolutely loved the service!
Predicted: Positive

Text: This was a horrible experience.
Predicted: Positive

Text: It's fine, not too bad.
Predicted: Positive

Text: Amazing performance, really enjoyed it!
Predicted: Positive

Text: Waste of time and money.
Predicted: Negative


--- VADER Predictions ---
Text: I absolutely loved the service!
VADER: Positive

Text: This was a horrible experience.
VADER: Negative

Text: It's fine, not too bad.
VADER: Positive

Text: Amazing performance, really enjoyed it!
VADER: Positive

Text: Waste of time and money.
VADER: Negative



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
