In [None]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import re
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Fetch the VADER lexicon; SentimentIntensityAnalyzer is built right after
nltk.download('vader_lexicon')

# One shared analyzer instance, reused for every text scored in this script
sid = SentimentIntensityAnalyzer()

# Small in-code sample: each entry pairs a review with its optional true label
# (0 = Negative, 1 = Positive, 2 = Neutral)
labelled_samples = [
    ("I love this product! It's amazing and works perfectly.", 1),
    ("This is the worst service I've ever received.", 0),
    ("Totally okay, nothing special.", 2),
    ("Great customer support and fast delivery!", 1),
    ("Not satisfied with the quality.", 0),
    ("It's just fine, average experience.", 2),
    ("Absolutely fantastic, highly recommend!", 1),
    ("Terrible. I want my money back!", 0),
    ("Mediocre performance but decent price.", 2),
    ("Loved it! Will buy again.", 1),
]

# Column-oriented view of the same samples, in the shape pd.DataFrame expects
data = {
    'Text': [text for text, _ in labelled_samples],
    'label': [label for _, label in labelled_samples],
}

df = pd.DataFrame(data)

# Show the first rows so the reader can sanity-check the input
print("Dataset Overview:", df.head(), sep="\n")

# Text preprocessing
def preprocess_text(text):
    """Return a lowercase, letters-only version of *text*.

    Processing steps, in order:
      1. Remove URLs (``http...`` / ``www...``) and ``@mentions``.
      2. Remove every character that is not an ASCII letter or whitespace
         (digits, punctuation, apostrophes, emoji, ...).
      3. Lowercase the result.
      4. Collapse runs of whitespace to single spaces and trim both ends,
         so removed tokens do not leave gaps behind.

    Args:
        text: The raw string to clean. ``None`` and float NaN (how pandas
            represents a missing value in an object column) are accepted.

    Returns:
        The cleaned string; ``""`` for missing or empty input.
    """
    # Missing cells would otherwise make re.sub raise TypeError.
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    # NOTE(review): VADER's documentation describes punctuation and
    # capitalization as intensity cues; stripping them here may weaken the
    # scores -- confirm this level of cleaning is wanted before scoring.
    text = re.sub(r"http\S+|www\S+|@\w+", "", text)
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    text = text.lower()

    # str.split() with no argument splits on any whitespace run and drops
    # leading/trailing blanks, so the join yields single-spaced text.
    return " ".join(text.split())

# Store the normalized form of every review next to the raw text
df['cleaned_text'] = df['Text'].map(preprocess_text)

# Sentiment analysis function
def analyze_sentiment(text):
    """Label *text* as "Positive", "Negative" or "Neutral".

    The decision uses the ``compound`` entry returned by
    ``sid.polarity_scores(text)``:

      * ``compound >= 0.05``  -> "Positive"
      * ``compound <= -0.05`` -> "Negative"
      * anything in between   -> "Neutral"
    """
    compound = sid.polarity_scores(text)['compound']

    if compound >= 0.05:
        return "Positive"
    if compound <= -0.05:
        return "Negative"
    return "Neutral"

# Score every cleaned review and keep the resulting label in its own column
df['predicted_sentiment'] = df['cleaned_text'].map(analyze_sentiment)

# If true labels were supplied, translate their numeric codes into the same
# names analyze_sentiment returns, then compare them with the predictions
if 'label' in df:
    label_map = {0: "Negative", 1: "Positive", 2: "Neutral"}
    df['label'] = df['label'].map(label_map)
    print("\nClassification Report:")
    print(classification_report(df['label'], df['predicted_sentiment']))

# Train-test split (demonstration purpose)
# NOTE(review): the targets passed here are the predicted sentiments, not the
# 'label' column -- confirm that is the intended y for later steps.
X_train, X_test, y_train, y_test = train_test_split(
    df['cleaned_text'],
    df['predicted_sentiment'],
    test_size=0.2,
    random_state=42,
)

# Side-by-side view of raw text, cleaned text and the predicted label
result_columns = ['Text', 'cleaned_text', 'predicted_sentiment']
print("\nSample Results:")
print(df[result_columns])


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


Dataset Overview:
                                                Text  label
0  I love this product! It's amazing and works pe...      1
1      This is the worst service I've ever received.      0
2                     Totally okay, nothing special.      2
3          Great customer support and fast delivery!      1
4                    Not satisfied with the quality.      0

Classification Report:
              precision    recall  f1-score   support

    Negative       0.75      1.00      0.86         3
     Neutral       1.00      0.33      0.50         3
    Positive       0.80      1.00      0.89         4

    accuracy                           0.80        10
   macro avg       0.85      0.78      0.75        10
weighted avg       0.84      0.80      0.76        10


Sample Results:
                                                Text  \
0  I love this product! It's amazing and works pe...   
1      This is the worst service I've ever received.   
2                     Totally ok