In [2]:
# 🔧 Setup
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from pathlib import Path

# 📦 Fetch the VADER lexicon that the analyzer below relies on
nltk.download("vader_lexicon")

# 📥 Read the cleaned comments from disk
input_csv = Path("../data/cleaned_comments.csv")
df = pd.read_csv(input_csv)

# 🧹 Replace missing comments with "" so every row holds a value to score
df["cleaned_comment"] = df["cleaned_comment"].fillna("")

# 🧠 Build one analyzer and reuse it for every row
sia = SentimentIntensityAnalyzer()


def _compound_score(text):
    """Return the "compound" entry of VADER's polarity scores for ``text``.

    The value is passed through ``str`` first, so non-string cells are
    scored by their string form.
    """
    return sia.polarity_scores(str(text))["compound"]


# 🌀 Score each comment
df["sentiment_score"] = df["cleaned_comment"].apply(_compound_score)

# 🟢 Classify sentiment based on score
def classify_sentiment(score: float, *, threshold: float = 0.05) -> str:
    """Map a compound sentiment score to a coarse label.

    Args:
        score: The compound polarity score to classify.
        threshold: Non-negative cut-off. Scores at or above ``threshold``
            are "Positive" and scores at or below ``-threshold`` are
            "Negative". Defaults to 0.05, the value that was previously
            hard-coded, so one-argument calls behave exactly as before.

    Returns:
        One of "Positive", "Negative" or "Neutral".

    Raises:
        ValueError: If ``threshold`` is negative.
    """
    if threshold < 0:
        raise ValueError(f"threshold must be non-negative, got {threshold!r}")
    if score >= threshold:
        return "Positive"
    if score <= -threshold:
        return "Negative"
    # Anything strictly between the two cut-offs is neutral. A NaN score
    # also lands here, because both comparisons above are False for NaN.
    return "Neutral"

# 🏷️ Turn each numeric score into its sentiment label
labels = df["sentiment_score"].apply(classify_sentiment)
df["sentiment_label"] = labels

# 💾 Write the frame with its sentiment columns, leaving out the index
output_csv = Path("../data/comments_with_sentiment.csv")
df.to_csv(output_csv, index=False)

print("✅ Sentiment analysis complete and saved to comments_with_sentiment.csv")


✅ Sentiment analysis complete and saved to comments_with_sentiment.csv


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/codespace/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
