In [None]:
import pandas as pd
import nltk
import spacy
from nltk.sentiment import SentimentIntensityAnalyzer
from collections import defaultdict

# --- Sentiment tooling -------------------------------------------------
# VADER needs its lexicon on disk before the analyzer can be constructed,
# so fetch it first and build the analyzer right away.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

# --- Linguistic pipeline -----------------------------------------------
# spaCy's small English model; the script uses it for noun-chunk extraction.
nlp = spacy.load("en_core_web_sm")

# --- Input data --------------------------------------------------------
# The rest of the script reads the 'product_id' and 'review_description'
# columns of this file.
df = pd.read_csv('data.csv')

# Step 1: Sentiment Analysis per review
def _label_from_score(score):
    """Map a VADER compound score to 'Positive', 'Negative' or 'Neutral'.

    The cutoffs are inclusive (>= 0.05 is positive, <= -0.05 is negative),
    following the thresholds given in the VADER documentation, so a score of
    exactly +/-0.05 is not reported as neutral.
    """
    if score >= 0.05:
        return 'Positive'
    if score <= -0.05:
        return 'Negative'
    return 'Neutral'


# The compound score is VADER's single normalized polarity value per text.
# str() guards against non-string cells (numbers, missing values).
df['sentiment_score'] = df['review_description'].apply(
    lambda text: sia.polarity_scores(str(text))['compound']
)
df['sentiment_label'] = df['sentiment_score'].apply(_label_from_score)

# Step 2: Aspect-Based Sentiment per product
# Maps product_id -> aspect (lemmatized noun) -> list of the sentiment labels
# of every review of that product in which the aspect was mentioned.
product_aspect_sentiments = defaultdict(lambda: defaultdict(list))

# Skip rows with no review text: str(NaN) is the literal "nan", which would
# otherwise be parsed as if it were something the customer wrote.
reviewed = df[df['review_description'].notna()]

# nlp.pipe processes the texts in batches, which is considerably faster than
# calling nlp() once per row; it yields docs in the same order as its input.
docs = nlp.pipe(str(text) for text in reviewed['review_description'])

for product_id, sentiment, doc in zip(
    reviewed['product_id'], reviewed['sentiment_label'], docs
):
    for chunk in doc.noun_chunks:
        # Keep only chunks headed by a common noun, and ignore chunks of two
        # characters or fewer.
        if chunk.root.pos_ == "NOUN" and len(chunk.text.strip()) > 2:
            # Lower-cased lemma of the head noun, so "Batteries" and
            # "battery" are counted as the same aspect.
            aspect = chunk.root.lemma_.lower()
            product_aspect_sentiments[product_id][aspect].append(sentiment)

# Step 3: Summarize sentiment and aspects per product
def _top_aspects(aspect_counts, limit=5):
    """Return the most-mentioned aspects as a comma-separated string.

    aspect_counts is a list of (aspect, count) pairs. Aspects with a zero
    count are ignored; the rest are ordered by count, highest first, with
    ties kept in their original order (the sort is stable). At most `limit`
    names are returned. Returns "None" when no aspect qualifies.
    """
    mentioned = [pair for pair in aspect_counts if pair[1] > 0]
    mentioned.sort(key=lambda pair: pair[1], reverse=True)
    summary = ", ".join(aspect for aspect, _ in mentioned[:limit])
    return summary if summary else "None"


product_summary = []  # one dict per product (overall verdict + top aspects)
aspect_rows = []      # one dict per (product, aspect) pair

# A single groupby pass replaces re-filtering the whole frame once per
# product. sort=False keeps products in order of first appearance. Rows with
# a missing product_id are dropped by groupby, so they no longer produce a
# summary row with zero reviews.
for product_id, product_reviews in df.groupby('product_id', sort=False):
    label_counts = product_reviews['sentiment_label'].value_counts()
    pos = int(label_counts.get('Positive', 0))
    neg = int(label_counts.get('Negative', 0))
    neu = int(label_counts.get('Neutral', 0))

    # The comparison is strict, so ties (including products whose reviews
    # are all neutral) resolve to "Not Liked".
    overall_verdict = "Liked" if pos > neg else "Not Liked"

    # Collect aspects. .get() is used instead of indexing so that looking up
    # a product with no extracted aspects does not insert an empty entry into
    # the defaultdict.
    aspect_dict = product_aspect_sentiments.get(product_id, {})
    appreciated = []
    criticized = []

    for aspect, sentiments in aspect_dict.items():
        p = sentiments.count('Positive')
        n = sentiments.count('Negative')
        appreciated.append((aspect, p))
        criticized.append((aspect, n))

        # Save aspect-level info
        aspect_rows.append({
            'product_id': product_id,
            'aspect': aspect,
            'positive_mentions': p,
            'negative_mentions': n,
            'net_sentiment': p - n
        })

    # Final product summary
    product_summary.append({
        'product_id': product_id,
        'total_reviews': len(product_reviews),
        'positive_reviews': pos,
        'negative_reviews': neg,
        'neutral_reviews': neu,
        'overall_verdict': overall_verdict,
        'top_appreciated_aspects': _top_aspects(appreciated),
        'top_criticized_aspects': _top_aspects(criticized)
    })

# Convert to DataFrames
# Columns are listed explicitly so that the CSV files still get a header row
# when there is nothing to report (no products, or no aspects extracted);
# a DataFrame built from an empty list would otherwise have no columns.
product_sentiment_summary_df = pd.DataFrame(
    product_summary,
    columns=[
        'product_id',
        'total_reviews',
        'positive_reviews',
        'negative_reviews',
        'neutral_reviews',
        'overall_verdict',
        'top_appreciated_aspects',
        'top_criticized_aspects',
    ],
)
aspect_based_opinion_df = pd.DataFrame(
    aspect_rows,
    columns=[
        'product_id',
        'aspect',
        'positive_mentions',
        'negative_mentions',
        'net_sentiment',
    ],
)

# Save to CSVs (index=False: the positional row index carries no meaning)
product_sentiment_summary_df.to_csv("product_overall_sentiment_summary.csv", index=False)
aspect_based_opinion_df.to_csv("aspect_based_opinion_per_product.csv", index=False)

print("Analysis complete.")
print("product_overall_sentiment_summary.csv: Overall verdict + summary")
print("aspect_based_opinion_per_product.csv: Aspect-wise feedback per product")


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Analysis complete.
product_overall_sentiment_summary.csv: Overall verdict + summary
aspect_based_opinion_per_product.csv: Aspect-wise feedback per product
