In [3]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords, sentiwordnet as swn, wordnet as wn
from nltk.tokenize import word_tokenize, sent_tokenize
from collections import Counter

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('sentiwordnet')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

def get_adjective_sentiment(word):
    synsets = list(swn.senti_synsets(word))
    if not synsets:
        return 0.0

    scores = []
    for syn in synsets:
        if syn.synset.pos() == 'a' or syn.synset.pos() == 's':
            scores.append(syn.pos_score() - syn.neg_score())

    if len(scores) > 0:
        return sum(scores) / len(scores)
    return 0.0

def get_rake_keywords(text):
    stop_words = set(stopwords.words('english'))
    sentences = sent_tokenize(text)
    phrase_list = []

    for sentence in sentences:
        words = word_tokenize(sentence)
        phrase = []
        for word in words:
            if word.lower() not in stop_words and word.isalpha():
                phrase.append(word.lower())
            else:
                if phrase:
                    phrase_list.append(" ".join(phrase))
                    phrase = []
        if phrase:
            phrase_list.append(" ".join(phrase))

    word_freq = Counter()
    word_degree = Counter()

    for phrase in phrase_list:
        words = phrase.split()
        length = len(words)
        for w in words:
            word_freq[w] += 1
            word_degree[w] += length

    word_scores = {}
    for w in word_freq:
        word_scores[w] = word_degree[w] / word_freq[w]

    phrase_scores = {}
    for phrase in phrase_list:
        if phrase not in phrase_scores:
            score = 0
            for w in phrase.split():
                score += word_scores[w]
            phrase_scores[phrase] = score

    return sorted(phrase_scores.items(), key=lambda x: x[1], reverse=True)

def analyze_sentiment(text, aspect):
    tokens = word_tokenize(text.lower())
    aspect_words = aspect.split()
    score = 0
    count = 0
    negations = {'not', 'no', 'never', 'none', 'neither', 'nor', 'hardly'}

    for i, token in enumerate(tokens):
        if token in aspect_words:
            window_start = max(0, i - 3)
            window_end = min(len(tokens), i + 4)
            window = tokens[window_start:window_end]

            is_negated = False
            local_score = 0

            for w in window:
                if w in negations:
                    is_negated = True

                word_score = get_adjective_sentiment(w)
                if word_score != 0:
                    local_score += word_score

            if is_negated:
                local_score = local_score * -1

            if local_score != 0:
                score += local_score
                count += 1

    if count > 0:
        return score / count
    return 0.0

df = pd.read_csv('headphone.csv')
df['clean_text'] = df['ReviewBody'].astype(str).str.lower()
df['clean_text'] = df['clean_text'].apply(lambda x: re.sub(r'[^a-z\s]', ' ', x))

all_text = " ".join(df['clean_text'].dropna().tolist())
keywords = get_rake_keywords(all_text)
top_aspects = [k[0] for k in keywords[:5]]

print("Top Aspects Found:", top_aspects)

results = {}
for aspect in top_aspects:
    sentiments = []
    for text in df['clean_text']:
        s = analyze_sentiment(text, aspect)
        if s != 0:
            sentiments.append(s)

    if len(sentiments) > 0:
        avg_score = sum(sentiments) / len(sentiments)
        results[aspect] = {'average_sentiment': avg_score, 'count': len(sentiments)}

print("\nFinal Analysis Results:")
print(pd.DataFrame(results).T)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package sentiwordnet to /root/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Top Aspects Found: ['product ki warrenty kamse kam mahine ho na jaruri hai mahineme iseme problem hu wa hai worst earphones', 'damaged product ek number hai matalab skullcandy ke inked se bhi supper paisa wasul original product thanks amazon ok best bluetooth earphone', 'ft received earbuds call ke time bat krne pr srrrr srrr ki awaj aa rhi h aaj', 'pocket good price good product superb bass effort nice thankful amazon superb bass effort nice thankful amazon fully paisa vasul still', 'others reviewers mentioned oxmmm yr gjb k base hai battery lfe mstsound quality oxmmm value']

Final Analysis Results:
                                                    average_sentiment   count
product ki warrenty kamse kam mahine ho na jaru...           0.385804  1813.0
damaged product ek number hai matalab skullcand...           0.449284  2215.0
ft received earbuds call ke time bat krne pr sr...           0.199549   413.0
pocket good price good product superb bass effo...           0.591251  3412.0
o