In [26]:
import spacy
from transformers import pipeline
from collections import Counter
import pandas as pd
from textblob import TextBlob

# Load Spacy for NLP
nlp = spacy.load("en_core_web_sm")

# Load a sentiment analysis model
sentiment_model = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english"
)

# Clean tag to remove generic words
def clean_tag(tag):
    return tag.lower().strip().replace("the ", "").replace("a ", "").replace("an ", "")

# Extract tags dynamically with lemmatization
def extract_tags_with_nlp(review):
    doc = nlp(review)
    tags = set()  # Use a set to avoid duplicates
    
    # Remove stop words before extracting noun chunks and other tokens
    for chunk in doc.noun_chunks:
        cleaned_tag = clean_tag(chunk.lemma_)
        if cleaned_tag not in {"it", "they", "we", "you", "ear"} and not nlp.vocab[cleaned_tag].is_stop:  # Exclude stop words and pronouns
            tags.add(cleaned_tag)
    
    for token in doc:
        # Include NOUN and exclude stop words and punctuation
        if token.pos_ == "NOUN" and not token.is_stop and not token.is_punct:
            cleaned_tag = clean_tag(token.lemma_)
            tags.add(cleaned_tag)

    return list(tags)

# Extract the sentence for context
def get_context_sentence(review, tag):
    doc = nlp(review)
    for sent in doc.sents:
        if tag in sent.text.lower():
            return sent.text.strip()
    return review

# Custom rules for known negative phrases
def check_custom_rules(tag, context):
    negative_phrases = ["stopped working", "disappointing", "doesn't work", "poor quality"]
    for phrase in negative_phrases:
        if phrase in context.lower():
            return "NEGATIVE"
    return None

# Analyze tags with improved sentiment accuracy
def analyze_review_tags(review):
    tags = extract_tags_with_nlp(review)
    results = {}
    for tag in tags:
        # Extract the context sentence
        context_sentence = get_context_sentence(review, tag)
        # Check for custom rules first
        custom_result = check_custom_rules(tag, context_sentence)
        if custom_result:
            results[tag] = custom_result
            continue
        # Use the sentiment model otherwise
        input_text = f"Review: {context_sentence} This is about {tag}."
        sentiment = sentiment_model(input_text)[0]
        results[tag] = sentiment["label"]  # POSITIVE or NEGATIVE
    return results

# Load data from CSV file
data = pd.read_csv('data.csv')

# Extract tags from all reviews
all_tags = []
for review in data['review']:
    all_tags.extend(extract_tags_with_nlp(review))

# Count the frequency of each tag
tag_counts = Counter(all_tags)

# Define common words to exclude from tags
common_words = {"product", "item", "thing", "stuff", "something"}

# Determine the number of relevant tags dynamically based on a threshold
threshold = 0.01  # Adjust the threshold as needed
total_tags = sum(tag_counts.values())
tags = [phrase for phrase, count in tag_counts.items() if count / total_tags >= threshold and phrase not in common_words]

# Print the list of all generated tags
print("Generated Tags:")
print(tags)

# Associate tags with reviews
def assign_tags(phrases):
    return list(set(phrases) & set(tags))

data['tags'] = data['review'].apply(lambda review: assign_tags(extract_tags_with_nlp(review)))

# Perform sentiment analysis and add 'sentiment_indicator' column
def get_sentiment_indicator(review):
    analysis = TextBlob(review)
    if analysis.sentiment.polarity > 0:
        return '✔️'
    elif analysis.sentiment.polarity < 0:
        return '❌'
    else:
        return '😐'

data['sentiment_indicator'] = data['review'].apply(get_sentiment_indicator)

# Create a tag to reviews mapping
tag_reviews = {tag: data[data['tags'].apply(lambda x: tag in x)] for tag in tags}

# Aggregate sentiment indicators for each tag
tag_sentiments = {}
for tag, reviews in tag_reviews.items():
    positive_count = (reviews['sentiment_indicator'] == '✔️').sum()
    negative_count = (reviews['sentiment_indicator'] == '❌').sum()
    total_count = positive_count + negative_count
    
    if positive_count > negative_count:
        overall_sentiment = '✔️ Positive'
    else:
        overall_sentiment = '❌ Negative'
    
    tag_sentiments[tag] = overall_sentiment

# Display the overall sentiment for each tag
print("\nOverall Sentiment for Each Tag:")
for tag, sentiment in tag_sentiments.items():
    print(f"{tag}: {sentiment}")

Device set to use cpu


Generated Tags:
['quality', 'sound quality', 'sound', 'bass', 'battery', 'ear', 'backup', 'headphone', 'price']

Overall Sentiment for Each Tag:
quality: ✔️ Positive
sound quality: ✔️ Positive
sound: ✔️ Positive
bass: ✔️ Positive
battery: ✔️ Positive
ear: ✔️ Positive
backup: ✔️ Positive
headphone: ✔️ Positive
price: ✔️ Positive


In [20]:
selected_tag = 'battery'  # Replace with any tag from the generated tags
filtered_reviews = tag_reviews.get(selected_tag, pd.DataFrame())
if not filtered_reviews.empty:
    print(f"\nReviews for tag '{selected_tag}':")
    print(filtered_reviews[['review']])
else:
    print(f"\nNo reviews found for tag '{selected_tag}'.")


Reviews for tag 'battery':
                                                 review
2     awesome sound quality. pros 7-8 hrs of battery...
5     Awsome sound powerful bass battery backup is a...
12    I am using this headphone since 6 months, grea...
14    Reson for 1 star : Sounds for alerts for conne...
16    sound: its relay rock when I compare with othe...
...                                                 ...
9854  Sound quality is not so good.i received I spea...
9857  Awsm sound quality.Vocals are clear.Average ba...
9865  Sound and bass is very super and I am impresse...
9914  Excellent battery backupSound quality also exc...
9920  The product is very nice but initially it fits...

[836 rows x 1 columns]


In [27]:
!pip freeze > requirements.txt

In [1]:
import pkg_resources

# List of libraries to check
libraries = [
    "en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85",
    "streamlit",
    "pandas",
    "spacy",
    "textblob",
    "collections"
]

# Function to get the version of a library
def get_version(lib):
    try:
        version = pkg_resources.get_distribution(lib).version
        return f"{lib}=={version}"
    except pkg_resources.DistributionNotFound:
        return f"{lib} is not installed"

# Generate requirements.txt
with open("requirements.txt", "w") as f:
    for lib in libraries:
        if "@" in lib:
            f.write(lib + "\n")
        else:
            version_info = get_version(lib)
            f.write(version_info + "\n")
            print(version_info)

print("requirements.txt has been generated.")

streamlit==1.41.0
pandas==2.2.3
spacy==3.8.2
textblob==0.18.0.post0
collections is not installed
requirements.txt has been generated.
