# Chapter 5: Classified Sentiment Analysis for Social Media

Dear user, we will classify the comments from social media, according to the top 3 key components, \
then perform sentiment analysis to identify the design opportunities with those components.

In [5]:
'''
Dear user, enter your Product here!
'''

product = "PICO 4 All-in-One VR Headset"

In [6]:
'''
Dear user, enter the directories to your social media sources here!
'''
import pandas as pd

youtube_comment_list = pd.read_pickle("support/%s/youtube/comment_list.pkl" % product)
reddit_comment_list = pd.read_pickle("support/%s/reddit/comment_list.pkl" % product)

In [7]:
'''
Dear user, enter the directories to your social media sources here!
'''
components_to_classify = ['Head Straps', 'Audio', 'Controllers', 'Lenses', 'Cable']

In [8]:
""" Create Classify and Sentiment folder """
search_terms = product

import os
import shutil

# Create "classify" folder
try:
    os.makedirs(f"support/{search_terms}/classify")
except FileExistsError:
    shutil.rmtree(f"support/{search_terms}/classify")
    os.makedirs(f"support/{search_terms}/classify")

# Create "sentiment" folder
try:
    os.makedirs(f"support/{search_terms}/sentiment")
except FileExistsError:
    shutil.rmtree(f"support/{search_terms}/sentiment")
    os.makedirs(f"support/{search_terms}/sentiment")

In [9]:
! pip install accelerate



In [10]:
youtube_comment_list = youtube_comment_list[:100]
reddit_comment_list = reddit_comment_list[:100]

In [11]:
"""
Classify comments by components
"""
def classify_comments(comment_list, file):
    import csv
    from transformers import pipeline

    candidates = components_to_classify + ['Other']               # replace the candidates to suit your needs

    candidate_counts = {candidate: 0 for candidate in candidates}   # Initialize counters

    model = "facebook/bart-large-mnli"                              # default model
    # model = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"

    # Initialize CSV files for each category
    csv_files = {candidate: open(f"support/{product}/classify/{file}_{candidate}_comments.csv", "w", newline="", encoding="utf-8") for candidate in candidates}
    writers = {candidate: csv.writer(csv_files[candidate]) for candidate in candidates}
    for writer in writers.values():
        writer.writerow(["Sequence", "Label"])  # Header row

    classifier = pipeline("zero-shot-classification", model=model)
    
    labeled_comments_count = 0  # Initialize counter for labeled comments
    
    # Write results to CSV for each comment
    for comment in comment_list:
        # Classify comment only if it's not empty
        if comment.strip():
            result = classifier(comment, candidate_labels=candidates)
            sequence = result['sequence'] if result['labels'] else None
            label = result['labels'][0] if result['labels'] else None
            
            # Update candidate counters and labeled comments count
            if label:
                candidate_counts[label] += 1
                labeled_comments_count += 1
                
                # Write the comment to the respective CSV file based on its category
                writers[label].writerow([sequence, label])
        else:
            # Write empty comment to each CSV file
            for writer in writers.values():
                writer.writerow([None, None])

    # Close all CSV files
    for file in csv_files.values():
        file.close()

    # Calculate the number of empty comments
    empty_comments_count = len(comment_list) - labeled_comments_count

    # Print summary
    print("Candidate Counts:")
    for candidate, count in candidate_counts.items():
        print(f"{candidate}: {count}")
    print(f"Empty Comments: {empty_comments_count}")
    print("Detailed results written to respective CSV files.")

In [12]:
classify_comments(youtube_comment_list, "youtube")

Candidate Counts:
Head Straps: 0
Audio: 30
Controllers: 6
Lenses: 4
Cable: 4
Other: 54
Empty Comments: 2
Detailed results written to respective CSV files.


In [13]:
classify_comments(reddit_comment_list, "reddit")

Candidate Counts:
Head Straps: 0
Audio: 19
Controllers: 1
Lenses: 2
Cable: 4
Other: 72
Empty Comments: 2
Detailed results written to respective CSV files.


In [14]:
def sentiment_analysis(file):
    import csv
    from transformers import pipeline, AutoTokenizer

    candidates = components_to_classify + ['Other']

    for candidate in candidates:
        # Initialize sentiment counts for each candidate
        positive_count = 0
        negative_count = 0
        neutral_count = 0
        empty_comments_count = 0

        comment_list = []
        with open(f"support/{product}/classify/{file}_{candidate}_comments.csv", "r", encoding="utf-8") as csvfile:
            csv_reader = csv.reader(csvfile)
            for row in csv_reader:
                comment_list.append(row[0])

        if not comment_list:
            continue

        # Define the maximum sequence length
        max_seq_length = 512  # Adjust Truncated Length

        # Model for sentiment analysis
        model = "cardiffnlp/twitter-roberta-base-sentiment"  # negative, neutral, positive

        # Initialise the tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model, use_fast=True)

        # Filter out or truncate excessively long sequences
        filtered_comments = [comment[:max_seq_length - 2] for comment in comment_list]  # -2 to account for special tokens [CLS] and [SEP]

        # Initialise the pipeline with padding
        classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, padding=True, device=-1)

        results = classifier(filtered_comments)

        # Accumulate sentiment counts
        for i in range(len(results)):
            result = results[i]
            sentiment = result['label']
            if comment_list[i].strip():  # Check if the comment is not empty
                if sentiment == "LABEL_2" or sentiment == "POSITIVE":
                    positive_count += 1
                elif sentiment == "LABEL_0" or sentiment == "NEGATIVE":
                    negative_count += 1
                elif sentiment == "LABEL_1":
                    neutral_count += 1
            else:  # If the comment is empty, count it
                empty_comments_count += 1

        # Output CSV
        with open(f"support/{product}/sentiment/{file}_{candidate}_analysis.csv", "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["Comment", "Sentiment"])
            for i in range(len(results)):
                writer.writerow([comment_list[i], results[i]['label']])

        # Calculate overall sentiment for each candidate
        overall_sentiment = "Positive" if positive_count > negative_count else "Negative" if negative_count > positive_count else "Neutral"

        # Print summary for each candidate
        print(f"Candidate: {candidate}")
        print("Number of comments with Positive sentiment:", positive_count)
        print("Number of comments with Negative sentiment:", negative_count)
        print("Number of comments with Neutral sentiment:", neutral_count)
        print("Number of empty comments:", empty_comments_count)
        print("Overall Sentiment:", overall_sentiment)
        print()  # Add an empty line for readability


In [15]:
sentiment_analysis('youtube')

Candidate: Head Straps
Number of comments with Positive sentiment: 0
Number of comments with Negative sentiment: 0
Number of comments with Neutral sentiment: 1
Number of empty comments: 2
Overall Sentiment: Neutral

Candidate: Audio
Number of comments with Positive sentiment: 8
Number of comments with Negative sentiment: 5
Number of comments with Neutral sentiment: 18
Number of empty comments: 2
Overall Sentiment: Positive

Candidate: Controllers
Number of comments with Positive sentiment: 2
Number of comments with Negative sentiment: 2
Number of comments with Neutral sentiment: 3
Number of empty comments: 2
Overall Sentiment: Neutral

Candidate: Lenses
Number of comments with Positive sentiment: 1
Number of comments with Negative sentiment: 2
Number of comments with Neutral sentiment: 2
Number of empty comments: 2
Overall Sentiment: Negative

Candidate: Cable
Number of comments with Positive sentiment: 3
Number of comments with Negative sentiment: 0
Number of comments with Neutral sen

In [16]:
sentiment_analysis('reddit')

Candidate: Head Straps
Number of comments with Positive sentiment: 0
Number of comments with Negative sentiment: 0
Number of comments with Neutral sentiment: 1
Number of empty comments: 2
Overall Sentiment: Neutral

Candidate: Audio
Number of comments with Positive sentiment: 8
Number of comments with Negative sentiment: 5
Number of comments with Neutral sentiment: 7
Number of empty comments: 2
Overall Sentiment: Positive

Candidate: Controllers
Number of comments with Positive sentiment: 0
Number of comments with Negative sentiment: 1
Number of comments with Neutral sentiment: 1
Number of empty comments: 2
Overall Sentiment: Negative

Candidate: Lenses
Number of comments with Positive sentiment: 1
Number of comments with Negative sentiment: 0
Number of comments with Neutral sentiment: 2
Number of empty comments: 2
Overall Sentiment: Positive

Candidate: Cable
Number of comments with Positive sentiment: 2
Number of comments with Negative sentiment: 0
Number of comments with Neutral sen

In [None]:
### CONFUSION MATRIX