## Emotion Analysis on comments
### Save emotion_analysis_by_video.csv
Reference:

https://stackoverflow.com/questions/56862418/sentiment-analysis-with-nrc-emotion-lexicon-in-python

In [1]:
import pandas as pd
import matplotlib as plt
from collections import Counter
import ast
from nrclex import NRCLex

# Load the cleaned comments; the steps below read its 'Video ID' and 'Tokens' columns.
df = pd.read_csv('cleaned_comments.csv')

def analyze_emotions(token_list):
    """Count NRC emotion-lexicon hits for one collection of tokens.

    Args:
        token_list: Iterable of string tokens. They are joined with single
            spaces before being handed to ``NRCLex``.

    Returns:
        dict: A new dict mapping emotion name to its raw count, with the
        'positive' and 'negative' entries left out.
    """
    # Tokens into a single string
    text = ' '.join(token_list)
    # Create NRCLex object
    text_object = NRCLex(text)

    # Build a filtered copy instead of popping keys from the object's own
    # dict: the result no longer depends on whether raw_emotion_scores hands
    # back the same dict on every access, and the NRCLex object is left intact.
    return {
        emotion: count
        for emotion, count in text_object.raw_emotion_scores.items()
        if emotion not in ('positive', 'negative')
    }

# Convert Tokens column from its string representation back to a list
df['Tokens'] = df['Tokens'].apply(ast.literal_eval)

# Combine multiple rows by Video ID.
# Flatten each video's token lists in one pass: summing lists builds a new,
# longer list for every comment, which is quadratic in comments per video.
combined_tokens = (
    df.groupby('Video ID')['Tokens']
    .agg(lambda token_lists: [token for tokens in token_lists for token in tokens])
    .reset_index()
)

# Perform emotion analysis on each video's combined tokens
combined_tokens['Emotion_Scores'] = combined_tokens['Tokens'].apply(analyze_emotions)

# Remove comment tokens; only the per-video scores are saved
combined_tokens = combined_tokens.drop(columns=['Tokens'])

combined_tokens.to_csv('emotion_analysis_by_video.csv', index=False)

# Print the DataFrame to see the results
print(combined_tokens)

KeyboardInterrupt: 

## Extract and sort Top 5 emotions
### Save sorted_emotions.csv

In [25]:
import pandas as pd
import ast

# Read the per-video emotion scores; the 'Emotion_Scores' column is parsed below
df = pd.read_csv('emotion_analysis_by_video.csv')

# Function to rank every emotion by its score (no entries are dropped)
def sort_emotions(emotion_scores_str):
    """Parse a stringified emotion-score dict and rank its entries.

    Args:
        emotion_scores_str: String holding a Python dict literal that maps
            emotion name to score.

    Returns:
        list: ``(emotion, score)`` tuples for every entry, highest score
        first. Entries with equal scores keep their original order.
    """
    ranked = list(ast.literal_eval(emotion_scores_str).items())
    # list.sort is stable, so ties are ordered exactly as sorted() would
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked


# Rank the emotions of every video and store them as a new column
df['Sorted_Emotions'] = [sort_emotions(scores) for scores in df['Emotion_Scores']]

# The raw score column is no longer needed once the ranked one exists
df = df.drop('Emotion_Scores', axis=1)

# Persist the ranked emotions to their own CSV file
df.to_csv('sorted_emotions.csv', index=False)

# Show the resulting DataFrame
print(df)


        Video ID                                     Top_5_Emotions
0    -TeeIEh2IE8  [(joy, 2132), (anticipation, 1444), (trust, 13...
1    -zkOLKMiX9c  [(joy, 705), (trust, 321), (anticipation, 293)...
2    0K9eZCW-lvc  [(joy, 1258), (trust, 716), (anticipation, 573...
3    0_BEwbnmRZ4  [(joy, 965), (trust, 494), (anticipation, 424)...
4    5WH-PSs9hI8  [(joy, 3633), (trust, 1683), (anticipation, 14...
..           ...                                                ...
101  yjhibJ-OqxE  [(joy, 8244), (trust, 4905), (anticipation, 47...
102  zEXXe9Ef_R8  [(joy, 3414), (trust, 1974), (anticipation, 18...
103  zPUSDU0hrwE  [(joy, 538), (trust, 241), (anticipation, 164)...
104  za-ogD7nLi8  [(joy, 1809), (trust, 1133), (anticipation, 94...
105  zgacuXdzCmw  [(joy, 7054), (trust, 6461), (anticipation, 58...

[106 rows x 2 columns]


## Calculate emotion percentages
### Save emotion_percentages.csv
Reference:

OpenAI GPT-3.5 (2024). AI-Generated Assistance for the percentage calculation.

In [26]:
import pandas as pd
import ast

# Read the sorted emotions CSV file
df = pd.read_csv('sorted_emotions.csv')

# Function to calculate percentage of each emotion for each video
def calculate_emotion_percentages(emotion_scores_str):
    """Convert stringified ``(emotion, count)`` pairs into percentage shares.

    Args:
        emotion_scores_str: String holding a Python list literal of
            ``(emotion, occurrences)`` tuples.

    Returns:
        dict: Maps each emotion to its share of the summed occurrences,
        scaled to 0-100. An empty list gives an empty dict. If every count
        is zero, each emotion maps to 0.0 rather than raising
        ZeroDivisionError.
    """
    # Convert string representation of list to an actual list of tuples
    emotion_scores_list = ast.literal_eval(emotion_scores_str)

    # Extract emotion occurrences (a repeated emotion keeps its last count)
    emotion_occurrences = dict(emotion_scores_list)

    # Calculate total occurrences
    total_occurrences = sum(emotion_occurrences.values())

    # Nothing to divide by: report a zero share for every listed emotion
    if total_occurrences == 0:
        return {emotion: 0.0 for emotion in emotion_occurrences}

    # Calculate percentage of each emotion
    return {
        emotion: (occurrence / total_occurrences) * 100
        for emotion, occurrence in emotion_occurrences.items()
    }


# sorted_emotions.csv is written with a 'Sorted_Emotions' column. The saved
# output of that step shows 'Top_5_Emotions', so files from an earlier
# revision may still carry that name. Accept either.
emotions_column = 'Sorted_Emotions' if 'Sorted_Emotions' in df.columns else 'Top_5_Emotions'

# Apply the function to the ranked-emotions column
df['Emotion_Percentages'] = df[emotions_column].apply(calculate_emotion_percentages)

# Remove the ranked-emotions column
df = df.drop(columns=[emotions_column])

# Save the result to a new CSV file
df.to_csv('emotion_percentages.csv', index=False)

# Print the DataFrame to see the results
print(df)


        Video ID                                Emotion_Percentages
0    -TeeIEh2IE8  {'joy': 26.497638578175494, 'anticipation': 17...
1    -zkOLKMiX9c  {'joy': 37.34110169491525, 'trust': 17.0021186...
2    0K9eZCW-lvc  {'joy': 35.56686457449816, 'trust': 20.2431439...
3    0_BEwbnmRZ4  {'joy': 29.987569919204475, 'trust': 15.351149...
4    5WH-PSs9hI8  {'joy': 40.27269704023944, 'trust': 18.6564682...
..           ...                                                ...
101  yjhibJ-OqxE  {'joy': 30.259873733666126, 'trust': 18.003964...
102  zEXXe9Ef_R8  {'joy': 28.528453246427677, 'trust': 16.495362...
103  zPUSDU0hrwE  {'joy': 40.420736288504884, 'trust': 18.106686...
104  za-ogD7nLi8  {'joy': 30.22556390977444, 'trust': 18.9306599...
105  zgacuXdzCmw  {'joy': 25.52652529492654, 'trust': 23.3806180...

[106 rows x 2 columns]


## Calculate average emotion percentages
Reference:

https://docs.python.org/3/library/csv.html#csv.DictReader

In [27]:
import csv
import ast
from collections import defaultdict

# Dictionary to store the summed percentage of each emotion across videos
emotion_sums = defaultdict(float)

# Initialize a counter for the number of videos
num_videos = 0

# newline='' lets the csv module do its own newline handling, as its
# documentation recommends for any file object passed to a reader
with open('emotion_percentages.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        num_videos += 1
        # Each cell holds a dict literal of emotion -> percentage
        emotions = ast.literal_eval(row['Emotion_Percentages'])
        for emotion, percentage in emotions.items():
            emotion_sums[emotion] += percentage

# Calculate the average for each emotion. The divisor is the number of all
# videos, so a video that lacks an emotion contributes 0 to that average.
emotion_averages = {emotion: total / num_videos for emotion, total in emotion_sums.items()}

# Print the averages
for emotion, average in emotion_averages.items():
    print(f"{emotion}: {average}")


joy: 31.67381381945753
anticipation: 15.998161135445455
trust: 18.06944027884433
surprise: 8.547522311768628
sadness: 7.730435621993473
disgust: 5.859039147196958
fear: 6.086023839405715
anger: 6.035563845887906


## Assign dominant emotion
### Save sd_emotions.csv

In [58]:
import pandas as pd
import ast
import csv

def assign_emotion(emotions_str):
    """Pick the dominant emotion for one video from its percentage dict.

    Args:
        emotions_str: String holding a Python dict literal that maps
            emotion name to its percentage.

    Returns:
        str: The first emotion, in the fixed order below, whose percentage
        is strictly above its threshold. Returns 'trust' when none is.
    """
    # (emotion, threshold) pairs, checked in this order; the first hit wins.
    # NOTE(review): the thresholds look like the corpus-wide average
    # percentages plus roughly 0.75 - confirm where they came from.
    thresholds = (
        ('joy', 32.42),
        ('anticipation', 16.75),
        ('trust', 18.82),
        ('surprise', 9.30),
        ('sadness', 8.49),
        ('disgust', 6.61),
        ('fear', 6.83),
        ('anger', 6.79),
    )

    emotions = ast.literal_eval(emotions_str)
    for emotion, threshold in thresholds:
        # .get(): an emotion absent from the dict counts as 0 instead of
        # raising KeyError
        if emotions.get(emotion, 0) > threshold:
            return emotion
    return 'trust'

# Counter is used below but is not imported anywhere else in this cell
from collections import Counter

# Read the CSV file
df = pd.read_csv('emotion_percentages.csv')

# Apply the function to the 'Emotion_Percentages' column
df['Emotion'] = df['Emotion_Percentages'].apply(assign_emotion)

# Write the result to a new CSV file
df[['Video ID', 'Emotion']].to_csv('sd_emotions.csv', index=False)

# Tally how many videos received each dominant emotion. The labels were just
# computed above, so count them directly instead of re-reading the CSV.
# The old loop passed the whole row dict to assign_emotion, which expects
# the 'Emotion_Percentages' string, and so failed inside ast.literal_eval.
all_emotion_scores = Counter(df['Emotion'])



## Merge emotions and topic data
### Save final stablediffusion.csv

In [None]:
import pandas as pd

# Load the per-video emotion labels and the topic data
df_emotions = pd.read_csv('sd_emotions.csv')
df_topics = pd.read_csv('sd_topics.csv')

# Join the two tables on 'Video ID'. With the default join type, only
# IDs present in both files are kept.
df_merged = df_emotions.merge(df_topics, on='Video ID')

# Save the combined table
df_merged.to_csv('stablediffusion.csv', index=False)
