# Setting Emotion Labels for Comment and Post

First, we initialize the list of emotion labels in the same order as the classes of the GoEmotions dataset.

In [1]:
import pandas as pd
import numpy as np

# Emotion names in GoEmotions class order. Position i is the name used for the
# model's `class_i` probability column; the neutral class comes last.
emotion_labels = [
    'admiration', 'amusement', 'anger', 'annoyance',          # 0-3
    'approval', 'caring', 'confusion', 'curiosity',           # 4-7
    'desire', 'disappointment', 'disapproval', 'disgust',     # 8-11
    'embarrassment', 'excitement', 'fear', 'gratitude',       # 12-15
    'grief', 'joy', 'love', 'nervousness',                    # 16-19
    'optimism', 'pride', 'realization', 'relief',             # 20-23
    'remorse', 'sadness', 'surprise', 'neutrality',           # 24-27
]

## Comment Emotion Labels

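We require a minimum probability of 0.7 before labelling a comment as neutral, because the model was biased towards the neutral class and predicted it when it should not. If the neutral probability is below this threshold, the comment receives the highest-probability non-neutral emotion instead.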

In [None]:
# Neutrality is only assigned when its probability reaches this cut-off;
# the high bar acts as a penalty against over-predicting the neutral class.
neutral_threshold_comment = 0.70

# Read the comment predictions (replace the `{add directory}` placeholder
# with the real location before running).
comments_df = pd.read_csv(
    '/{add directory}/comments_with_prob_predictions.csv'
)

# Function to map one row of class probabilities to a single emotion label
def map_to_emotions_comment(row, neutral_threshold=None, labels=None):
    """Return the emotion label for one row of class probabilities.

    The last entry of ``row`` is treated as the neutral class. If it reaches
    the threshold the row is labelled ``'neutrality'``; otherwise the label of
    the highest-probability non-neutral class is returned.

    Parameters
    ----------
    row : pandas.Series or array-like
        Class probabilities in label order, neutral class last.
    neutral_threshold : float, optional
        Minimum neutral probability required to label the row as neutral.
        Defaults to the notebook-level ``neutral_threshold_comment``.
    labels : sequence of str, optional
        Label names in the same order as ``row``. Defaults to the
        notebook-level ``emotion_labels``.

    Returns
    -------
    str
        ``'neutrality'`` or the name of the strongest non-neutral emotion.
    """
    if neutral_threshold is None:
        neutral_threshold = neutral_threshold_comment
    if labels is None:
        labels = emotion_labels

    # Convert to a plain array so every access below is positional. Indexing a
    # Series whose index is 'class_i' strings with an integer key (row[-1]) is
    # deprecated in pandas and is not guaranteed to stay positional.
    probs = np.asarray(row)

    if probs[-1] >= neutral_threshold:
        return 'neutrality'

    # Neutral is excluded, so the argmax position maps directly onto `labels`.
    return labels[int(np.argmax(probs[:-1]))]

# Probability columns, one per emotion label (class_0 ... class_{n-1}).
# Built once from emotion_labels so the columns that are read and the columns
# that are dropped can never disagree (previously the drop list hard-coded 28).
prob_columns = [f'class_{i}' for i in range(len(emotion_labels))]

# Label every comment from its row of class probabilities
comments_df['emotion_label'] = comments_df[prob_columns].apply(
    map_to_emotions_comment, axis=1
)

# The raw probabilities are no longer needed once the label is assigned
columns_to_drop = list(prob_columns)
comments_df = comments_df.drop(columns=columns_to_drop)

# Save the updated DataFrame with the new label column
output_file_path_with_labels = '/{add directory}/comments_with_emotion_labels.csv'
comments_df.to_csv(output_file_path_with_labels, index=False)

print(f"Emotion labels have been added and saved to '{output_file_path_with_labels}'.")


## Post Emotion Labels

We use a slightly lower neutral threshold (0.65) for posts: we expect posts to be neutral more often than comments, so the penalty on the neutral class is smaller.

In [None]:
# Neutrality is only assigned to a post when its probability reaches this
# cut-off, which penalises over-predicting the neutral class.
neutral_threshold_post = 0.65

# Read the post predictions (replace the `{add directory}` placeholder
# with the real location before running).
posts_df = pd.read_csv(
    '/{add directory}/posts_with_prob_predictions.csv'
)

# Function to map one row of class probabilities to a single emotion label
def map_to_emotions_post(row, neutral_threshold=None, labels=None):
    """Return the emotion label for one row of class probabilities.

    The last entry of ``row`` is treated as the neutral class. If it reaches
    the threshold the row is labelled ``'neutrality'``; otherwise the label of
    the highest-probability non-neutral class is returned.

    Parameters
    ----------
    row : pandas.Series or array-like
        Class probabilities in label order, neutral class last.
    neutral_threshold : float, optional
        Minimum neutral probability required to label the row as neutral.
        Defaults to the notebook-level ``neutral_threshold_post``.
    labels : sequence of str, optional
        Label names in the same order as ``row``. Defaults to the
        notebook-level ``emotion_labels``.

    Returns
    -------
    str
        ``'neutrality'`` or the name of the strongest non-neutral emotion.
    """
    if neutral_threshold is None:
        neutral_threshold = neutral_threshold_post
    if labels is None:
        labels = emotion_labels

    # Convert to a plain array so every access below is positional. Indexing a
    # Series whose index is 'class_i' strings with an integer key (row[-1]) is
    # deprecated in pandas and is not guaranteed to stay positional.
    probs = np.asarray(row)

    if probs[-1] >= neutral_threshold:
        return 'neutrality'

    # Neutral is excluded, so the argmax position maps directly onto `labels`.
    return labels[int(np.argmax(probs[:-1]))]

# Probability columns, one per emotion label (class_0 ... class_{n-1}).
# Built once from emotion_labels so the columns that are read and the columns
# that are dropped can never disagree (previously the drop list hard-coded 28).
prob_columns = [f'class_{i}' for i in range(len(emotion_labels))]

# Label every post from its row of class probabilities
posts_df['emotion_label'] = posts_df[prob_columns].apply(
    map_to_emotions_post, axis=1
)

# Drop the prediction and combined_text columns in a single call
columns_to_drop = list(prob_columns)
posts_df = posts_df.drop(columns=columns_to_drop + ['combined_text'])

# Save the updated DataFrame with the new label column
output_file_path_with_labels = '/{add directory}/posts_with_emotion_labels.csv'
posts_df.to_csv(output_file_path_with_labels, index=False)

print(f"Emotion labels have been added and saved to '{output_file_path_with_labels}'.")
