# Dataset loading and analysis

In [22]:
#importing the necessary libraries
import pandas as pd 
from datasets import load_dataset 

#loading the dataset using the Hugging Face datasets library
df = load_dataset("google-research-datasets/go_emotions")

In [2]:
# Exploring the training set
df['train'][0]

{'text': "My favourite food is anything I didn't have to cook myself.",
 'labels': [27],
 'id': 'eebbqej'}

In [3]:
df['validation'][0]

{'text': 'Is this in New Orleans?? I really feel like this is New Orleans.',
 'labels': [27],
 'id': 'edgurhb'}

In [4]:
#mapping the labels to the emotion names; 
# Link to mapping: https://github.com/google-research/google-research/blob/756ae45c4880ad6a01869608250d85a8fb253799/goemotions/data/emotions.txt

label_index = {
    "0": "admiration",
    "1": "amusement",
    "2": "anger",
    "3": "annoyance",
    "4": "approval",
    "5": "caring",
    "6": "confusion",
    "7": "curiosity",
    "8": "desire",
    "9": "disappointment",
    "10": "disapproval",
    "11": "disgust",
    "12": "embarassment",
    "13": "excitement",
    "14": "fear",
    "15": "gratitude",
    "16": "grief",
    "17": "joy",
    "18": "love",
    "19": "nervousness",
    "20": "optimism",
    "21": "pride",
    "22": "realization",
    "23": "relief",
    "24": "remorse",
    "25": "sadness",
    "26": "surprise",
    "27": "neutral"
}

In [6]:
#Get the first 1000 rows of the training set, map the labels to the emotion names and store in data_df

# Get first 1000 rows from training set
train_subset = df['train'].select(range(1000))

# Convert to list of dictionaries and map labels to emotion names
data_list = []
for example in train_subset:
    # Map each label (integer) to its emotion name using label_index
    emotion_labels_list = [label_index[str(label)] for label in example['labels']]
    emotion_labels_string = ", ".join(emotion_labels_list)
    data_list.append({
        'comment': example['text'],
        'emotion labels': emotion_labels_string  # Now contains emotion names instead of numbers
    })

# Create DataFrame
comments_df = pd.DataFrame(data_list)


In [9]:
#print the first 10 rows of the dataframe
comments_df.sample(10)

Unnamed: 0,comment,emotion labels
780,also anxious that people will be angry or surp...,nervousness
194,I always just say “thank you” because it’s a n...,"admiration, gratitude"
174,Define woman please if you're not going to use...,neutral
866,What are you even talking about? I'm sorry you...,remorse
295,"I'm scared to even ask my mom ,I might get ye...",fear
747,I have the same issue with a new co-worker and...,annoyance
376,Omg so glad I’m not alone,"joy, relief"
772,"I wouldn’t worry, most United fans have never ...",disapproval
856,I know you're trying to play devil's advocate ...,neutral
519,"That's pretty good all things considered, if t...","admiration, approval, optimism"


In [10]:
# Count all individual emotions in the dataframe
# quick check to see if all emotions are used

emotion_counts = {emotion: 0 for emotion in label_index.values()}

for emotion_combo in comments_df['emotion labels']:
    for emotion in emotion_combo.split(', '):
        emotion_counts[emotion] += 1

# Print the counts
print("=" * 60)
print("COUNT OF ALL EMOTIONS IN THE DATAFRAME:")
print("=" * 60)
for emotion, count in sorted(emotion_counts.items()):
    print(f"{emotion}: {count}")

# (2) Check for missing emotions
missing_emotions = [emotion for emotion, count in emotion_counts.items() if count == 0]

print("\n" + "=" * 60)
print("MISSING EMOTIONS:")
print("=" * 60)
if not missing_emotions:
    print("All emotions are present at least once.")
else:
    print("The following emotions are missing:", missing_emotions)
print("=" * 60)



COUNT OF ALL EMOTIONS IN THE DATAFRAME:
admiration: 99
amusement: 61
anger: 41
annoyance: 57
approval: 67
caring: 21
confusion: 33
curiosity: 50
desire: 13
disappointment: 34
disapproval: 49
disgust: 13
embarassment: 6
excitement: 13
fear: 10
gratitude: 63
grief: 6
joy: 39
love: 47
nervousness: 3
neutral: 336
optimism: 29
pride: 1
realization: 19
relief: 2
remorse: 9
sadness: 32
surprise: 35

MISSING EMOTIONS:
All emotions are present at least once.


# LLM setup for performance comparison

In [11]:
# -----------------------------
# Import + API key setup block
# -----------------------------
import os
from dotenv import load_dotenv

# Load variables from the .env file (including OPENAI_API_KEY)
load_dotenv()

# Set your API key; Ensure that you have created an API key in Open AI and added it to the .env file
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# LangChain core utilities
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# OpenAI Chat models via LangChain
from langchain_openai import ChatOpenAI

# Progress bar for batch inference
from tqdm import tqdm

In [12]:
#Create a prompt template to pass to the LLM for inference
prompt_template = """
    You are a highly intelligent emotion classification assistant.
    You carefully read a comment, and label it with one or more pre-selected emotion labels to it.
    The emotion labels are listed here:
    {emotions} 
    Your output should be just the emotion label that you are applying to the comment, and if you
    think there are multple labels then output the labels separated by commas.
    The comment to analyze is here: {comment}
    """
#initialize the LangChain string output parser
output_parser = StrOutputParser()

#Initialize the OpenAI model for comparison - let's use gpt-4o-mini 
gpt4o_mini_llm = ChatOpenAI(temperature = 0.0, model = "gpt-4o-mini")  

#Initialize LangChain prompt template object 
emotion_labeling_prompt = ChatPromptTemplate.from_template(prompt_template) 

#Create an inference chain 
labeling_chain = ( 

    {"comment" : RunnablePassthrough(), "emotions" : RunnablePassthrough()}
    | emotion_labeling_prompt
    | gpt4o_mini_llm
    | output_parser
)

#create a list of emotions to pass to the LLM in the prompt 
emotion_labels = []
for label in label_index.values():
    print(label) 
    emotion_labels.append(label)

admiration
amusement
anger
annoyance
approval
caring
confusion
curiosity
desire
disappointment
disapproval
disgust
embarassment
excitement
fear
gratitude
grief
joy
love
nervousness
optimism
pride
realization
relief
remorse
sadness
surprise
neutral


# GPT 4o Mini Comparison

In [13]:
# The initial gpt-4-t baseline on the first 100 comments
def analyze_emotion(comment, emotions):
    input_data = {
        "comment": comment,
        "emotions": emotion_labels
    }
    try:
        result = labeling_chain.invoke(input_data)
        return result
    except Exception as e:
        return str(e)

gpt4o_mini_emotion_labels = []

# Loop through the first 100 comments with progress bar
for i in tqdm(range(100), desc="Processing comments", unit="comment"):
    comment = comments_df['comment'][i]  # Get the i-th comment
    emotion_label = analyze_emotion(comment, emotion_labels)  # Analyze the comment
    gpt4o_mini_emotion_labels.append(emotion_label)  # Append the result to the list

# Assign the list of emotion labels to a new column in the DataFrame
# Assign directly to first 100 rows using .loc to avoid NaN values in remaining rows
comments_df.loc[:99, 'gpt4o-mini emotion label'] = gpt4o_mini_emotion_labels

Processing comments: 100%|██████████| 100/100 [00:52<00:00,  1.91comment/s]


In [21]:
#Preview the dataset
comments_df[7:12]

Unnamed: 0,comment,emotion labels,gpt4o-mini emotion label
7,We need more boards and to create a bit more s...,"desire, optimism","approval, optimism"
8,Damn youtube and outrage drama is super lucrat...,admiration,"anger, annoyance, disapproval"
9,It might be linked to the trust factor of your...,neutral,"curiosity, confusion"
10,Demographics? I don’t know anybody under 35 wh...,confusion,"confusion, curiosity"
11,"Aww... she'll probably come around eventually,...","amusement, approval","admiration, amusement, caring, optimism"


# Creating training and validation datasets for fine-tuning OpenAI LLMs

In [25]:
# Create a training dataset with 600 examples in JSONL format, which is a pre-requisite for fine-tuning OpenAI models
import json

# Create emotion labels string for the system message
emotion_labels_str = ", ".join(emotion_labels)

# Prepare training data from rows 100 to 700
training_data = []

# Loop through rows 100 to 700 (inclusive)
for idx in tqdm(range(100, 700), desc="Creating training dataset", unit="row"):
    row = comments_df.iloc[idx]
    
    # Create the JSON structure for each training example; This chat prompt template for OpenAI LLM inference explicitely mentions the different roles
    training_example = {
        "messages": [
            {
                "role": "system",
                "content": f"You are a highly intelligent emotion classification assistant. You carefully read a comment, and label it with one or more pre-selected emotion labels to it. The emotion labels are listed here: {emotion_labels_str}.Your output should be just the emotion label that you are applying to the comment, and if you think there are multple labels then output the labels separated by commas."
            },
            {
                "role": "user",
                "content": f"Label the emotion of this comment: {row['comment']}"
            },
            {
                "role": "assistant",
                "content": row['emotion labels']
            }
        ]
    }
    
    training_data.append(training_example)

# Write to JSONL file
training_data_jsonl = "training_data.jsonl"
with open(training_data_jsonl, 'w') as f:
    for example in training_data:
        f.write(json.dumps(example) + '\n')

print(f"\nTraining dataset created successfully!")
print(f"Total examples: {len(training_data)}")
print(f"Output file: {training_data_jsonl}")

Creating training dataset: 100%|██████████| 600/600 [00:00<00:00, 29741.21row/s]


Training dataset created successfully!
Total examples: 600
Output file: training_data.jsonl





In [24]:
# Create a validation dataset in JSONL format (required for fine-tuning OpenAI models)

# Create emotion labels string for the system message
emotion_labels_str = ", ".join(emotion_labels)

# Prepare validation data from rows 700 to 1000
validation_data = []

# Loop through rows 700 to 1000 (inclusive)
for idx in tqdm(range(700, 1000), desc="Creating validation dataset", unit="row"):
    row = comments_df.iloc[idx]
    
    # Create the JSON structure for each validation example
    validation_example = {
        "messages": [
            {
                "role": "system",
                "content": (
                    f"You are a highly intelligent emotion classification assistant. "
                    f"You carefully read a comment and label it with one or more pre-selected emotion labels. "
                    f"The emotion labels are listed here: {emotion_labels_str}. "
                    f"Your output should be just the emotion label you apply to the comment, and if there are multiple, "
                    f"output the labels separated by commas."
                )
            },
            {
                "role": "user",
                "content": f"Label the emotion of this comment: {row['comment']}"
            },
            {
                "role": "assistant",
                "content": row['emotion labels']  # ground-truth label
            }
        ]
    }
    
    validation_data.append(validation_example)

# Write to JSONL file
validation_data_jsonl = "validation_data.jsonl"
with open(validation_data_jsonl, 'w') as f:
    for example in validation_data:
        f.write(json.dumps(example) + '\n')

print("\nValidation dataset created successfully!")
print(f"Total examples: {len(validation_data)}")
print(f"Output file: {validation_data_jsonl}")

Creating validation dataset: 100%|██████████| 300/300 [00:00<00:00, 29117.67row/s]


Validation dataset created successfully!
Total examples: 300
Output file: validation_data.jsonl





## Fine-Tuned GPT 3.5 Turbo Comparison

In [31]:
#Use the same prompt template for the fine-tuned model as we did for the gpt-4o-mini model

""" Now let's use the fine-tuned gpt-3.5-turbo model for inference; 
You can find the fine-tuned model under OpenAI Platform > Fine-Tuning > Jobs > Job Name > Output Model """

gpt35t_ft_llm = ChatOpenAI(temperature = 0.0, model = "ft:gpt-3.5-turbo-1106:personal:llm-finetuning-go-emotions:CcdmOzNu")  

#Initialize LangChain prompt template object 
emotion_labeling_prompt_gpt35t_ft = ChatPromptTemplate.from_template(prompt_template) 

#Create an inference chain 
labeling_chain_gpt35t_ft = ( 

    {"comment" : RunnablePassthrough(), "emotions" : RunnablePassthrough()}
    | emotion_labeling_prompt_gpt35t_ft
    | gpt35t_ft_llm
    | output_parser
)

#create a list of emotions to pass to the LLM in the prompt 
emotion_labels_gpt35t_ft = []
for label in label_index.values():
    emotion_labels_gpt35t_ft.append(label) 


# Function to analyze the emotion of a comment using the fine-tuned gpt-3.5-turbo model
def analyze_emotion_gpt35t_ft(comment, emotions):
    input_data = {
        "comment": comment,
        "emotions": emotion_labels_gpt35t_ft
    }
    try:
        result = labeling_chain_gpt35t_ft.invoke(input_data)
        return result
    except Exception as e:
        return str(e)

gpt35t_ft_emotion_labels = []

# Loop through the first 100 comments with progress bar
for i in tqdm(range(100), desc="Processing comments", unit="comment"):
    comment = comments_df['comment'][i]  # Get the i-th comment
    emotion_label = analyze_emotion_gpt35t_ft(comment, emotion_labels_gpt35t_ft)  # Analyze the comment
    gpt35t_ft_emotion_labels.append(emotion_label)  # Append the result to the list

# Assign the list of emotion labels to a new column in the DataFrame
# Assign directly to first 100 rows using .loc to avoid NaN values in remaining rows
comments_df.loc[:99, 'gpt3.5t ft emotion label'] = gpt35t_ft_emotion_labels


Processing comments: 100%|██████████| 100/100 [01:29<00:00,  1.12comment/s]


In [42]:
""" Preview the dataset to see a sample of the fine-tuned model's predictions
against the ground-truth labels and the gpt-4o-mini model's predictions"""
comments_df.iloc[7:12]

Unnamed: 0,comment,emotion labels,gpt4o-mini emotion label,gpt3.5t ft emotion label
7,We need more boards and to create a bit more s...,"desire, optimism","curiosity, optimism",neutral
8,Damn youtube and outrage drama is super lucrat...,admiration,"anger, annoyance, disapproval",anger
9,It might be linked to the trust factor of your...,neutral,curiosity,neutral
10,Demographics? I don’t know anybody under 35 wh...,confusion,"confusion, curiosity",neutral
11,"Aww... she'll probably come around eventually,...","amusement, approval","amusement, caring, optimism",amusement
