In [245]:
from google import genai
from google.genai import types
import pandas as pd
from enum import Enum, auto
from pydantic import BaseModel
import re
import json
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np

In [246]:
ems = """
        admiration
        amusement
        anger
        annoyance
        approval
        caring
        confusion
        curiosity
        desire
        disappointment
        disapproval
        disgust
        embarrassment
        excitement
        fear
        gratitude
        grief
        joy
        love
        nervousness
        optimism
        pride
        realization
        relief
        remorse
        sadness
        surprise
        neutral
    """

In [247]:
def emotions_to_categorical(df):
    res = []

    for i in df['emotions']:
        tmp = [0 for _ in range(28)]
        for j in i:
            tmp[j] = 1
        res.append(tmp)
    tmp_df = pd.DataFrame(res, columns=ems.split())
    
    return tmp_df

In [248]:
def emotions_to_ekman(df):
    # anger disgust fear joy sadness surprise neutral
    ekman = [3, 3, 0, 0, 3, 3, 5, 5, 3, 4, 0, 1, 4, 3, 2, 3, 4, 3, 3, 2, 3, 3, 5, 3, 4, 4, 5, 6]
    res = []

    for i in df:
        tmp = [0, 0, 0, 0, 0, 0, 0]
        for j in range(len(i)):
            if i[j] == 1:
                tmp[ekman[j]] = 1
        res.append(tmp)
    tmp_df = pd.DataFrame(res, columns=['angry', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral'])
    
    return tmp_df

In [249]:
def data_init(path = "../data/dev.tsv"):
    df = pd.read_csv(path, sep="\t", encoding = "utf-8", header=None)
    df.columns = ['text', 'emotions', 'id']
    df['emotions'] = list(map(lambda s : list(map(int, s.split(','))), df['emotions']))
    df = pd.concat([df, emotions_to_categorical(df)], axis=1)
    df = df.drop(columns=['emotions', 'id'])
    df['text'] = list(map(lambda s : s.replace('\\', '\\\\').replace('"', '\\"'), list(df['text']))) 
    res = df.sample(2000).reset_index(drop=True)
    return res

In [250]:
def status(original, predict):
    check = []
    for i in range(len(original)):
        cnt = 0
        o = 0
        p = 0
        for j in range(len(original[i])):
            if original[i][j] == 1 and predict[i][j] == 1:
                cnt += 1
            if original[i][j] == 1:
                o += 1
            if predict[i][j] == 1:
                p += 1
        check.append([o, p, cnt])
    return check

In [251]:
def evaluation(original_df, emotion_res):
    emotions_list = ems.split()
    df = original_df
    predicted_df = pd.DataFrame(data = [[0 for _ in range(28)] for _ in range(len(df))], columns=emotions_list)
    for i in range(len(emotion_res)):
        for j in emotion_res[i]:
            if j in emotions_list:
                predicted_df.loc[i, j] = 1
    predicted = predicted_df.to_numpy()
    original = df.iloc[:,1:].to_numpy()

    c = status(original, predicted)
    
    
    accuracy = accuracy_score(original, predicted)
    
    precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(
        original, predicted, average='micro'
    )
    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
        original, predicted, average='macro'
    )
    
    precision_per_label, recall_per_label, f1_per_label, _ = precision_recall_fscore_support(
        original, predicted, average=None
    )

    precision_macro_std = np.std(precision_per_label)
    recall_macro_std = np.std(recall_per_label)
    f1_macro_std = np.std(f1_per_label)

    print("--- Î™®Îç∏ ÌèâÍ∞Ä Í≤∞Í≥º ---")
    print(f"Ï†ÑÏ≤¥ ÏÉòÌîåÏóê ÎåÄÌïú Ï†ïÌôïÎèÑ (Exact Match Accuracy): {accuracy:.4f}")
    print("\n--- Micro ÌèâÍ∑† ÏßÄÌëú ---")
    print(f"Precision (Micro): {precision_micro:.4f}")
    print(f"Recall (Micro): {recall_micro:.4f}")
    print(f"F1-Score (Micro): {f1_micro:.4f}")
    print("\n--- Macro ÌèâÍ∑† ÏßÄÌëú ---")
    print(f"Precision (Macro): {precision_macro:.4f}")
    print(f"Recall (Macro): {recall_macro:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")
    
    print("\n--- ÎùºÎ≤®Î≥Ñ ÏßÄÌëú ---")
    for i in range(len(emotions_list)):
        print(f"{emotions_list[i]} - Precision: {precision_per_label[i]:.4f}, Recall: {recall_per_label[i]:.4f}, F1-Score: {f1_per_label[i]:.4f}")
    
    print(f"\nPrecision (Macro) ÌëúÏ§ÄÌé∏Ï∞®: {precision_macro_std:.4f}")
    print(f"Recall (Macro) ÌëúÏ§ÄÌé∏Ï∞®: {recall_macro_std:.4f}")
    print(f"F1-Score (Macro) ÌëúÏ§ÄÌé∏Ï∞®: {f1_macro_std:.4f}")

    return accuracy, f1_micro, f1_macro, c

In [252]:
def evaluation_ekman(original_df, emotion_res):
    emotions_list = 'anger disgust fear joy sadness surprise neutral'.split()
    predicted_df = pd.DataFrame(data = [[0 for _ in range(28)] for _ in range(len(original_df))], columns=ems.split())
    for i in range(len(emotion_res)):
        for j in emotion_res[i]:
            if j in ems.split():
                predicted_df.loc[i, j] = 1
    predicted = emotions_to_ekman(predicted_df.to_numpy()).to_numpy()
    original = emotions_to_ekman(original_df.iloc[:,1:].to_numpy()).to_numpy()

    accuracy = accuracy_score(original, predicted)
    
    precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(
        original, predicted, average='micro'
    )
    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
        original, predicted, average='macro'
    )
    
    precision_per_label, recall_per_label, f1_per_label, _ = precision_recall_fscore_support(
        original, predicted, average=None
    )

    precision_macro_std = np.std(precision_per_label)
    recall_macro_std = np.std(recall_per_label)
    f1_macro_std = np.std(f1_per_label)

    print("--- Î™®Îç∏ ÌèâÍ∞Ä Í≤∞Í≥º ---")
    print(f"Ï†ÑÏ≤¥ ÏÉòÌîåÏóê ÎåÄÌïú Ï†ïÌôïÎèÑ (Exact Match Accuracy): {accuracy:.4f}")
    print("\n--- Micro ÌèâÍ∑† ÏßÄÌëú ---")
    print(f"Precision (Micro): {precision_micro:.4f}")
    print(f"Recall (Micro): {recall_micro:.4f}")
    print(f"F1-Score (Micro): {f1_micro:.4f}")
    print("\n--- Macro ÌèâÍ∑† ÏßÄÌëú ---")
    print(f"Precision (Macro): {precision_macro:.4f}")
    print(f"Recall (Macro): {recall_macro:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")
    
    print("\n--- ÎùºÎ≤®Î≥Ñ ÏßÄÌëú ---")
    for i in range(len(emotions_list)):
        print(f"{emotions_list[i]} - Precision: {precision_per_label[i]:.4f}, Recall: {recall_per_label[i]:.4f}, F1-Score: {f1_per_label[i]:.4f}")
    
    print(f"\nPrecision (Macro) ÌëúÏ§ÄÌé∏Ï∞®: {precision_macro_std:.4f}")
    print(f"Recall (Macro) ÌëúÏ§ÄÌé∏Ï∞®: {recall_macro_std:.4f}")
    print(f"F1-Score (Macro) ÌëúÏ§ÄÌé∏Ï∞®: {f1_macro_std:.4f}")

    return accuracy, f1_micro, f1_macro

In [253]:
persona_file = open('./prompt/persona.txt', 'r')
persona = persona_file.read()
persona_file.close()

In [254]:
guidelines_file = open('./prompt/guidelines.txt', 'r')
guidelines = guidelines_file.read()
guidelines_file.close()

In [255]:
output_structure_file = open('./prompt/output_structure.txt', 'r')
output_structure = output_structure_file.read()
output_structure_file.close()

In [256]:
few_shot_file = open('./prompt/few_shot.txt', 'r')
few_shot = few_shot_file.read().replace('  ', "").replace('   ', "").replace('    ', "").replace('     ', "").replace('      ', "").replace('\n'," ").replace('\t', " ").replace('"', "'").replace('{ ', '{').replace('[ ', '[').replace(' }', '}').replace(' ]', ']')
few_shot_file.close()

In [257]:
chain_of_thought_file = open('./prompt/chain_of_thought.txt', 'r')
chain_of_thought = chain_of_thought_file.read().replace('\n', ' ').replace('"', "'")
chain_of_thought_file.close()

In [258]:
system = f"{persona}\n{guidelines}\n{output_structure}\n{few_shot}"

In [259]:
# system = "Persona -Role You are an expert system specializing in emotion classification, designed to analyze text with a highly analytical and empathetic approach. -Capability You excel at detecting and interpreting a wide range of emotions, considering nuanced language and complex emotional cues. Task Read the Reddit post, identify the emotions expressed, and choose the emotion label that best matches the overall sentiment."

In [260]:
print(system)

(PERSONA ROLE) 
You are an expert system specializing in emotion classification, designed to analyze text with a highly analytical and empathetic approach. You excel at detecting and interpreting a wide range of emotions, considering nuanced language and complex emotional cues. 
Read the Reddit post, identify the emotions expressed, and choose the emotion label that best matches the overall sentiment.
The following 28 emotion label: [admiration, amusement, anger, annoyance, approval, caring, confusion, curiosity, desire, disappointment, disapproval, disgust, embarrassment, excitement, fear, gratitude, grief, joy, love, nervousness, optimism, pride, realization, relief, remorse, sadness, surprise, neutral].
(PERSONA ROLE END)

(GUIDELINES) 
1. Think step by step carefully, First, identify key phrases and their emotional cues in the text. Second, consider which of the 28 labels best match these cues.
2. Default to a single emotion. Your primary goal is to find the single most dominant em

In [261]:
model = "gemini-2.5-flash-lite"

In [262]:
key_file = open('./key/gemini_key.txt', 'r')
api_key = key_file.readline()
key_file.close()

In [264]:
client = genai.Client(api_key=api_key)

In [265]:
class Emotion(Enum):
    admiration = "admiration"
    amusement = "amusement"
    anger = "anger"
    annoyance = "annoyance"
    approval = "approval"
    caring = "caring"
    confusion = "confusion"
    curiosity = "curiosity"
    desire = "desire"
    disappointment = "disappointment"
    disapproval = "disapproval"
    disgust = "disgust"
    embarrassment = "embarrassment"
    excitement = "excitement"
    fear = "fear"
    gratitude = "gratitude"
    grief = "grief"
    joy = "joy"
    love = "love"
    nervousness = "nervousness"
    optimism = "optimism"
    pride = "pride"
    realization = "realization"
    relief = "relief"
    remorse = "remorse"
    sadness = "sadness"
    surprise = "surprise"
    neutral = "neutral"

In [266]:
class Analysis(BaseModel):
    emotion: Emotion
    reason: str

In [267]:
data = data_init()

In [268]:
query = []

In [269]:
for i in data['text']:
    baseQuery = {
        "contents": [{
            "parts": [{
                "text": f"{i}"
            }]
        }],
        "config": {
            "systemInstruction": {
                "parts": [{
                    "text": f"{system}"
                }]
            },
            'response_mime_type': 'application/json',
            'response_schema': list[Analysis],
            "maxOutputTokens": 500,
            "temperature": 0,
            "thinkingConfig": {
                "includeThoughts": False,
                "thinkingBudget": 0
            }
        }
    }
    query.append(baseQuery)

In [270]:
print(len(data))

2000


In [271]:
inline_batch_jobs = client.batches.create(
    model=model,
    src=query,
    config={
        'display_name': "emotion test 3"
    }
)

In [278]:
job_name = inline_batch_jobs.name
print(f"Polling status for job: {job_name}")

batch_job_inline = client.batches.get(name=job_name)
if batch_job_inline.state.name in ('JOB_STATE_SUCCEEDED', 'JOB_STATE_FAILED', 'JOB_STATE_CANCELLED', 'JOB_STATE_EXPIRED'):
    print(batch_job_inline.state.name)
else:
    print(f"Job not finished. Current state: {batch_job_inline.state.name}.")

Polling status for job: batches/2mfpv7i7w8nkaqbz5swnnvgmnw1gd2z9cbfl
Job not finished. Current state: JOB_STATE_PENDING.


In [237]:
batch_job = batch_job_inline
res = []
for i, inline_response in enumerate(batch_job.dest.inlined_responses):
    if inline_response.response:
        try:
            res.append(json.loads(inline_response.response.text))
        except AttributeError:
            print(inline_response.response) # Fallback
    elif inline_response.error:
        print(f"Error: {inline_response.error}")

In [238]:
print(res)



In [239]:
emotion_res = []

for i in res:
    tmp = []
    for j in i:
        tmp.append(j['emotion'])
    emotion_res.append(tmp)

In [242]:
a, b, d, c = evaluation(data, emotion_res)

--- Î™®Îç∏ ÌèâÍ∞Ä Í≤∞Í≥º ---
Ï†ÑÏ≤¥ ÏÉòÌîåÏóê ÎåÄÌïú Ï†ïÌôïÎèÑ (Exact Match Accuracy): 0.0525

--- Micro ÌèâÍ∑† ÏßÄÌëú ---
Precision (Micro): 0.0767
Recall (Micro): 0.0946
F1-Score (Micro): 0.0847

--- Macro ÌèâÍ∑† ÏßÄÌëú ---
Precision (Macro): 0.0413
Recall (Macro): 0.0598
F1-Score (Macro): 0.0423

--- ÎùºÎ≤®Î≥Ñ ÏßÄÌëú ---
admiration - Precision: 0.0593, Recall: 0.0363, F1-Score: 0.0450
amusement - Precision: 0.0662, Recall: 0.1513, F1-Score: 0.0921
anger - Precision: 0.0346, Recall: 0.1039, F1-Score: 0.0519
annoyance - Precision: 0.0633, Recall: 0.0463, F1-Score: 0.0535
approval - Precision: 0.0705, Recall: 0.1618, F1-Score: 0.0982
caring - Precision: 0.0000, Recall: 0.0000, F1-Score: 0.0000
confusion - Precision: 0.0571, Recall: 0.1053, F1-Score: 0.0741
curiosity - Precision: 0.0312, Recall: 0.0532, F1-Score: 0.0394
desire - Precision: 0.0000, Recall: 0.0000, F1-Score: 0.0000
disappointment - Precision: 0.0347, Recall: 0.0847, F1-Score: 0.0493
disapproval - Precision: 0.0772, Recall

In [None]:
cnt = 0
m = 0
n = 0
for i in c:
    if i[2] > 0:
        cnt += 1
    if i[1] > 1 and i[0] == 1:
        m += 1
    if i[0] > 1:
        n += 1

In [456]:
print(cnt / 800)
print(m, n / 800 * 100)

0.26125
364 15.75


In [241]:
evaluation_ekman(data, emotion_res)

--- Î™®Îç∏ ÌèâÍ∞Ä Í≤∞Í≥º ---
Ï†ÑÏ≤¥ ÏÉòÌîåÏóê ÎåÄÌïú Ï†ïÌôïÎèÑ (Exact Match Accuracy): 0.1790

--- Micro ÌèâÍ∑† ÏßÄÌëú ---
Precision (Micro): 0.2514
Recall (Micro): 0.2836
F1-Score (Micro): 0.2666

--- Macro ÌèâÍ∑† ÏßÄÌëú ---
Precision (Macro): 0.1482
Recall (Macro): 0.1670
F1-Score (Macro): 0.1501

--- ÎùºÎ≤®Î≥Ñ ÏßÄÌëú ---
anger - Precision: 0.1302, Recall: 0.2316, F1-Score: 0.1667
disgust - Precision: 0.0000, Recall: 0.0000, F1-Score: 0.0000
fear - Precision: 0.0000, Recall: 0.0000, F1-Score: 0.0000
joy - Precision: 0.4280, Recall: 0.4862, F1-Score: 0.4552
sadness - Precision: 0.0561, Recall: 0.1096, F1-Score: 0.0742
surprise - Precision: 0.1289, Recall: 0.1875, F1-Score: 0.1528
neutral - Precision: 0.2945, Recall: 0.1538, F1-Score: 0.2021

Precision (Macro) ÌëúÏ§ÄÌé∏Ï∞®: 0.1477
Recall (Macro) ÌëúÏ§ÄÌé∏Ï∞®: 0.1539
F1-Score (Macro) ÌëúÏ§ÄÌé∏Ï∞®: 0.1449


(0.179, 0.26655275523280647, 0.15014721361995206)