In [1]:
from openai import OpenAI
import pandas as pd
import json
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np

In [2]:
# Whitespace-separated names of the 28 emotion labels. Other cells call
# ems.split() to obtain the label list. Order matters: emotions_to_categorical
# uses the integer label ids from the data as positions in this list.
ems = """
        admiration
        amusement
        anger
        annoyance
        approval
        caring
        confusion
        curiosity
        desire
        disappointment
        disapproval
        disgust
        embarrassment
        excitement
        fear
        gratitude
        grief
        joy
        love
        nervousness
        optimism
        pride
        realization
        relief
        remorse
        sadness
        surprise
        neutral
    """

In [3]:
def emotions_to_categorical(df):
    """Turn the label-id lists in ``df['emotions']`` into a 0/1 indicator frame.

    Args:
        df: object whose ``'emotions'`` entry yields, per row, an iterable of
            integer label ids used as positions into a 28-slot row.

    Returns:
        A new DataFrame with one row per input row and one column per name in
        ``ems.split()``; a cell is 1 when that label id was listed, else 0.
    """
    def _indicator_row(label_ids):
        # One slot per emotion label, switched on for every listed id.
        flags = [0] * 28
        for label_id in label_ids:
            flags[label_id] = 1
        return flags

    indicator_rows = [_indicator_row(label_ids) for label_ids in df['emotions']]
    return pd.DataFrame(indicator_rows, columns=ems.split())

In [4]:
def emotions_to_ekman(df):
    """Collapse 28-way indicator rows into 7 coarse emotion groups.

    Args:
        df: iterable of indexable rows (the callers in this notebook pass a
            2-D array); position ``k`` of a row is the 0/1 flag for the k-th
            fine-grained label.

    Returns:
        A DataFrame with columns
        ``['angry', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral']``.
        A group cell is 1 when at least one fine-grained label mapped to that
        group is set in the row, else 0.
    """
    group_columns = ['angry', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral']
    # fine_to_group[k] is the position in group_columns for fine-grained label k.
    fine_to_group = [3, 3, 0, 0, 3, 3, 5, 5, 3, 4, 0, 1, 4, 3, 2, 3, 4, 3, 3, 2, 3, 3, 5, 3, 4, 4, 5, 6]

    coarse_rows = []
    for fine_row in df:
        # Collect every group that has at least one active fine-grained label.
        active_groups = {
            fine_to_group[pos]
            for pos in range(len(fine_row))
            if fine_row[pos] == 1
        }
        coarse_rows.append(
            [1 if group in active_groups else 0 for group in range(len(group_columns))]
        )

    return pd.DataFrame(coarse_rows, columns=group_columns)

In [5]:
def data_init(path = "../data/dev.tsv", n_rows = 2500):
    """Load a tab-separated GoEmotions-style file into a text + indicator frame.

    The file is read without a header and its three columns are named
    ``text``, ``emotions`` (comma-separated integer label ids) and ``id``.

    Args:
        path: location of the TSV file.
        n_rows: number of leading rows to keep (default 2500, the previously
            hard-coded value). Pass ``None`` to keep every row.

    Returns:
        DataFrame whose first column is ``text`` followed by the 0/1 label
        columns produced by ``emotions_to_categorical``.
    """
    df = pd.read_csv(path, sep="\t", encoding = "utf-8", header=None)
    df.columns = ['text', 'emotions', 'id']
    # str() keeps this working when pandas infers an integer column because
    # every row happens to carry a single label id.
    df['emotions'] = [[int(token) for token in str(raw).split(',')] for raw in df['emotions']]
    df = pd.concat([df, emotions_to_categorical(df)], axis=1)
    df = df.drop(columns=['emotions', 'id'])
    # NOTE(review): backslashes and double quotes are escaped by hand here, but
    # the request file is later serialised with json.dumps, which escapes them
    # again -- the model presumably sees literal backslashes. Left unchanged so
    # existing results stay comparable; confirm whether this is intended.
    df['text'] = [s.replace('\\', '\\\\').replace('"', '\\"') for s in df['text']]
    return df.iloc[:n_rows, :]

In [8]:
def evaluation(original_df, emotion_res):
    """Score predicted label names against the gold indicator columns and print a report.

    Args:
        original_df: frame whose first column is skipped and whose remaining
            columns are used as the gold 0/1 label matrix.
        emotion_res: per-row lists of predicted label names; names that are
            not in ``ems.split()`` are ignored.

    Returns:
        Tuple ``(accuracy, f1_micro, f1_macro)``.
    """
    emotions_list = ems.split()

    # All-zero prediction frame; each recognised predicted label flips one cell to 1.
    predicted_df = pd.DataFrame(
        data=[[0] * 28 for _ in range(len(original_df))],
        columns=emotions_list,
    )
    for row_idx, predicted_labels in enumerate(emotion_res):
        for label in predicted_labels:
            if label in emotions_list:
                predicted_df.loc[row_idx, label] = 1

    y_pred = predicted_df.to_numpy()
    y_true = original_df.iloc[:, 1:].to_numpy()

    accuracy = accuracy_score(y_true, y_pred)

    precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(y_true, y_pred, average='micro')
    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(y_true, y_pred, average='macro')
    precision_per_label, recall_per_label, f1_per_label, _ = precision_recall_fscore_support(y_true, y_pred, average=None)

    # Spread of the per-label scores around their mean.
    precision_macro_std = np.std(precision_per_label)
    recall_macro_std = np.std(recall_per_label)
    f1_macro_std = np.std(f1_per_label)

    print("--- 모델 평가 결과 ---")
    print(f"전체 샘플에 대한 정확도 (Exact Match Accuracy): {accuracy:.4f}")
    print("\n--- Micro 평균 지표 ---")
    print(f"Precision (Micro): {precision_micro:.4f}")
    print(f"Recall (Micro): {recall_micro:.4f}")
    print(f"F1-Score (Micro): {f1_micro:.4f}")
    print("\n--- Macro 평균 지표 ---")
    print(f"Precision (Macro): {precision_macro:.4f}")
    print(f"Recall (Macro): {recall_macro:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")

    print("\n--- 라벨별 지표 ---")
    per_label_scores = zip(emotions_list, precision_per_label, recall_per_label, f1_per_label)
    for name, label_precision, label_recall, label_f1 in per_label_scores:
        print(f"{name} - Precision: {label_precision:.4f}, Recall: {label_recall:.4f}, F1-Score: {label_f1:.4f}")

    print(f"\nPrecision (Macro) 표준편차: {precision_macro_std:.4f}")
    print(f"Recall (Macro) 표준편차: {recall_macro_std:.4f}")
    print(f"F1-Score (Macro) 표준편차: {f1_macro_std:.4f}")

    return accuracy, f1_micro, f1_macro

In [9]:
def evaluation_ekman(original_df, emotion_res):
    """Score predictions after collapsing both gold and predicted labels to 7 groups.

    Predictions are first laid out as a 28-column 0/1 matrix, then both that
    matrix and the gold matrix are passed through ``emotions_to_ekman`` before
    the metrics are computed and printed.

    Args:
        original_df: frame whose first column is skipped and whose remaining
            columns are used as the gold fine-grained 0/1 label matrix.
        emotion_res: per-row lists of predicted fine-grained label names; names
            that are not in ``ems.split()`` are ignored.

    Returns:
        Tuple ``(accuracy, f1_micro, f1_macro)`` on the 7-group labels.
    """
    emotions_list = 'anger disgust fear joy sadness surprise neutral'.split()
    # Split the label string once; previously it was re-split for every
    # predicted label inside the loop below.
    fine_labels = ems.split()
    predicted_df = pd.DataFrame(data = [[0 for _ in range(28)] for _ in range(len(original_df))], columns=fine_labels)
    for i in range(len(emotion_res)):
        for j in emotion_res[i]:
            if j in fine_labels:
                predicted_df.loc[i, j] = 1
    predicted = emotions_to_ekman(predicted_df.to_numpy()).to_numpy()
    original = emotions_to_ekman(original_df.iloc[:,1:].to_numpy()).to_numpy()

    accuracy = accuracy_score(original, predicted)
    
    precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(
        original, predicted, average='micro'
    )
    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
        original, predicted, average='macro'
    )
    
    precision_per_label, recall_per_label, f1_per_label, _ = precision_recall_fscore_support(
        original, predicted, average=None
    )

    # Spread of the per-group scores around their mean.
    precision_macro_std = np.std(precision_per_label)
    recall_macro_std = np.std(recall_per_label)
    f1_macro_std = np.std(f1_per_label)

    print("--- 모델 평가 결과 ---")
    print(f"전체 샘플에 대한 정확도 (Exact Match Accuracy): {accuracy:.4f}")
    print("\n--- Micro 평균 지표 ---")
    print(f"Precision (Micro): {precision_micro:.4f}")
    print(f"Recall (Micro): {recall_micro:.4f}")
    print(f"F1-Score (Micro): {f1_micro:.4f}")
    print("\n--- Macro 평균 지표 ---")
    print(f"Precision (Macro): {precision_macro:.4f}")
    print(f"Recall (Macro): {recall_macro:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")
    
    print("\n--- 라벨별 지표 ---")
    for i in range(len(emotions_list)):
        print(f"{emotions_list[i]} - Precision: {precision_per_label[i]:.4f}, Recall: {recall_per_label[i]:.4f}, F1-Score: {f1_per_label[i]:.4f}")
    
    print(f"\nPrecision (Macro) 표준편차: {precision_macro_std:.4f}")
    print(f"Recall (Macro) 표준편차: {recall_macro_std:.4f}")
    print(f"F1-Score (Macro) 표준편차: {f1_macro_std:.4f}")

    return accuracy, f1_micro, f1_macro

In [14]:
def file_init():
    """Read every prompt fragment under ``./prompt/`` into memory.

    Returns:
        dict mapping a short key (e.g. ``'persona'``, ``'cot'``) to the full
        text of the corresponding file.

    Raises:
        OSError: if any of the listed files cannot be opened.
    """
    file_names = {
        'persona': './prompt/persona.txt',
        'guidelines': './prompt/guidelines.txt',
        'output_structure': './prompt/output_structure.txt',
        'few_shot': './prompt/few_shot.txt',
        'few_shot_4': './prompt/few_shot_4.txt',
        'few_shot_8': './prompt/few_shot_8.txt',
        'few_shot_12': './prompt/few_shot_12.txt',
        'few_shot_16': './prompt/few_shot_16.txt',
        'few_shot_20': './prompt/few_shot_20.txt',
        'cot': './prompt/chain_of_thought.txt',
        'description': './prompt/emotion_description.txt'
    }
    file_dict = {}
    for key, value in file_names.items():
        # The with-block closes the handle even when read() raises, which the
        # previous open()/close() pair did not.
        with open(value, 'r') as file:
            file_dict[key] = file.read()
    return file_dict

In [24]:
# Load all prompt fragments once; the system prompt cell indexes this dict by key.
files = file_init()

In [25]:
# Earlier variant of the system prompt, kept for reference (not executed):
# system = f"We are trying to classify 28 emotions of goemotion, please read the prompt below carefully and proceed with the classification without errors. {persona}, {guidelines}, {chain_of_thought}, {output_structure}, {few_shot_example}"
# Current system prompt: persona, emotion descriptions, guidelines and output
# structure, concatenated in that order with no separator between fragments.
system = f"{files['persona']}{files['description']}{files['guidelines']}{files['output_structure']}"

In [26]:
# Show the assembled system prompt for a visual check.
print(system)

(PERSONA ROLE) 
You are an expert system specializing in emotion classification, designed to analyze text with a highly analytical and empathetic approach. You excel at detecting and interpreting a wide range of emotions, considering nuanced language and complex emotional cues. 
Read the Reddit post, identify the emotions expressed, and choose the emotion label that best matches the overall sentiment.
The following 28 emotion label: [admiration, amusement, anger, annoyance, approval, caring, confusion, curiosity, desire, disappointment, disapproval, disgust, embarrassment, excitement, fear, gratitude, grief, joy, love, nervousness, optimism, pride, realization, relief, remorse, sadness, surprise, neutral].
(PERSONA ROLE END)
(EMOTION DESCRIPTION)
admiration Finding something impressive or worthy of respect.
amusement Finding something funny or being entertained.
anger A strong feeling of displeasure or antagonism.
annoyance Mild anger, irritation.
approval Having or expressing a favora

In [27]:
# Load the evaluation data with data_init's default arguments
# (../data/dev.tsv, leading 2500 rows).
data = data_init()

In [28]:
# Name of the batch request file that is written under ./inputs/ and then uploaded.
file_name = "input_0_0_5_0.jsonl"

In [29]:
# Structured-output specification shared by every request: an object holding an
# "analysis" array of {emotion, reason} items, with "emotion" restricted to the
# 28 label names. It never changes between requests, so it is built once.
result_format = {
    "format": {
        "type": "json_schema",
        "name": "result",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "analysis": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "emotion": {
                                "type": "string",
                                "enum": [ "admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion", "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism", "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral" ]
                            },
                            "reason": {
                                "type": "string"
                            }
                        },
                        "required": ["emotion", "reason"],
                        "additionalProperties": False
                    }
                }
            },
            "required": ["analysis"],
            "additionalProperties": False
        }
    }
}

# One JSON line per text; custom_id "query{k}" carries the row position.
with open(f"./inputs/{file_name}", "w") as f:
    for k, record in enumerate(data["text"]):
        baseQuery = {
            "custom_id": f"query{k}",
            "method": "POST",
            "url": "/v1/responses",
            "body": {
                "model": "gpt-4o-mini",
                "temperature": 0.25,
                "top_p": 0.75,
                "input": [
                    {"role": "developer", "content": f"{system}"},
                    {"role": "user", "content": f"{record}"},
                ],
                "max_output_tokens": 1000,
                "text": result_format,
            },
        }
        f.write(json.dumps(baseQuery) + "\n")

In [30]:
# Read the OpenAI API key from a local file so it is not hardcoded in the notebook.
# readline() keeps the trailing newline, which would end up inside the key, so
# surrounding whitespace is stripped. The with-block closes the file even if
# reading fails.
with open('./key/openai_key.txt', 'r') as key_file:
    api_key = key_file.readline().strip()

In [31]:
# Create the API client with the key loaded from ./key/openai_key.txt.
client = OpenAI(api_key=api_key)

In [32]:
# Upload the request file for batch processing. The handle is opened in a
# with-block so it is closed after the upload (it was previously left open).
with open(f"./inputs/{file_name}", "rb") as upload_handle:
    batch_input_file = client.files.create(
        file=upload_handle,
        purpose='batch'
    )
print(batch_input_file)

FileObject(id='file-9jD7Ar9ni9SFULg4FKVLUh', bytes=13282821, created_at=1761530732, filename='input_0_0_5_0.jsonl', object='file', purpose='batch', status='processed', expires_at=1764122732, status_details=None)


In [33]:
# Submit the uploaded request file as a batch job against the /v1/responses
# endpoint with a 24h completion window.
batch_input_file_id = batch_input_file.id
create_batch=client.batches.create(
    input_file_id=batch_input_file_id,
    endpoint="/v1/responses",
    completion_window="24h",
)

In [34]:
# Keep the batch id; the polling cell uses it to retrieve the job status.
batch_id = create_batch.id
print(batch_id)

batch_68fed36eff188190952e7bb05d3d9599


In [53]:
# Poll the batch job once. `result` stays None unless the job has completed.
batch = client.batches.retrieve(batch_id)
result = None
if batch.status == 'completed':
    out = batch.output_file_id
    if out is not None:
        print('done!')
        result = client.files.content(out)
    elif batch.error_file_id is not None:
        # Completed without an output file: every request failed.
        # NOTE(review): this branch stores the error file's text (a str), while
        # the success branch stores the content object whose .text is read by
        # the parsing cell -- confirm which shape is wanted here.
        print('error')
        result = client.files.content(batch.error_file_id).text
    else:
        # Neither an output nor an error file is available to download.
        print('error')
elif batch.status == 'failed':
    print('failed')
    print(batch.errors)
else:
    print('it does not finish yet')
    print(batch.status)
    print(batch.request_counts)


done!


In [54]:
# Each non-empty line of the downloaded output is one JSON record.
# Blank lines are filtered out instead of unconditionally dropping the last
# element, so the final record survives when the text has no trailing newline.
# split('\n') is used rather than splitlines() so that other line-boundary
# characters inside a JSON string cannot split a record.
json_res = [json.loads(line) for line in result.text.split('\n') if line.strip()]

In [56]:
# Batch output lines are not guaranteed to come back in request order, so each
# response is placed at the row index encoded in its custom_id ("query{k}")
# rather than at its position in the output file. Rows without a usable
# response keep an empty prediction list.
emotion_res = [[] for _ in range(len(data))]

for record in json_res:
    try:
        row_idx = int(record['custom_id'][len('query'):])
    except (KeyError, TypeError, ValueError):
        # Record does not carry a parsable "query{k}" id; show it and move on.
        print(record)
        continue
    if not 0 <= row_idx < len(emotion_res):
        print(record)
        continue

    labels = []
    try:
        payload = json.loads(record['response']['body']['output'][0]['content'][0]['text'])
        for item in payload['analysis']:
            labels.append(item['emotion'])
    except (KeyError, IndexError, TypeError, ValueError):
        # Missing response body or malformed/truncated JSON text
        # (json.JSONDecodeError is a ValueError): show the raw record.
        print(record)

    emotion_res[row_idx] = labels

In [57]:
# Dump the parsed predictions (one list of label names per entry) for inspection.
print(emotion_res)

[['curiosity'], ['anger'], ['sadness'], ['confusion'], ['neutral'], ['disappointment'], ['anger'], ['neutral'], ['optimism'], ['disapproval'], ['optimism'], ['disappointment'], ['sadness'], ['admiration'], ['anger'], ['amusement'], ['amusement'], ['neutral'], ['joy'], ['disapproval'], ['amusement'], ['caring'], ['disapproval'], ['love'], ['anger'], ['caring'], ['anger'], ['amusement'], ['gratitude'], ['curiosity'], ['amusement'], ['fear', 'caring'], ['disapproval'], ['confusion'], ['remorse'], ['sadness'], ['amusement'], ['disapproval'], ['amusement'], ['confusion'], ['joy'], ['excitement'], ['embarrassment'], ['confusion'], ['neutral'], ['anger'], ['fear'], ['disappointment'], ['optimism'], ['curiosity'], ['disappointment'], ['sadness'], ['neutral'], ['confusion'], ['sadness'], ['joy'], ['confusion', 'nervousness'], ['fear', 'sadness'], ['confusion'], ['joy'], ['confusion'], ['sadness'], ['disapproval'], ['optimism'], ['confusion'], ['joy'], ['annoyance'], ['joy'], ['neutral'], ['curi

In [58]:
# Fine-grained (28-label) evaluation; unpacks the returned (accuracy, micro F1, macro F1).
a, b, d = evaluation(data, emotion_res)

--- 모델 평가 결과 ---
전체 샘플에 대한 정확도 (Exact Match Accuracy): 0.2384

--- Micro 평균 지표 ---
Precision (Micro): 0.3274
Recall (Micro): 0.3032
F1-Score (Micro): 0.3148

--- Macro 평균 지표 ---
Precision (Macro): 0.3694
Recall (Macro): 0.3451
F1-Score (Macro): 0.3128

--- 라벨별 지표 ---
admiration - Precision: 0.6618, Recall: 0.1974, F1-Score: 0.3041
amusement - Precision: 0.3364, Recall: 0.7297, F1-Score: 0.4606
anger - Precision: 0.2302, Recall: 0.6778, F1-Score: 0.3437
annoyance - Precision: 0.2000, Recall: 0.1429, F1-Score: 0.1667
approval - Precision: 0.1868, Recall: 0.0983, F1-Score: 0.1288
caring - Precision: 0.2911, Recall: 0.3538, F1-Score: 0.3194
confusion - Precision: 0.1616, Recall: 0.5441, F1-Score: 0.2492
curiosity - Precision: 0.2249, Recall: 0.3304, F1-Score: 0.2676
desire - Precision: 0.3600, Recall: 0.2647, F1-Score: 0.3051
disappointment - Precision: 0.1532, Recall: 0.2152, F1-Score: 0.1789
disapproval - Precision: 0.2030, Recall: 0.2950, F1-Score: 0.2405
disgust - Precision: 0.4286, Re

In [59]:
# Same predictions scored after collapsing to the 7 coarse groups; the returned
# (accuracy, micro F1, macro F1) tuple is shown as the cell output.
evaluation_ekman(data, emotion_res)

--- 모델 평가 결과 ---
전체 샘플에 대한 정확도 (Exact Match Accuracy): 0.4556

--- Micro 평균 지표 ---
Precision (Micro): 0.5303
Recall (Micro): 0.5089
F1-Score (Micro): 0.5194

--- Macro 평균 지표 ---
Precision (Macro): 0.4699
Recall (Macro): 0.4986
F1-Score (Macro): 0.4576

--- 라벨별 지표 ---
anger - Precision: 0.3739, Recall: 0.6181, F1-Score: 0.4659
disgust - Precision: 0.4286, Recall: 0.2941, F1-Score: 0.3488
fear - Precision: 0.3780, Recall: 0.5849, F1-Score: 0.4593
joy - Precision: 0.7178, Recall: 0.6831, F1-Score: 0.7000
sadness - Precision: 0.4153, Recall: 0.5365, F1-Score: 0.4682
surprise - Precision: 0.3647, Recall: 0.5563, F1-Score: 0.4405
neutral - Precision: 0.6107, Recall: 0.2170, F1-Score: 0.3202

Precision (Macro) 표준편차: 0.1280
Recall (Macro) 표준편차: 0.1611
F1-Score (Macro) 표준편차: 0.1133


(0.4556, 0.5193554361918874, 0.4575682303313048)

In [80]:
# Row positions whose prediction list contains 'disgust'.
TF = [row for row, predicted in enumerate(emotion_res) if 'disgust' in predicted]

# For each of those rows, the names of the columns in `data` whose value equals 1.
TF_res = []
for row in TF:
    is_one = data.loc[row] == 1
    TF_res.append([column for column, flag in is_one.items() if flag])

In [81]:
print(TF_res)

[['admiration', 'disappointment'], ['disgust', 'embarrassment'], ['disgust'], ['disgust'], ['amusement'], ['disgust'], ['disgust'], ['disgust'], ['admiration'], ['disgust'], ['neutral'], ['neutral'], ['approval', 'fear'], ['disapproval'], ['annoyance'], ['approval'], ['disapproval', 'disgust'], ['disgust'], ['neutral'], ['surprise'], ['anger'], ['disgust'], ['neutral'], ['neutral'], ['disgust'], ['fear', 'love'], ['fear'], ['disgust'], ['neutral'], ['neutral'], ['confusion'], ['disapproval'], ['disappointment', 'disgust', 'fear'], ['disgust'], ['disapproval', 'disgust']]


In [85]:
# Tally how often each column name appears across the TF_res lists.
TF_dict = {}

for true_labels in TF_res:
    for label in true_labels:
        TF_dict[label] = TF_dict.get(label, 0) + 1


In [82]:
# Row positions whose 'disgust' column equals 1 in `data`.
FT = [row for row in range(len(data)) if data.loc[row, 'disgust'] == 1]

# The prediction lists for exactly those rows, in the same order.
FT_res = [emotion_res[row] for row in FT]

In [86]:
# Tally how often each predicted label appears across the FT_res lists.
FT_dict = {}
for predicted_labels in FT_res:
    for label in predicted_labels:
        FT_dict[label] = FT_dict.get(label, 0) + 1

In [None]:
# Print each label name with its count from TF_dict.
for i, j  in TF_dict.items():
    print(i, j)

# Actual (gold) labels of the samples that were predicted as disgust

admiration 2
disappointment 2
disgust 15
embarrassment 1
amusement 1
neutral 7
approval 2
fear 4
disapproval 4
annoyance 1
surprise 1
anger 1
love 1
confusion 1


In [None]:
# Print each predicted label name with its count from FT_dict.
for i, j  in FT_dict.items():
    print(i, j)


# How the samples whose gold label is disgust were predicted

disapproval 10
curiosity 2
disgust 15
anger 12
amusement 5
disappointment 4
caring 2
fear 1
love 1
pride 1
surprise 1
sadness 1
confusion 2
annoyance 1
neutral 1
