In [27]:
from openai import OpenAI
import pandas as pd
import json
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
import matplotlib.pyplot as plt

In [28]:
# Whitespace-separated names of the 28 emotion labels used throughout this notebook.
# Order matters: `ems.split()` supplies the column names of the one-hot label frame, so a
# label's position in this list is the integer id expected in the dataset's `emotions` column.
ems = """
admiration
amusement
anger
annoyance
approval
caring
confusion
curiosity
desire
disappointment
disapproval
disgust
embarrassment
excitement
fear
gratitude
grief
joy
love
nervousness
optimism
pride
realization
relief
remorse
sadness
surprise
neutral
    """

In [29]:
def emotions_to_categorical(df, labels=None):
    """One-hot encode the per-row label ids stored in ``df['emotions']``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``emotions`` column whose cells are iterables of
        integer label ids (positions into ``labels``).
    labels : sequence of str, optional
        Column names, one per label id. Defaults to ``ems.split()``, the
        notebook-wide label list.

    Returns
    -------
    pandas.DataFrame
        One row per input row and one 0/1 column per label. The frame gets a
        fresh default index; the index of ``df`` is not carried over.

    Raises
    ------
    IndexError
        If a label id is outside the range covered by ``labels``.
    """
    if labels is None:
        labels = ems.split()
    labels = list(labels)

    rows = []
    for label_ids in df['emotions']:
        # Width follows the label list instead of a separately hard-coded 28.
        row = [0] * len(labels)
        for label_id in label_ids:
            row[label_id] = 1
        rows.append(row)

    return pd.DataFrame(rows, columns=labels)

In [30]:
def emotions_to_ekman(df):
    """Collapse fine-grained one-hot label rows into 7 coarse categories.

    ``df`` is iterated row by row; each row is an indexable sequence of 0/1
    flags, one per fine-grained label. A coarse flag is set to 1 when any
    fine-grained label mapped to it is 1.

    Returns a DataFrame with one row per input row and the columns
    ``angry, disgust, fear, joy, sadness, surprise, neutral``.
    """
    coarse_names = ['angry', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral']
    # fine_to_coarse[k] is the coarse column (position in coarse_names) for fine label k.
    fine_to_coarse = [
        3, 3, 0, 0, 3, 3, 5,
        5, 3, 4, 0, 1, 4, 3,
        2, 3, 4, 3, 3, 2, 3,
        3, 5, 3, 4, 4, 5, 6,
    ]

    coarse_rows = []
    for fine_row in df:
        coarse = [0] * 7
        for fine_pos, flag in enumerate(fine_row):
            if flag == 1:
                coarse[fine_to_coarse[fine_pos]] = 1
        coarse_rows.append(coarse)

    return pd.DataFrame(coarse_rows, columns=coarse_names)

In [43]:
def data_init(path = "../../data/dev.tsv"):
    """Load a header-less TSV of (text, emotions, id) rows as text plus one-hot labels.

    The ``emotions`` field is a comma-separated list of integer label ids. The
    returned frame has the ``text`` column first, followed by one 0/1 column per
    label (as produced by ``emotions_to_categorical``); the raw ``emotions`` and
    ``id`` columns are dropped. Backslashes and double quotes in ``text`` are
    backslash-escaped.
    """
    raw = pd.read_csv(path, sep="\t", encoding = "utf-8", header=None)
    raw.columns = ['text', 'emotions', 'id']

    # "3,10" -> [3, 10]
    raw['emotions'] = [[int(token) for token in cell.split(',')] for cell in raw['emotions']]

    frame = pd.concat([raw, emotions_to_categorical(raw)], axis=1)
    frame = frame.drop(columns=['emotions', 'id'])

    # Escape backslashes first so the backslashes added for quotes are not doubled.
    frame['text'] = [
        text.replace('\\', '\\\\').replace('"', '\\"')
        for text in frame['text']
    ]
    return frame

In [32]:
def evaluation(original_df, emotion_res):
    """Score predicted emotion labels against the gold one-hot labels and print a report.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Gold data. Every column after the first is taken as a 0/1 label column,
        in the order of ``ems.split()`` (the layout ``data_init`` returns).
    emotion_res : list of list of str
        ``emotion_res[k]`` holds the label names predicted for row ``k``. Names
        not in ``ems`` are ignored. Rows without a prediction (when the list is
        shorter than ``original_df``) count as "no label predicted".

    Returns
    -------
    tuple
        ``(accuracy, f1_micro, f1_macro, macro)`` where ``macro`` is the
        ``(precision, recall, f1, support)`` tuple for ``average='macro'``.

    Raises
    ------
    ValueError
        If there are more predictions than gold rows.
    """
    emotions_list = ems.split()
    label_pos = {name: pos for pos, name in enumerate(emotions_list)}

    if len(emotion_res) > len(original_df):
        raise ValueError(
            f"got {len(emotion_res)} predictions for {len(original_df)} rows"
        )

    # Fill the indicator matrix directly; per-cell DataFrame.loc writes are slow and
    # silently enlarge the frame when an index is out of range.
    predicted = np.zeros((len(original_df), len(emotions_list)), dtype=int)
    for row, predicted_names in enumerate(emotion_res):
        for name in predicted_names:
            pos = label_pos.get(name)
            if pos is not None:
                predicted[row, pos] = 1
    original = original_df.iloc[:, 1:].to_numpy()

    # On multi-label indicator input this is exact-match (subset) accuracy.
    accuracy = accuracy_score(original, predicted)

    precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(
        original, predicted, average='micro'
    )
    # Keep the whole macro tuple so it can be returned without recomputing it.
    macro_scores = precision_recall_fscore_support(
        original, predicted, average='macro'
    )
    precision_macro, recall_macro, f1_macro, _ = macro_scores

    precision_per_label, recall_per_label, f1_per_label, _ = precision_recall_fscore_support(
        original, predicted, average=None
    )

    # Spread of the per-label scores around the macro average.
    precision_macro_std = np.std(precision_per_label)
    recall_macro_std = np.std(recall_per_label)
    f1_macro_std = np.std(f1_per_label)

    print("--- 모델 평가 결과 ---")
    print(f"전체 샘플에 대한 정확도 (Exact Match Accuracy): {accuracy:.4f}")
    print("\n--- Micro 평균 지표 ---")
    print(f"Precision (Micro): {precision_micro:.4f}")
    print(f"Recall (Micro): {recall_micro:.4f}")
    print(f"F1-Score (Micro): {f1_micro:.4f}")
    print("\n--- Macro 평균 지표 ---")
    print(f"Precision (Macro): {precision_macro:.4f}")
    print(f"Recall (Macro): {recall_macro:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")

    print("\n--- 라벨별 지표 ---")
    for name, precision, recall, f1 in zip(
        emotions_list, precision_per_label, recall_per_label, f1_per_label
    ):
        print(f"{name} - Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

    print(f"\nPrecision (Macro) 표준편차: {precision_macro_std:.4f}")
    print(f"Recall (Macro) 표준편차: {recall_macro_std:.4f}")
    print(f"F1-Score (Macro) 표준편차: {f1_macro_std:.4f}")

    return accuracy, f1_micro, f1_macro, macro_scores

In [33]:
def evaluation_ekman(original_df, emotion_res):
    """Score predictions after collapsing both sides to the 7 coarse categories.

    Gold and predicted fine-grained indicator rows are each passed through
    ``emotions_to_ekman`` and the same report as ``evaluation`` is printed for
    the coarse labels.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Gold data. Every column after the first is taken as a 0/1 label column,
        in the order of ``ems.split()``.
    emotion_res : list of list of str
        ``emotion_res[k]`` holds the fine-grained label names predicted for row
        ``k``. Names not in ``ems`` are ignored; rows without a prediction count
        as "no label predicted".

    Returns
    -------
    tuple
        ``(accuracy, f1_micro, f1_macro, macro)`` where ``macro`` is the
        ``(precision, recall, f1, support)`` tuple for ``average='macro'``.

    Raises
    ------
    ValueError
        If there are more predictions than gold rows.
    """
    emotions_list = 'anger disgust fear joy sadness surprise neutral'.split()
    # Split the label string once instead of on every membership check.
    fine_labels = ems.split()
    label_pos = {name: pos for pos, name in enumerate(fine_labels)}

    if len(emotion_res) > len(original_df):
        raise ValueError(
            f"got {len(emotion_res)} predictions for {len(original_df)} rows"
        )

    predicted_fine = np.zeros((len(original_df), len(fine_labels)), dtype=int)
    for row, predicted_names in enumerate(emotion_res):
        for name in predicted_names:
            pos = label_pos.get(name)
            if pos is not None:
                predicted_fine[row, pos] = 1

    predicted = emotions_to_ekman(predicted_fine).to_numpy()
    original = emotions_to_ekman(original_df.iloc[:, 1:].to_numpy()).to_numpy()

    # On multi-label indicator input this is exact-match (subset) accuracy.
    accuracy = accuracy_score(original, predicted)

    precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(
        original, predicted, average='micro'
    )
    # Keep the whole macro tuple so it can be returned without recomputing it.
    macro_scores = precision_recall_fscore_support(
        original, predicted, average='macro'
    )
    precision_macro, recall_macro, f1_macro, _ = macro_scores

    precision_per_label, recall_per_label, f1_per_label, _ = precision_recall_fscore_support(
        original, predicted, average=None
    )

    # Spread of the per-label scores around the macro average.
    precision_macro_std = np.std(precision_per_label)
    recall_macro_std = np.std(recall_per_label)
    f1_macro_std = np.std(f1_per_label)

    print("--- 모델 평가 결과 ---")
    print(f"전체 샘플에 대한 정확도 (Exact Match Accuracy): {accuracy:.4f}")
    print("\n--- Micro 평균 지표 ---")
    print(f"Precision (Micro): {precision_micro:.4f}")
    print(f"Recall (Micro): {recall_micro:.4f}")
    print(f"F1-Score (Micro): {f1_micro:.4f}")
    print("\n--- Macro 평균 지표 ---")
    print(f"Precision (Macro): {precision_macro:.4f}")
    print(f"Recall (Macro): {recall_macro:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")

    print("\n--- 라벨별 지표 ---")
    for name, precision, recall, f1 in zip(
        emotions_list, precision_per_label, recall_per_label, f1_per_label
    ):
        print(f"{name} - Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

    print(f"\nPrecision (Macro) 표준편차: {precision_macro_std:.4f}")
    print(f"Recall (Macro) 표준편차: {recall_macro_std:.4f}")
    print(f"F1-Score (Macro) 표준편차: {f1_macro_std:.4f}")

    return accuracy, f1_micro, f1_macro, macro_scores

In [34]:
def file_init(prompt_dir='../prompt'):
    """Read every prompt fragment into a dict keyed by fragment name.

    Parameters
    ----------
    prompt_dir : str, optional
        Directory holding the prompt text files. Defaults to ``'../prompt'``.

    Returns
    -------
    dict
        Maps each fragment key (``'persona'``, ``'guidelines'``, ...) to the
        full text of its file.

    Raises
    ------
    OSError
        If any of the expected files cannot be opened.
    """
    file_names = {
        'persona': 'persona.txt',
        'guidelines': 'guidelines.txt',
        'output_structure': 'output_structure.txt',
        'few_shot': 'few_shot.txt',
        'few_shot_4': 'few_shot_4.txt',
        'few_shot_8': 'few_shot_8.txt',
        'few_shot_12': 'few_shot_12.txt',
        'few_shot_16': 'few_shot_16.txt',
        'few_shot_20': 'few_shot_20.txt',
        'cot': 'chain_of_thought.txt',
        'description': 'emotion_description.txt'
    }

    file_dict = {}
    for key, file_name in file_names.items():
        # `with` closes the handle even if read() raises.
        # No encoding is passed, so the platform default applies.
        with open(f"{prompt_dir}/{file_name}", 'r') as file:
            file_dict[key] = file.read()
    return file_dict

In [35]:
# Read every prompt fragment once; the prompt-building cell picks pieces out of this dict by key.
files = file_init()

In [36]:
# System prompt: the selected fragments concatenated in this order with no separator.
# Swap 'few_shot_16' for another few_shot_* key to change the example set.
system = "".join(
    files[part]
    for part in ('persona', 'description', 'guidelines', 'output_structure', 'few_shot_16')
)

In [11]:
# Load the evaluation data from data_init's default path ("../../data/dev.tsv").
data = data_init()

In [23]:
# Structured-output format shared by every request: an object with one "analysis" array
# of {"emotion", "reason"} items, where "emotion" is restricted to the 28 label names.
response_format = {
    "format": {
        "type": "json_schema",
        "name": "result",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "analysis": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "emotion": {
                                "type": "string",
                                "enum": [ "admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion", "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism", "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral" ]
                            },
                            "reason": {
                                "type": "string"
                            }
                        },
                        "required": ["emotion", "reason"],
                        "additionalProperties": False
                    }
                }
            },
            "required": ["analysis"],
            "additionalProperties": False
        }
    }
}

# Write one request per input text as a JSON line; custom_id "query<k>" records the
# position of the text in data["text"].
with open("../inputs/single_test.jsonl", "w") as f:
    for k, record in enumerate(data["text"]):
        baseQuery = {
            "custom_id": f"query{k}",
            "method": "POST",
            "url": "/v1/responses",
            "body": {
                "model": "gpt-4o-mini",
                "temperature": 0.5,
                "top_p": 1.0,
                "input": [
                    {"role": "developer", "content": system},
                    {"role": "user", "content": f"{record}"},
                ],
                "max_output_tokens": 1000,
                "text": response_format,
            },
        }
        f.write(json.dumps(baseQuery) + "\n")

In [12]:
# Read the API key from a local file so it never appears in the notebook itself.
# readline() keeps the trailing newline, so strip() it; otherwise the newline would be
# passed on as part of the key. `with` closes the file even if reading fails.
with open('../key/openai_key.txt', 'r') as key_file:
    api_key = key_file.readline().strip()
client = OpenAI(api_key=api_key)

In [19]:
# Ids of the batches submitted from this notebook; the submission cell appends to it.
batch_list = []

In [25]:
# Upload the request file, then start a batch job that runs it against /v1/responses.
# The upload handle lives in a `with` block so it is closed as soon as the upload returns.
with open("../inputs/single_test.jsonl", "rb") as request_file:
    batch_input_file = client.files.create(
        file=request_file,
        purpose='batch'
    )
print(batch_input_file)

batch_input_file_id = batch_input_file.id
create_batch=client.batches.create(
    input_file_id=batch_input_file_id,
    endpoint="/v1/responses",
    completion_window="24h",
)
# Remember the batch id so it can be retrieved later.
batch_list.append(create_batch.id)


FileObject(id='file-WwDRN4qv4c1Y9Hc7FEXntT', bytes=55898879, created_at=1763344770, filename='single_test.jsonl', object='file', purpose='batch', status='processed', expires_at=1765936770, status_details=None)


In [26]:
# Show the id of every submitted batch, one per line.
for batch_id in batch_list:
    print(batch_id)

batch_691a81833ca481909d84795bed8ecaa1


In [13]:
# Hard-coded batch id; this replaces whatever batch_list held before.
# NOTE(review): presumably here so results can be fetched after a kernel restart without
# resubmitting the batch - confirm.
batch_list = ['batch_691a81833ca481909d84795bed8ecaa1']

In [20]:
# One slot per batch; 0 marks "not fetched yet" (the polling cell skips any slot that is not 0).
batch_res = [0] * len(batch_list)

In [21]:
# Poll every batch once and cache whatever is available in batch_res.
# Re-run this cell until the final line reports all batches as done.
cnt = 0
for i, batch_id in enumerate(batch_list):
    print(i, end=" ")
    if batch_res[i] != 0:
        # Already fetched on an earlier run of this cell.
        print("done")
        cnt += 1
        continue
    batch = client.batches.retrieve(batch_id)
    result = None
    if batch.status == 'completed':
        out = batch.output_file_id
        if out is not None:
            cnt += 1
            print('done!')
            result = client.files.content(out)
            batch_res[i] = result
        else:
            # Completed without an output file: every request failed.
            print('error')
            if batch.error_file_id is not None:
                # NOTE: this stores the error text (a str) in the slot, whereas a
                # successful fetch stores a response object exposing `.text`.
                result = client.files.content(batch.error_file_id).text
                batch_res[i] = result
            else:
                print('no error file available')
    elif batch.status == 'failed':
        print('failed')
        print(batch.errors)
    elif batch.status in ('expired', 'cancelling', 'cancelled'):
        # Terminal states: polling again will not make this batch complete.
        print('batch ended without completing')
        print(batch.status)
        print(batch.request_counts)
    else:
        print('it does not finish yet')
        print(batch.status)
        print(batch.request_counts)
print(cnt, "/", len(batch_res))

0 done!
1 / 1


In [24]:
# Sanity check: this shows only the stored object's repr, not the downloaded content.
print(batch_res[0])

<openai._legacy_response.HttpxBinaryResponseContent object at 0x0000029C3908FC70>


In [25]:
# Accumulators for summary rows; the summary-table cell builds its frames from them.
result = []
ek_result = []

# One JSON object per non-empty line. splitlines() also copes with a file that has no
# trailing newline, where split('\n')[:-1] would drop the last record.
json_res = [
    json.loads(line)
    for line in batch_res[0].text.splitlines()
    if line.strip()
]

# The Batch API does not guarantee that output lines come back in input order, so each
# prediction is placed at the row encoded in its custom_id ("query<k>"). Rows with no
# usable response keep an empty prediction list.
emotion_res = [[] for _ in range(len(data))]
unparsed = 0
for entry in json_res:
    row = int(entry['custom_id'][len('query'):])
    try:
        answer_text = entry['response']['body']['output'][0]['content'][0]['text']
        analysis = json.loads(answer_text)['analysis']
    except (TypeError, KeyError, IndexError, json.JSONDecodeError):
        # Failed request, or an answer that is missing or not valid JSON.
        unparsed += 1
        continue
    emotion_res[row] = [item['emotion'] for item in analysis]

if unparsed:
    print(f"{unparsed} response(s) could not be parsed and are scored as empty predictions")

e = evaluation(data, emotion_res)
ek = evaluation_ekman(data, emotion_res)

--- 모델 평가 결과 ---
전체 샘플에 대한 정확도 (Exact Match Accuracy): 0.2528

--- Micro 평균 지표 ---
Precision (Micro): 0.3307
Recall (Micro): 0.2918
F1-Score (Micro): 0.3101

--- Macro 평균 지표 ---
Precision (Macro): 0.3504
Recall (Macro): 0.3394
F1-Score (Macro): 0.2994

--- 라벨별 지표 ---
admiration - Precision: 0.6449, Recall: 0.1766, F1-Score: 0.2773
amusement - Precision: 0.3315, Recall: 0.6742, F1-Score: 0.4444
anger - Precision: 0.2867, Recall: 0.6212, F1-Score: 0.3923
annoyance - Precision: 0.3133, Recall: 0.1469, F1-Score: 0.2000
approval - Precision: 0.2273, Recall: 0.1425, F1-Score: 0.1751
caring - Precision: 0.2849, Recall: 0.3926, F1-Score: 0.3302
confusion - Precision: 0.1725, Recall: 0.5752, F1-Score: 0.2655
curiosity - Precision: 0.2811, Recall: 0.3662, F1-Score: 0.3180
desire - Precision: 0.3800, Recall: 0.2289, F1-Score: 0.2857
disappointment - Precision: 0.1525, Recall: 0.2384, F1-Score: 0.1860
disapproval - Precision: 0.1850, Recall: 0.4607, F1-Score: 0.2639
disgust - Precision: 0.3596, Re

In [38]:
# Both summary tables share one layout: the run configuration followed by its scores.
# NOTE(review): in the cells visible here `result` / `ek_result` are only ever set to [];
# the rows must be appended by code that is not shown - confirm before a clean re-run.
summary_columns = ['shots', 'temperature', 'top_p', 'accuracy', 'micro f1', 'macro f1']
res_df = pd.DataFrame(result, columns=summary_columns)
ek_res_df = pd.DataFrame(ek_result, columns=summary_columns)

In [39]:
# Display the summary table built from ek_result.
ek_res_df

Unnamed: 0,shots,temperature,top_p,accuracy,micro f1,macro f1
0,4,0.25,0.75,0.4564,0.514981,0.451085
1,4,0.0,0.25,0.454,0.513741,0.454304
2,4,0.5,1.0,0.4468,0.508494,0.446688
3,8,0.25,0.75,0.4632,0.517222,0.455611
4,8,0.0,0.25,0.4632,0.516956,0.45461
5,8,0.5,1.0,0.4588,0.513371,0.445528
6,12,0.25,0.75,0.4576,0.518671,0.460754
7,12,0.0,0.25,0.4616,0.521414,0.461851
8,12,0.5,1.0,0.4528,0.515418,0.451373
9,16,0.25,0.75,0.464,0.520496,0.456994


In [57]:
# Count how often each combination of labels occurs among rows carrying more than one label.
# Keys are tuples of label positions in ascending order; values are row counts.
data = data_init()
test_dataset = data.drop(columns=['text'])
emotion_count = {}
for label_row in test_dataset.to_numpy():
    if sum(label_row) <= 1:
        # Rows with at most one label are not combinations.
        continue
    combo = tuple(pos for pos in range(28) if label_row[pos] == 1)
    emotion_count[combo] = emotion_count.get(combo, 0) + 1


In [58]:
# Rank the label combinations by count, most frequent first -> list of (combo, count) pairs.
# NOTE: this rebinds `emotion_res`, which the evaluation cell used for per-text predictions.
emotion_res = sorted(emotion_count.items(), key=lambda x : x[1], reverse=True)

In [60]:
# Number of distinct multi-label combinations found.
print(len(emotion_res))

268


In [63]:
# List every combination that does not involve label 27 (the last entry of `ems`) and
# add up how many rows those combinations cover.
s = 0
for combo, freq in emotion_res:
    if 27 in combo:
        continue
    print((combo, freq))
    s += freq

print(s)

((2, 3), 46)
((0, 15), 30)
((0, 4), 24)
((3, 10), 21)
((0, 18), 17)
((6, 7), 14)
((3, 9), 12)
((9, 25), 12)
((0, 17), 11)
((1, 17), 11)
((13, 17), 9)
((4, 20), 9)
((0, 20), 9)
((15, 20), 9)
((15, 18), 9)
((4, 5), 8)
((24, 25), 7)
((1, 10), 7)
((0, 7), 7)
((1, 15), 7)
((1, 4), 6)
((9, 10), 6)
((4, 7), 6)
((14, 19), 6)
((1, 18), 6)
((1, 20), 5)
((18, 20), 5)
((0, 1), 5)
((2, 11), 5)
((4, 22), 5)
((1, 7), 5)
((20, 25), 5)
((3, 11), 5)
((10, 11), 5)
((4, 17), 5)
((8, 20), 5)
((4, 23), 5)
((4, 15), 5)
((5, 24), 4)
((9, 22), 4)
((2, 9), 4)
((4, 13), 4)
((10, 20), 4)
((15, 26), 4)
((6, 26), 4)
((5, 20), 4)
((1, 6), 4)
((3, 7), 4)
((7, 13), 4)
((1, 13), 4)
((1, 22), 4)
((15, 17), 4)
((0, 25), 3)
((0, 9), 3)
((0, 8), 3)
((5, 10), 3)
((4, 21), 3)
((17, 20), 3)
((9, 12), 3)
((9, 26), 3)
((2, 18), 3)
((0, 22), 3)
((4, 8), 3)
((1, 3), 3)
((2, 15), 3)
((10, 18), 3)
((5, 6), 3)
((3, 25), 3)
((0, 26), 3)
((5, 25), 3)
((17, 18), 3)
((13, 18), 3)
((5, 18), 3)
((12, 22), 2)
((7, 24), 2)
((3, 4), 2)
((2, 