In [1]:
from openai import OpenAI
import pandas as pd
import json
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
import matplotlib.pyplot as plt

In [2]:
ems = """
admiration
amusement
anger
annoyance
approval
caring
confusion
curiosity
desire
disappointment
disapproval
disgust
embarrassment
excitement
fear
gratitude
grief
joy
love
nervousness
optimism
pride
realization
relief
remorse
sadness
surprise
neutral
    """

In [3]:
def emotions_to_categorical(df):
    res = []

    for i in df['emotions']:
        tmp = [0 for _ in range(28)]
        for j in i:
            tmp[j] = 1
        res.append(tmp)
    tmp_df = pd.DataFrame(res, columns=ems.split())
    
    return tmp_df

In [4]:
def emotions_to_ekman(df):
    # anger disgust fear joy sadness surprise neutral
    ekman = [3, 3, 0, 0, 3, 3, 5, 5, 3, 4, 0, 1, 4, 3, 2, 3, 4, 3, 3, 2, 3, 3, 5, 3, 4, 4, 5, 6]
    res = []

    for i in df:
        tmp = [0, 0, 0, 0, 0, 0, 0]
        for j in range(len(i)):
            if i[j] == 1:
                tmp[ekman[j]] = 1
        res.append(tmp)
    tmp_df = pd.DataFrame(res, columns=['angry', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral'])
    
    return tmp_df

In [5]:
def data_init(path = "../data/dev.tsv"):
    df = pd.read_csv(path, sep="\t", encoding = "utf-8", header=None)
    df.columns = ['text', 'emotions', 'id']
    df['emotions'] = list(map(lambda s : list(map(int, s.split(','))), df['emotions']))
    df = pd.concat([df, emotions_to_categorical(df)], axis=1)
    df = df.drop(columns=['emotions', 'id'])
    df['text'] = list(map(lambda s : s.replace('\\', '\\\\').replace('"', '\\"'), list(df['text']))) 
    return df.iloc[:2500, :]

In [26]:
def evaluation(original_df, emotion_res):
    emotions_list = ems.split()
    df = original_df
    predicted_df = pd.DataFrame(data = [[0 for _ in range(28)] for _ in range(len(df))], columns=emotions_list)
    for i in range(len(emotion_res)):
        for j in emotion_res[i]:
            if j in emotions_list:
                predicted_df.loc[i, j] = 1
    predicted = predicted_df.to_numpy()
    original = df.iloc[:,1:].to_numpy()
    
    
    accuracy = accuracy_score(original, predicted)
    
    precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(
        original, predicted, average='micro'
    )
    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
        original, predicted, average='macro'
    )
    
    precision_per_label, recall_per_label, f1_per_label, _ = precision_recall_fscore_support(
        original, predicted, average=None
    )

    precision_macro_std = np.std(precision_per_label)
    recall_macro_std = np.std(recall_per_label)
    f1_macro_std = np.std(f1_per_label)

    print("--- 모델 평가 결과 ---")
    print(f"전체 샘플에 대한 정확도 (Exact Match Accuracy): {accuracy:.4f}")
    print("\n--- Micro 평균 지표 ---")
    print(f"Precision (Micro): {precision_micro:.4f}")
    print(f"Recall (Micro): {recall_micro:.4f}")
    print(f"F1-Score (Micro): {f1_micro:.4f}")
    print("\n--- Macro 평균 지표 ---")
    print(f"Precision (Macro): {precision_macro:.4f}")
    print(f"Recall (Macro): {recall_macro:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")
    
    print("\n--- 라벨별 지표 ---")
    for i in range(len(emotions_list)):
        print(f"{emotions_list[i]} - Precision: {precision_per_label[i]:.4f}, Recall: {recall_per_label[i]:.4f}, F1-Score: {f1_per_label[i]:.4f}")
    
    print(f"\nPrecision (Macro) 표준편차: {precision_macro_std:.4f}")
    print(f"Recall (Macro) 표준편차: {recall_macro_std:.4f}")
    print(f"F1-Score (Macro) 표준편차: {f1_macro_std:.4f}")

    return accuracy, f1_micro, f1_macro, precision_recall_fscore_support(original, predicted, average='macro')

In [27]:
def evaluation_ekman(original_df, emotion_res):
    emotions_list = 'anger disgust fear joy sadness surprise neutral'.split()
    predicted_df = pd.DataFrame(data = [[0 for _ in range(28)] for _ in range(len(original_df))], columns=ems.split())
    for i in range(len(emotion_res)):
        for j in emotion_res[i]:
            if j in ems.split():
                predicted_df.loc[i, j] = 1
    predicted = emotions_to_ekman(predicted_df.to_numpy()).to_numpy()
    original = emotions_to_ekman(original_df.iloc[:,1:].to_numpy()).to_numpy()

    accuracy = accuracy_score(original, predicted)
    
    precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(
        original, predicted, average='micro'
    )
    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
        original, predicted, average='macro'
    )
    
    precision_per_label, recall_per_label, f1_per_label, _ = precision_recall_fscore_support(
        original, predicted, average=None
    )

    precision_macro_std = np.std(precision_per_label)
    recall_macro_std = np.std(recall_per_label)
    f1_macro_std = np.std(f1_per_label)

    print("--- 모델 평가 결과 ---")
    print(f"전체 샘플에 대한 정확도 (Exact Match Accuracy): {accuracy:.4f}")
    print("\n--- Micro 평균 지표 ---")
    print(f"Precision (Micro): {precision_micro:.4f}")
    print(f"Recall (Micro): {recall_micro:.4f}")
    print(f"F1-Score (Micro): {f1_micro:.4f}")
    print("\n--- Macro 평균 지표 ---")
    print(f"Precision (Macro): {precision_macro:.4f}")
    print(f"Recall (Macro): {recall_macro:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")
    
    print("\n--- 라벨별 지표 ---")
    for i in range(len(emotions_list)):
        print(f"{emotions_list[i]} - Precision: {precision_per_label[i]:.4f}, Recall: {recall_per_label[i]:.4f}, F1-Score: {f1_per_label[i]:.4f}")
    
    print(f"\nPrecision (Macro) 표준편차: {precision_macro_std:.4f}")
    print(f"Recall (Macro) 표준편차: {recall_macro_std:.4f}")
    print(f"F1-Score (Macro) 표준편차: {f1_macro_std:.4f}")

    return accuracy, f1_micro, f1_macro, precision_recall_fscore_support(original, predicted, average='macro')

In [8]:
def file_init():
    file_dict = {}
    file_names = {
        'persona': './prompt/persona.txt',
        'guidelines': './prompt/guidelines.txt',
        'output_structure': './prompt/output_structure.txt',
        'few_shot': './prompt/few_shot.txt',
        'few_shot_4': './prompt/few_shot_4.txt',
        'few_shot_8': './prompt/few_shot_8.txt',
        'few_shot_12': './prompt/few_shot_12.txt',
        'few_shot_16': './prompt/few_shot_16.txt',
        'few_shot_20': './prompt/few_shot_20.txt',
        'cot': './prompt/chain_of_thought.txt',
        'description':  './prompt/emotion_description.txt'
    }
    for key, value in file_names.items():
        file = open(value, 'r')
        file_dict[key] = file.read()
        file.close()
    return file_dict

In [9]:
files = file_init()

In [10]:
systems = {
    4: f"{files['persona']}{files['description']}{files['guidelines']}{files['output_structure']}{files['few_shot_4']}",
    8: f"{files['persona']}{files['description']}{files['guidelines']}{files['output_structure']}{files['few_shot_8']}",
    12: f"{files['persona']}{files['description']}{files['guidelines']}{files['output_structure']}{files['few_shot_12']}",
    16: f"{files['persona']}{files['description']}{files['guidelines']}{files['output_structure']}{files['few_shot_16']}",
    20: f"{files['persona']}{files['description']}{files['guidelines']}{files['output_structure']}{files['few_shot_20']}"
}

In [11]:
data = data_init()

In [17]:
it = [(0.25, 0.75), (0.00, 0.25),(0.50, 1.00)]

In [66]:
for key, value in systems.items():
    i = 0
    for j in it:
        with open(f"./inputs/few_shot_grid/version2/few_shot_grid_{key}_{i}.jsonl", "w") as f:
            k = 0
            for record in data["text"]:
                baseQuery = {
                "custom_id": f"query{k}",
                "method": "POST",
                "url": "/v1/responses",
                "body": {
                        "model": "gpt-4o-mini",
                        "temperature": j[0],
                        "top_p": j[1],
                        "input": [{
                            "role": "developer",
                            "content": f"{value}"
                        }, 
                        {
                            "role": "user",
                            "content": f"{record}"
                        }], 
                        "max_output_tokens": 1000,
                        "text": {
                            "format": {
                                "type": "json_schema",
                                "name": "result",
                                "strict": True,
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "analysis": {
                                            "type": "array",
                                            "items": {
                                                "type": "object",
                                                "properties": {
                                                    "emotion": {
                                                        "type": "string",
                                                        "enum": [ "admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion", "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism", "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral" ]
                                                    },
                                                    "reason": {
                                                        "type": "string"
                                                    }
                                                },
                                                "required": ["emotion", "reason"],
                                                "additionalProperties": False
                                            }
                                        }
                                    },
                                    "required": ["analysis"],
                                    "additionalProperties": False
                                }
                            }
                        }
                    }
                }
                k += 1
                f.write(json.dumps(baseQuery) + "\n")
        i += 1

In [13]:
key_file = open('./key/openai_key.txt', 'r')
api_key = key_file.readline()
key_file.close()
client = OpenAI(api_key=api_key)

In [93]:
batch_list = []

In [96]:
for i in range(20, 21, 4):
    for j in range(2, 3):
        batch_input_file = client.files.create(
            file=open(f"./inputs/few_shot_grid/version2/few_shot_grid_{i}_{j}.jsonl", "rb"),
            purpose='batch'
        )
        print(batch_input_file)
        
        batch_input_file_id = batch_input_file.id
        create_batch=client.batches.create(
            input_file_id=batch_input_file_id,
            endpoint="/v1/responses",
            completion_window="24h",
        )
        batch_list.append(create_batch.id)


FileObject(id='file-Y4uMoNsQfFa87VzGHPpMy7', bytes=29057821, created_at=1762145807, filename='few_shot_grid_20_2.jsonl', object='file', purpose='batch', status='processed', expires_at=1764737807, status_details=None)


In [73]:
for i in batch_list:
    print(i)

batch_690812716c3481909126d96df05787a9
batch_69081289c3648190acf68565aeb6f3c5
batch_690812a18dd881908da20eee396e18b7
batch_690812bd536c8190a484251bdf45bc08
batch_690812d8837881909669e212451a3f9c
batch_690812f417d88190be3cb7974508c624
batch_69081312faa08190a5a4ea0498a3bdf7
batch_690813326a008190a1fa91e968abcb21
batch_690813528d488190a152474917246d77
batch_69081d0790788190acd811022212212a
batch_69081d23e14481908d88e0a2eb8e7ca1
batch_69081d3b698c81909b8818ed1c973aeb
batch_69081d5933a481909b17ef0fbf593c46
batch_69081d73d5bc8190ac048b1741b3e28d
batch_69081d8ea1fc819092550a0a6c9537cc


In [52]:
batch_res = [0] * len(batch_list)

In [53]:
cnt = 0
for i in range(len(batch_list)):
    print(i, end=" ")
    if batch_res[i] != 0:
        print("done")
        cnt += 1
        continue
    batch = client.batches.retrieve(batch_list[i])
    result = None
    if batch.status == 'completed':
        out = batch.output_file_id
        if out != None:
            cnt += 1
            print('done!')
            result = client.files.content(out)
            batch_res[i] = result
        else:
            print('error')
            result = client.files.content(batch.error_file_id).text
            batch_res[i] = result
    elif batch.status == 'failed':
        print('failed')
        print(batch.errors)
    else:
        print('it does not finish yet')
        print(batch.status)
        print(batch.request_counts)
print(cnt, "/", len(batch_res))

0 done!
1 done!
2 done!
3 done!
4 done!
5 done!
6 done!
7 done!
8 done!
9 done!
10 it does not finish yet
in_progress
BatchRequestCounts(completed=2497, failed=0, total=2500)
11 it does not finish yet
in_progress
BatchRequestCounts(completed=2491, failed=0, total=2500)
12 it does not finish yet
in_progress
BatchRequestCounts(completed=2444, failed=16, total=2500)
13 done!
14 it does not finish yet
in_progress
BatchRequestCounts(completed=1666, failed=12, total=2500)
11 / 15


In [12]:
batch_res = []

for i in range(4, 21, 4):
    for j in range(3):
        file = open(f'./output/20251103/adjusted_fewshot_{i}_{j}.jsonl', 'r')
        batch_res.append(file.read())
        file.close()

In [40]:
k = 0
result = []
ek_result = []
for i in range(4, 21, 4):
    for j in it:
        json_res = []
        for l in batch_res[k].split('\n')[:-1]:
            json_res.append(json.loads(l))
        emotion_res = []

        for l in json_res:
            tmp = []
            n = json.loads(l['response']['body']['output'][0]['content'][0]['text'])
        
            for m in n['analysis']:
                tmp.append(m['emotion'])
        
            emotion_res.append(tmp)
        print(f'------------------------{i}-{j}------------------------')
        print(data)
        e = evaluation(data, emotion_res)
        ek = evaluation_ekman(data, emotion_res)
        result.append([i]+list(j)+[e[0], e[1], e[2]])
        ek_result.append([i]+list(j)+[ek[0], ek[1], ek[2]])
        k += 1

------------------------4-(0.25, 0.75)------------------------
                                                   text  admiration  \
0     Is this in New Orleans?? I really feel like th...           0   
1     You know the answer man, you are programmed to...           0   
2                  I've never been this sad in my life!           0   
3     The economy is heavily controlled and subsidiz...           0   
4     He could have easily taken a real camera from ...           0   
...                                                 ...         ...   
2495                    You got this, good luck brother           0   
2496                        You smile warmly at people.           1   
2497  yuck, I'd be embarrassed to walk around with a...           0   
2498  Ok then youre leaving the enmey widow uncontes...           1   
2499  Oh, also forgot to add, LOTION! ALWAYS lotion ...           0   

      amusement  anger  annoyance  approval  caring  confusion  curiosity  \
0      

In [38]:
res_df = pd.DataFrame(result, columns=['shots', 'temperature', 'top_p', 'accuracy', 'micro f1', 'macro f1'])
ek_res_df = pd.DataFrame(ek_result, columns=['shots', 'temperature', 'top_p', 'accuracy', 'micro f1', 'macro f1'])

In [39]:
ek_res_df

Unnamed: 0,shots,temperature,top_p,accuracy,micro f1,macro f1
0,4,0.25,0.75,0.4564,0.514981,0.451085
1,4,0.0,0.25,0.454,0.513741,0.454304
2,4,0.5,1.0,0.4468,0.508494,0.446688
3,8,0.25,0.75,0.4632,0.517222,0.455611
4,8,0.0,0.25,0.4632,0.516956,0.45461
5,8,0.5,1.0,0.4588,0.513371,0.445528
6,12,0.25,0.75,0.4576,0.518671,0.460754
7,12,0.0,0.25,0.4616,0.521414,0.461851
8,12,0.5,1.0,0.4528,0.515418,0.451373
9,16,0.25,0.75,0.464,0.520496,0.456994
