In [1]:
from openai import OpenAI
import pandas as pd
import json
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
import matplotlib.pyplot as plt

In [2]:
ems = ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']

In [3]:
def emotions_to_categorical(df):
    res = []

    for i in df['emotions']:
        tmp = [0 for _ in range(28)]
        for j in i:
            tmp[j] = 1
        res.append(tmp)
    tmp_df = pd.DataFrame(res, columns=ems)
    
    return tmp_df

In [4]:
def emotions_to_ekman(df):
    # anger disgust fear joy sadness surprise neutral
    ekman = [3, 3, 0, 0, 3, 3, 5, 5, 3, 4, 0, 1, 4, 3, 2, 3, 4, 3, 3, 2, 3, 3, 5, 3, 4, 4, 5, 6]
    res = []

    for i in df:
        tmp = [0, 0, 0, 0, 0, 0, 0]
        for j in range(len(i)):
            if i[j] == 1:
                tmp[ekman[j]] = 1
        res.append(tmp)
    tmp_df = pd.DataFrame(res, columns=['angry', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral'])
    
    return tmp_df

In [5]:
def data_init(path = "../../data/test.tsv"):
    df = pd.read_csv(path, sep="\t", encoding = "utf-8", header=None)
    df.columns = ['text', 'emotions', 'id']
    df['emotions'] = list(map(lambda s : list(map(int, s.split(','))), df['emotions']))
    df = pd.concat([df, emotions_to_categorical(df)], axis=1)
    df = df.drop(columns=['emotions'])
    df['text'] = list(map(lambda s : s.replace('\\', '\\\\').replace('"', '\\"'), list(df['text']))) 
    # return df.iloc[:2500, :]
    return df

In [6]:
def evaluation(original_df, emotion_res):
    emotions_list = ems
    df = original_df
    predicted_df = pd.DataFrame(data = [[0 for _ in range(28)] for _ in range(len(df))], columns=emotions_list)
    for i in range(len(emotion_res)):
        for j in emotion_res[i]:
            if j in emotions_list:
                predicted_df.loc[i, j] = 1
    predicted = predicted_df.to_numpy()
    original = df.to_numpy()
    
    accuracy = accuracy_score(original, predicted)
    
    precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(
        original, predicted, average='micro'
    )
    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
        original, predicted, average='macro'
    )
    
    precision_per_label, recall_per_label, f1_per_label, _ = precision_recall_fscore_support(
        original, predicted, average=None
    )

    precision_macro_std = np.std(precision_per_label)
    recall_macro_std = np.std(recall_per_label)
    f1_macro_std = np.std(f1_per_label)

    print("--- 모델 평가 결과 ---")
    print(f"전체 샘플에 대한 정확도 (Exact Match Accuracy): {accuracy:.4f}")
    print("\n--- Micro 평균 지표 ---")
    print(f"Precision (Micro): {precision_micro:.4f}")
    print(f"Recall (Micro): {recall_micro:.4f}")
    print(f"F1-Score (Micro): {f1_micro:.4f}")
    print("\n--- Macro 평균 지표 ---")
    print(f"Precision (Macro): {precision_macro:.4f}")
    print(f"Recall (Macro): {recall_macro:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")
    
    print("\n--- 라벨별 지표 ---")
    for i in range(len(emotions_list)):
        print(f"{emotions_list[i]} - Precision: {precision_per_label[i]:.4f}, Recall: {recall_per_label[i]:.4f}, F1-Score: {f1_per_label[i]:.4f}")
    
    print(f"\nPrecision (Macro) 표준편차: {precision_macro_std:.4f}")
    print(f"Recall (Macro) 표준편차: {recall_macro_std:.4f}")
    print(f"F1-Score (Macro) 표준편차: {f1_macro_std:.4f}")

    return accuracy, f1_micro, f1_macro, precision_recall_fscore_support(original, predicted, average='macro')

In [7]:
def evaluation_ekman(original_df, emotion_res):
    emotions_list = 'anger disgust fear joy sadness surprise neutral'.split()
    predicted_df = pd.DataFrame(data = [[0 for _ in range(28)] for _ in range(len(original_df))], columns=ems)
    for i in range(len(emotion_res)):
        for j in emotion_res[i]:
            if j in ems:
                predicted_df.loc[i, j] = 1
    predicted = emotions_to_ekman(predicted_df.to_numpy()).to_numpy()
    original = emotions_to_ekman(original_df.to_numpy()).to_numpy()

    accuracy = accuracy_score(original, predicted)
    
    precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(
        original, predicted, average='micro'
    )
    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
        original, predicted, average='macro'
    )
    
    precision_per_label, recall_per_label, f1_per_label, _ = precision_recall_fscore_support(
        original, predicted, average=None
    )

    precision_macro_std = np.std(precision_per_label)
    recall_macro_std = np.std(recall_per_label)
    f1_macro_std = np.std(f1_per_label)

    print("--- 모델 평가 결과 ---")
    print(f"전체 샘플에 대한 정확도 (Exact Match Accuracy): {accuracy:.4f}")
    print("\n--- Micro 평균 지표 ---")
    print(f"Precision (Micro): {precision_micro:.4f}")
    print(f"Recall (Micro): {recall_micro:.4f}")
    print(f"F1-Score (Micro): {f1_micro:.4f}")
    print("\n--- Macro 평균 지표 ---")
    print(f"Precision (Macro): {precision_macro:.4f}")
    print(f"Recall (Macro): {recall_macro:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")
    
    print("\n--- 라벨별 지표 ---")
    for i in range(len(emotions_list)):
        print(f"{emotions_list[i]} - Precision: {precision_per_label[i]:.4f}, Recall: {recall_per_label[i]:.4f}, F1-Score: {f1_per_label[i]:.4f}")
    
    print(f"\nPrecision (Macro) 표준편차: {precision_macro_std:.4f}")
    print(f"Recall (Macro) 표준편차: {recall_macro_std:.4f}")
    print(f"F1-Score (Macro) 표준편차: {f1_macro_std:.4f}")

    return accuracy, f1_micro, f1_macro, precision_recall_fscore_support(original, predicted, average='macro')

In [8]:
def file_init():
    file_dict = {}
    basicPath = '../prompt/multi_v3'
    file_names = {
        'tier0_persona': basicPath+'/tier0/persona.txt',
        'tier0_guidelines': basicPath+'/tier0/guidelines.txt',
        'tier0_description': basicPath+'/tier0/description.txt',
        'tier1_persona': basicPath+'/tier1/persona.txt',
        'tier1_guidelines': basicPath+'/tier1/guidelines.txt',
        'tier1_description': basicPath+'/tier1/description.txt',
        'advocate1_persona': basicPath+'/advocate1/persona.txt',
        'advocate1_guidelines': basicPath+'/advocate1/guidelines.txt',
        'advocate2_persona': basicPath+'/advocate2/persona.txt',
        'advocate2_guidelines': basicPath+'/advocate2/guidelines.txt',
        'judge_persona': basicPath+'/judge/persona.txt',
        'judge_guidelines': basicPath+'/judge/guidelines.txt',
        'fewshot': basicPath+'/fewshot.txt',
    }
    for key, value in file_names.items():
        file = open(value, 'r')
        file_dict[key] = file.read()
        file.close()
    return file_dict

In [9]:
data = data_init()

In [10]:
key_file = open('../key/openai_key.txt', 'r')
api_key = key_file.readline()
key_file.close()
client = OpenAI(api_key=api_key)

In [11]:
class Agent:
    def __init__(self, system, format, parent, child, filename, outputCol, input, inputCol):
        self.query = {
        "custom_id": "",
        "method": "POST",
        "url": "/v1/responses",
        "body": {
                "model": "gpt-4o-mini",
                "temperature": 0.5,
                "top_p": 1.0,
                "input": [{
                    "role": "developer",
                    "content": f"{system}"
                }, 
                {
                    "role": "user",
                    "content": ""
                }], 
                "max_output_tokens": 1000,
                "text": {
                    "format": format
                }
            }
        }
        self.parent = parent
        self.child = None
        if isinstance(child, list):
            self.child = child
        else:
            self.child = [child]
        self.filename = filename
        self.batch_id = None
        self.batch_res = None
        self.emotion_res = []
        self.outputCol = outputCol
        self.inputCol = inputCol
        self.input = input
        self.output = None
    
    def make_jsonl(self):
        tmp_query = self.query
        with open(self.filename, "w") as f:
            k = 0
            for i in range(len(self.input)):
                record = {}
                for j in self.inputCol:
                    record[j] = self.input.loc[i, j]
                tmp_query['custom_id'] = f"query{k}"
                tmp_query['body']['input'][1]['content'] = json.dumps(record)
                k += 1
                f.write(json.dumps(tmp_query) + "\n")
            
    def run(self):
        batch_input_file = client.files.create(
            file=open(self.filename, "rb"),
            purpose='batch'
        )
        print(batch_input_file)

        batch_input_file_id = batch_input_file.id
        create_batch=client.batches.create(
            input_file_id=batch_input_file_id,
            endpoint="/v1/responses",
            completion_window="24h",
        )
        self.batch_id = create_batch.id

    def check_status(self):
        batch = client.batches.retrieve(self.batch_id)
        result = None
        if batch.status == 'completed':
            out = batch.output_file_id
            if out != None:
                print('done!')
                result = client.files.content(out)
                self.batch_res = result
                return 1
            else:
                print('error')
                result = client.files.content(batch.error_file_id).text
                self.batch_res = result
                return -2
        elif batch.status == 'failed':
            print('failed')
            print(batch.errors)
            return -1
        else:
            print('it does not finish yet')
            print(batch.status)
            print(batch.request_counts)
            return 0
    
    def parse_result(self):
        status = self.check_status()
        if status != 1:
            print('can not parse result')
            return
        json_res = []
        for i in self.batch_res.text.split('\n')[:-1]:
            json_res.append(json.loads(i))

        for i in json_res:
            tmp = [[] for _ in range(len(self.outputCol))]
            n = json.loads(i['response']['body']['output'][0]['content'][0]['text'])

            for k in range(len(self.outputCol)):
                tmp[k].append(n['analysis'][self.outputCol[k]])

            self.emotion_res.append(tmp)
        tmp_df = pd.DataFrame(data=self.emotion_res, columns=self.outputCol)
        self.output = pd.concat([self.input, tmp_df], axis=1)


In [12]:
multi_tier0_format = {
    "type": "json_schema",
    "name": "result",
    "strict": True,
    "schema": {
        "type": "object",
        "properties": {
            "analysis": {
                "type": "object",
                "properties": {
                    "isNeutral": {
                        "type": "boolean",
                        "description": "If the text shows little emotion : True"
                    }
                },
                "required": ["isNeutral"],
                "additionalProperties": False
            }
        },
        "required": ["analysis"],
        "additionalProperties": False
    }
}

In [13]:
files = file_init()
tier0_system = f"{files['tier0_persona']}{files['tier0_guidelines']}{files['tier1_description']}"
tier0 = Agent(tier0_system, multi_tier0_format, None, None, "../inputs/multi/v3/tier0.jsonl", ["isNeutral"], data, ['text'])

In [14]:
tier0.make_jsonl()
tier0.run()

FileObject(id='file-CFXEW17H2ACprfJpdbx2zD', bytes=21049408, created_at=1764218600, filename='tier0.jsonl', object='file', purpose='batch', status='processed', expires_at=1766810600, status_details=None)


In [14]:
tier0.batch_id = 'batch_6927d6e93da88190a33d9d767f33e460'
print(tier0.batch_id)

batch_6927d6e93da88190a33d9d767f33e460


In [15]:
tier0.check_status()

done!


1

In [16]:
json_res = []
for i in tier0.batch_res.text.split('\n')[:-1]:
    json_res.append(json.loads(i))
cnt = 0

json_res[2730]['response']['body']['output'][0]['content'][0]['text'] = '{"analysis":{"isNeutral":true}}'


In [17]:
for i in json_res:
    tmp = [[] for _ in range(len(tier0.outputCol))]
    print(cnt, i['response']['body']['output'][0]['content'][0]['text'])
    n = json.loads(i['response']['body']['output'][0]['content'][0]['text'])
    cnt += 1
    for k in range(len(tier0.outputCol)):
        tmp[k].append(n['analysis'][tier0.outputCol[k]])

    tier0.emotion_res.append(tmp)
tmp_df = pd.DataFrame(data=tier0.emotion_res, columns=tier0.outputCol)
tier0.output = pd.concat([tier0.input, tmp_df], axis=1)

0 {"analysis":{"isNeutral":false}}
1 {"analysis":{"isNeutral":false}}
2 {"analysis":{"isNeutral":false}}
3 {"analysis":{"isNeutral":false}}
4 {"analysis":{"isNeutral":false}}
5 {"analysis":{"isNeutral":false}}
6 {"analysis":{"isNeutral":true}}
7 {"analysis":{"isNeutral":false}}
8 {"analysis":{"isNeutral":false}}
9 {"analysis":{"isNeutral":false}}
10 {"analysis":{"isNeutral":false}}
11 {"analysis":{"isNeutral":false}}
12 {"analysis":{"isNeutral":false}}
13 {"analysis":{"isNeutral":false}}
14 {"analysis":{"isNeutral":false}}
15 {"analysis":{"isNeutral":false}}
16 {"analysis":{"isNeutral":false}}
17 {"analysis":{"isNeutral":true}}
18 {"analysis":{"isNeutral":false}}
19 {"analysis":{"isNeutral":false}}
20 {"analysis":{"isNeutral":false}}
21 {"analysis":{"isNeutral":false}}
22 {"analysis":{"isNeutral":false}}
23 {"analysis":{"isNeutral":true}}
24 {"analysis":{"isNeutral":false}}
25 {"analysis":{"isNeutral":true}}
26 {"analysis":{"isNeutral":false}}
27 {"analysis":{"isNeutral":false}}
28 {"a

In [18]:
tmp = []
for i in tier0.output['isNeutral'].to_list():
    tmp.append(i[0])

tier0.output['isNeutral'] = tmp

In [19]:
tier0.output.loc[tier0.output['isNeutral'] == True]

Unnamed: 0,text,id,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,...,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral,isNeutral
6,You’re welcome,efdbh17,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,True
17,if the pain doesn't go away after 4 hours or s...,eezp1cd,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,True
23,"Well, there's cubs and otters too.",edr2ac7,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,True
25,"Again, overall, not just for me.",efcweyl,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,True
30,NJ has zero of their own picks from the 2010 d...,edlt8ec,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5393,"Me, joining in everyone in the room saying it ...",ee5dx9l,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,True
5397,I heard of people doing that in the dorms fres...,ef2pdzn,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,True
5407,If this happens in my dream then I will get up...,eer7q6k,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,True
5418,The essay is optional.,eezc65u,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,True


In [20]:
multi_tier1_format = {
    "type": "json_schema",
    "name": "result",
    "strict": True,
    "schema": {
        "type": "object",
        "properties": {
            "analysis": {
                "type": "object",
                "properties": {
                    "isConfident": {
                        "type": "boolean",
                        "description": "If emotion1 is definite emotion, True / If it is hard to distinguish between emotion1 and emotion2, False"
                    },
                    "emotion1": {
                        "type": "array",
                        "description": "first dominent emotion(s)",
                        "items": {
                            "type": "string",
                            "enum": [ "admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion", "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism", "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral" ]
                        }
                    },
                    "emotion2": {
                        "type": "array",
                        "description": "second dominent emotion(s)",
                        "items": {
                            "type": "string",
                            "enum": [ "admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion", "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism", "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral" ]
                        }
                    }
                },
                "required": ["isConfident", "emotion1", "emotion2"],
                "additionalProperties": False
            }
        },
        "required": ["analysis"],
        "additionalProperties": False
    }
}

In [54]:
files = file_init()
tier1_system = f"{files['tier1_persona']}{files['tier1_guidelines']}{files['tier1_description']}{files['fewshot']}"
tier1 = Agent(tier1_system, multi_tier1_format, None, None, "../inputs/multi/v3/tier1.jsonl", ["isConfident", "emotion1", "emotion2"], tier0.output.loc[tier0.output.isNeutral==False].reset_index().drop(columns=['isNeutral', 'index']), ['text'])
#batch_6925040ea1e88190b3442f328158527d

In [23]:
tier1.make_jsonl()
tier1.run()

FileObject(id='file-TyTEYaqqdJqYbpJsTN7kP3', bytes=35556006, created_at=1764222148, filename='tier1.jsonl', object='file', purpose='batch', status='processed', expires_at=1766814148, status_details=None)


In [55]:
tier1.batch_id = 'batch_6927e4c53e1c81909422cf7941447983'

In [56]:
tier1.parse_result()

done!


In [57]:
tmp = []
for i in tier1.output['isConfident'].to_list():
    tmp.append(i[0])
tier1.output['isConfident'] = tmp
tmp = []
for i in tier1.output['emotion1'].to_list():
    tmp.append(i[0])
tier1.output['emotion1'] = tmp
tmp = []
for i in tier1.output['emotion2'].to_list():
    tmp.append(i[0])
tier1.output['emotion2'] = tmp

In [63]:
tier1.output.loc[3628, 'isConfident'] = True

In [64]:
notconf = tier1.output.loc[tier1.output.isConfident==False,:]
notconf.reset_index(inplace=True)

In [65]:
for i in range(len(notconf['emotion2'].to_list())):
    if len(notconf['emotion2'][i]) == 0:
        print(notconf['emotion1'][i], i, notconf['index'][i])


In [66]:
advocate1_1_in = notconf.rename(columns={'emotion1': 'assigned_emotion', 'emotion2': 'opponent_emotion'})
advocate1_2_in = notconf.rename(columns={'emotion2': 'assigned_emotion', 'emotion1': 'opponent_emotion'})

In [67]:
multi_advocate1_format = {
    "type": "json_schema",
    "name": "result",
    "strict": True,
    "schema": {
        "type": "object",
        "properties": {
            "analysis": {
                "type": "object",
                "properties": {
                    "round_1_argument": {
                        "type": "string",
                        "description": "Your initial persuasive argument text."
                    }
                },
                "required": ["round_1_argument"],
                "additionalProperties": False
            }
        },
        "required": ["analysis"],
        "additionalProperties": False
    }
}

In [68]:
files = file_init()
advocate1_system = f"{files['advocate1_persona']}{files['advocate1_guidelines']}{files['tier1_description']}{files['fewshot']}"
advocate1_1 = Agent(advocate1_system, multi_advocate1_format, tier1, None, "../inputs/multi/v3/emotion1_advocate1.jsonl", ["round_1_argument"], advocate1_1_in, ['text', 'assigned_emotion', 'opponent_emotion'])
#batch_6923c45889f88190848f9e028619056b

In [69]:
advocate1_1.make_jsonl()
advocate1_1.run()

FileObject(id='file-URpzcSyXMohP8T5KDXrWDo', bytes=1459942, created_at=1764223865, filename='emotion1_advocate1.jsonl', object='file', purpose='batch', status='processed', expires_at=1766815865, status_details=None)


In [70]:
advocate1_2 = Agent(advocate1_system, multi_advocate1_format, tier1, None, "../inputs/multi/v3/emotion2_advocate1.jsonl", ["round_1_argument"], advocate1_2_in, ['text', 'assigned_emotion', 'opponent_emotion'])
#batch_6923c46278f08190a266debe4a65766a

In [71]:
advocate1_2.make_jsonl()
advocate1_2.run()

FileObject(id='file-HAehhHsFK1yKEL7uoxQesT', bytes=1459942, created_at=1764223869, filename='emotion2_advocate1.jsonl', object='file', purpose='batch', status='processed', expires_at=1766815869, status_details=None)


In [72]:
advocate1_1.parse_result()

done!


In [73]:
advocate1_2.parse_result()

done!


In [74]:
multi_advocate2_format = {
    "type": "json_schema",
    "name": "result",
    "strict": True,
    "schema": {
        "type": "object",
        "properties": {
            "analysis": {
                "type": "object",
                "properties": {
                    "round_2_rebuttal": {
                        "type": "string",
                        "description": "Your final rebuttal and closing argument."
                    }
                },
                "required": ["round_2_rebuttal"],
                "additionalProperties": False
            }
        },
        "required": ["analysis"],
        "additionalProperties": False
    }
}

In [75]:
advocate2_1_in = advocate1_1.output.rename(columns={'round_1_argument': 'my_argument_r1'})
advocate2_1_in['opponent_argument_r1'] = advocate1_2.output['round_1_argument']
advocate2_2_in = advocate1_2.output.rename(columns={'round_1_argument': 'my_argument_r1'})
advocate2_2_in['opponent_argument_r1'] = advocate1_1.output['round_1_argument']

In [104]:
files = file_init()
advocate2_system = f"{files['advocate2_persona']}{files['advocate2_guidelines']}{files['tier1_description']}"
advocate2_1 = Agent(advocate2_system, multi_advocate2_format, advocate1_1, None, "../inputs/multi/v3/emotion1_advocate2.jsonl", ["round_2_rebuttal"], advocate2_1_in, ['text', 'assigned_emotion', 'my_argument_r1', 'opponent_argument_r1'])
#batch_6923c595d38c819083fe6855f8d0e502

In [77]:
advocate2_1.make_jsonl()
advocate2_1.run()

FileObject(id='file-AqHAuTkxQHwi2dLD4RAU4Q', bytes=1421030, created_at=1764224949, filename='emotion1_advocate2.jsonl', object='file', purpose='batch', status='processed', expires_at=1766816949, status_details=None)


In [105]:
#batch_6923c59f9a2c8190b44e7303f964a31b
advocate2_2 = Agent(advocate2_system, multi_advocate2_format, advocate1_2, None, "../inputs/multi/v3/emotion2_advocate2.jsonl", ["round_2_rebuttal"], advocate2_2_in, ['text', 'assigned_emotion', 'my_argument_r1', 'opponent_argument_r1'])

In [79]:
advocate2_2.make_jsonl()
advocate2_2.run()

FileObject(id='file-WyuMfSF2zPtGu6Bz8GBiaB', bytes=1421130, created_at=1764224952, filename='emotion2_advocate2.jsonl', object='file', purpose='batch', status='processed', expires_at=1766816952, status_details=None)


In [106]:
advocate2_1.batch_id = 'batch_6927efb5f47c8190b98d71df132c3507'
advocate2_2.batch_id = 'batch_6927efb8f58c819084518ffec14426ab'

In [95]:
json_res = []
for i in advocate2_1.batch_res.text.split('\n')[:-1]:
    json_res.append(json.loads(i))
json_res[168]['response']['body']['output'][0]['content'][0]['text'] = '{"analysis":{"round_2_rebuttal":"The opponent claims that \\"OMG [NAME]!\\" conveys amusement due to its playful tone, but this interpretation overlooks the fundamental nature of the expression. While \\"OMG\\" can sometimes be associated with lightheartedness, its primary function is to express shock or astonishment. The context of the phrase strongly suggests that the speaker is reacting to something unexpected, which aligns with the definition of surprise. Furthermore, the exclamation mark serves to amplify the intensity of the emotion, reinforcing that this is a reaction to something startling rather than merely entertaining. Therefore, the argument for amusement fails to account for the deeper emotional response embedded in the expression. In conclusion, the classification of surprise is not only appropriate but essential to understanding the true impact of the phrase."}}'



In [107]:
advocate2_1.check_status()

done!


1

In [108]:
t = 0
for i in json_res:
    tmp = [[] for _ in range(len(advocate2_1.outputCol))]
    print(t, i['response']['body']['output'][0]['content'][0]['text'])
    n = json.loads(i['response']['body']['output'][0]['content'][0]['text'])
    t += 1
    for k in range(len(advocate2_1.outputCol)):
        tmp[k].append(n['analysis'][advocate2_1.outputCol[k]])

    advocate2_1.emotion_res.append(tmp)
tmp_df = pd.DataFrame(data=advocate2_1.emotion_res, columns=advocate2_1.outputCol)
advocate2_1.output = pd.concat([advocate2_1.input, tmp_df], axis=1)

0 {"analysis":{"round_2_rebuttal":"The opponent claims that the phrase \"It's wonderful because it's awful\" conveys joy through its paradoxical expression. However, this interpretation overlooks the fundamental nature of confusion inherent in the statement. While the word \"wonderful\" might suggest positivity, its coupling with \"awful\" creates a dissonance that is not easily reconciled. This dissonance is a hallmark of confusion, as it reflects a struggle to understand how something can embody both positive and negative qualities simultaneously.\n\nFurthermore, the assertion that the phrase \"At not with\" invites joyful contemplation is misguided. In reality, it underscores a deeper sense of uncertainty, as it suggests a disconnect or a lack of clarity in understanding one's feelings. The opponent's argument fails to recognize that true joy arises from clarity and resolution of emotions, not from the ambiguous interplay of conflicting sentiments. Therefore, the text is better unde

In [81]:
advocate2_1.parse_result()

done!


JSONDecodeError: Unterminated string starting at: line 1 column 33 (char 32)

In [109]:
advocate2_2.parse_result()

done!


In [110]:
multi_judge_format = {
    "type": "json_schema",
    "name": "result",
    "strict": True,
    "schema": {
        "type": "object",
        "properties": {
            "analysis": {
                "type": "object",
                "properties": {
                    "emotion": {
                        "type": "array",
                        "description": "chosen emotion(s)",
                        "items": {
                            "type": "string",
                            "enum": [ "admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion", "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism", "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral" ]
                        }
                    }
                },
                "required": ["emotion"],
                "additionalProperties": False
            }
        },
        "required": ["analysis"],
        "additionalProperties": False
    }
}

In [111]:
judge_in = advocate2_1.output
judge_in['candidate2_round_2_rebuttal'] = advocate2_2.output['round_2_rebuttal'] 
judge_in['candidate1_round_2_rebuttal'] = advocate2_1.output['round_2_rebuttal']
judge_in = judge_in.rename(columns={'assigned_emotion':'candidate1_emotion', 'my_argument_r1':'candidate1_round_1_arg', 'opponent_emotion':'candidate2_emotion', 'opponent_argument_r1':'candidate2_round_1_arg'})

In [112]:
files = file_init()
judge_system = f"{files['judge_persona']}{files['judge_guidelines']}{files['tier1_description']}{files['fewshot']}"
judge = Agent(judge_system, multi_judge_format, None, None, "../inputs/multi/v3/judge.jsonl", ["emotion"], judge_in, ['text', 'candidate1_emotion', "candidate1_round_1_arg", "candidate1_round_2_rebuttal", "candidate2_emotion", "candidate2_round_1_arg", "candidate2_round_2_rebuttal"])
#batch_6923cf4636ec8190b50a5fe584af5510

In [113]:
judge.make_jsonl()
judge.run()

FileObject(id='file-SadZxKZE5aFtJ5Bwxau7Ah', bytes=2985892, created_at=1764235393, filename='judge.jsonl', object='file', purpose='batch', status='processed', expires_at=1766827393, status_details=None)


In [114]:
judge.parse_result()

done!


In [115]:
result = data_init()
result.index = result.id

In [116]:
for i in tier0.output[tier0.output.isNeutral == True].id:
    result.loc[i, 'emotion'] = ['neutral']

In [117]:
judge.output.index = judge.output.id
for i in judge.output.id:
    result.at[i,'emotion'] = judge.output.loc[i,'emotion'][0]

In [118]:
tier1.output.index = tier1.output.id
for i in tier1.output.id:
    result.at[i,'emotion'] = tier1.output.loc[i,'emotion1']

In [119]:
e = evaluation(data.iloc[:, 2:31], result.emotion.to_list())

--- 모델 평가 결과 ---
전체 샘플에 대한 정확도 (Exact Match Accuracy): 0.2911

--- Micro 평균 지표 ---
Precision (Micro): 0.3666
Recall (Micro): 0.3147
F1-Score (Micro): 0.3387

--- Macro 평균 지표 ---
Precision (Macro): 0.3702
Recall (Macro): 0.3326
F1-Score (Macro): 0.3101

--- 라벨별 지표 ---
admiration - Precision: 0.6737, Recall: 0.1270, F1-Score: 0.2137
amusement - Precision: 0.4045, Recall: 0.6818, F1-Score: 0.5078
anger - Precision: 0.2534, Recall: 0.6515, F1-Score: 0.3649
annoyance - Precision: 0.2128, Recall: 0.1562, F1-Score: 0.1802
approval - Precision: 0.2143, Recall: 0.0855, F1-Score: 0.1222
caring - Precision: 0.2239, Recall: 0.2222, F1-Score: 0.2230
confusion - Precision: 0.1753, Recall: 0.4444, F1-Score: 0.2514
curiosity - Precision: 0.2246, Recall: 0.1866, F1-Score: 0.2038
desire - Precision: 0.3830, Recall: 0.2169, F1-Score: 0.2769
disappointment - Precision: 0.1733, Recall: 0.2318, F1-Score: 0.1983
disapproval - Precision: 0.2075, Recall: 0.3708, F1-Score: 0.2661
disgust - Precision: 0.3590, Re

In [120]:
ek = evaluation_ekman(data.iloc[:, 2:31], result.emotion.to_list())

--- 모델 평가 결과 ---
전체 샘플에 대한 정확도 (Exact Match Accuracy): 0.4769

--- Micro 평균 지표 ---
Precision (Micro): 0.5390
Recall (Micro): 0.4966
F1-Score (Micro): 0.5170

--- Macro 평균 지표 ---
Precision (Macro): 0.4644
Recall (Macro): 0.4636
F1-Score (Macro): 0.4509

--- 라벨별 지표 ---
anger - Precision: 0.3792, Recall: 0.6377, F1-Score: 0.4756
disgust - Precision: 0.3590, Recall: 0.2276, F1-Score: 0.2786
fear - Precision: 0.4331, Recall: 0.5612, F1-Score: 0.4889
joy - Precision: 0.7506, Recall: 0.6407, F1-Score: 0.6913
sadness - Precision: 0.3841, Recall: 0.4327, F1-Score: 0.4069
surprise - Precision: 0.3884, Recall: 0.4165, F1-Score: 0.4020
neutral - Precision: 0.5564, Recall: 0.3285, F1-Score: 0.4131

Precision (Macro) 표준편차: 0.1320
Recall (Macro) 표준편차: 0.1456
F1-Score (Macro) 표준편차: 0.1167


In [240]:
result = tier1.output['emotion1'].to_list()

In [241]:
for i in range(len(judge.output)):
    result[judge.output['index'][i]] = judge.output['emotion'][i][0]