# Libraries

In [19]:
import os
import json
import re
import pandas as pd
from datetime import datetime
from openai import OpenAI
from sklearn.metrics import f1_score
# from pprint import pprint

# Do not truncate long text cells when DataFrames are displayed.
# The fully qualified option name is used because pandas only resolves partial
# names by pattern matching, which can break if a similarly named option appears.
pd.set_option('display.max_colwidth', None)

# Config

In [28]:
# NOTE(review): `seed` is not referenced by any other cell visible in this notebook —
# confirm whether sampling / API calls were meant to use it.
seed = 42

# Read the API key from secrets.json; the context manager closes the file handle
# instead of leaving it open until garbage collection.
with open('secrets.json') as secrets_file:
    client = OpenAI(api_key=json.load(secrets_file)['OPENAPI_SECRET_KEY'])

# Maps the Indonesian emotion column names used in the CSV to English labels.
idn2eng_emotion_map = {
    'marah': 'anger',
    'jijik': 'disgust',
    'takut': 'fear',
    'senang': 'joy',
    'sedih': 'sadness',
    'terkejut': 'surprise',
    'biasa': 'neutral',
}

# System-prompt files that are compared against each other in the evaluation section.
prompt_paths = [
    'prompts/prompt_with_keywords_and_analysis.txt',
    'prompts/prompts_with_keywords.txt',
]

# Directory where one JSON file per evaluation run is written and later read back.
prompt_evaluations_dir = 'prompt_evaluations'

# Data

## Load Data

In [8]:
# Load val.csv and translate its emotion column names from Indonesian to English in one chain
df = (
    pd.read_csv('data/preprocessed_data/track_a/sun_70_15_15_stratify_v2/val.csv')
    .rename(columns=idn2eng_emotion_map)
)

print("DF size:", df.shape[0])
df.head()

DF size: 128


Unnamed: 0,text,emotion,anger,disgust,fear,joy,sadness,surprise,neutral
0,alhamdulillah hatur nuhun otw nonton ieu mah,senang,0,0,0,1,0,0,0
1,"Nongton iklan nepi ka tamat ,😭",senang,0,0,0,1,0,0,0
2,min ai mnh teu cape lulumpatan wae :v kumaha weh ngagawe keun jelma mah ceuk mang dana ge :v,"senang, terkejut",0,0,0,1,0,1,0
3,"Hebaaat , mang mung basa sundana rada rapihken ulah babeulit, nya, nya","senang, terkejut",0,0,0,1,0,1,0
4,Aya meri dina rakit Boboko wadah bakatul Lain nyeri ku panyakit Kabogoh direbut batur🤣🤣😭,"senang, sedih, terkejut",0,0,0,1,1,1,0


In [9]:
# Discard every row flagged as 'neutral', then the 'neutral' column itself (only if it exists)
if 'neutral' in df.columns:
    df = df.loc[~df['neutral'].eq(1)].drop(columns='neutral')

# Whatever is left after excluding the index/text/label-string columns is a one-hot emotion column
emotion_cols = [col for col in df.columns if col not in ('Unnamed: 0', 'text', 'emotion', 'id')]

# Rebuild the comma-separated 'emotion' string from the (English) one-hot columns
df['emotion'] = df.apply(
    lambda row: ', '.join(filter(lambda emotion: row[emotion] == 1, emotion_cols)),
    axis=1,
)

print("DF size (after removing emotion 'neutral'):", df.shape[0])
print("Emotion columns:", emotion_cols)
df.head()

DF size (after removing emotion 'neutral'): 121
Emotion columns: ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise']


Unnamed: 0,text,emotion,anger,disgust,fear,joy,sadness,surprise
0,alhamdulillah hatur nuhun otw nonton ieu mah,joy,0,0,0,1,0,0
1,"Nongton iklan nepi ka tamat ,😭",joy,0,0,0,1,0,0
2,min ai mnh teu cape lulumpatan wae :v kumaha weh ngagawe keun jelma mah ceuk mang dana ge :v,"joy, surprise",0,0,0,1,0,1
3,"Hebaaat , mang mung basa sundana rada rapihken ulah babeulit, nya, nya","joy, surprise",0,0,0,1,0,1
4,Aya meri dina rakit Boboko wadah bakatul Lain nyeri ku panyakit Kabogoh direbut batur🤣🤣😭,"joy, sadness, surprise",0,0,0,1,1,1


# Evaluation

In [10]:
def get_prompt(prompt_path):
    """Return the full text of the prompt file at `prompt_path`.

    The file is decoded as UTF-8 explicitly so that prompts containing non-ASCII
    characters (e.g. emoji) are read the same way on every platform instead of
    depending on the locale's default encoding.

    Args:
        prompt_path: Path to a text file containing the prompt.

    Returns:
        The file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(prompt_path, 'r', encoding='utf-8') as f:
        return f.read()

def one_hot_encode_emotions(emotions, emotion_cols=None):
    """Encode a collection of emotion labels as a 0/1 list aligned with `emotion_cols`.

    Args:
        emotions: Container of predicted/true emotion labels; membership is tested
            with `in`, so pass a list/set of labels rather than a raw string.
        emotion_cols: Ordered label names defining the output positions. When
            omitted, the notebook-level `emotion_cols` is looked up at call time.

    Returns:
        A list with one int per entry of `emotion_cols`: 1 if that label is in
        `emotions`, otherwise 0.
    """
    if emotion_cols is None:
        # Resolve the default at call time. Binding it in the signature would freeze
        # whatever `emotion_cols` held when this cell ran, and silently go stale if
        # the data cell is re-run afterwards.
        emotion_cols = globals()['emotion_cols']
    return [1 if emotion_col in emotions else 0 for emotion_col in emotion_cols]

def get_pred_tag_content(prediction, tag_name):
    """Return the stripped text between the first `<tag_name>` ... `</tag_name>` pair.

    Args:
        prediction: Raw model response; `None` is treated as "no content".
        tag_name: Tag name to look for, matched literally.

    Returns:
        The content of the first matching tag pair with surrounding whitespace
        removed, or `None` when the tag pair is absent or `prediction` is `None`.
    """
    if prediction is None:
        return None
    # Escape the tag so regex metacharacters in it cannot widen or break the pattern.
    tag = re.escape(tag_name)
    # DOTALL lets the non-greedy group span multiple lines.
    match = re.search(fr'<{tag}>(.*?)</{tag}>', prediction, re.DOTALL)
    return match.group(1).strip() if match else None

def predict(text, prompt, model='gpt-4o', max_tokens=1000):
    """Ask the chat model to analyse `text` under the system prompt `prompt`.

    Uses the notebook-level `client`.

    Args:
        text: The user message to classify.
        prompt: The system prompt with the task instructions.
        model: Chat model name; defaults to the previously hard-coded 'gpt-4o'.
        max_tokens: Upper bound on generated tokens; defaults to the previously
            hard-coded 1000.

    Returns:
        The text of the first choice. An empty string is returned when the API
        reports no text content, so callers always receive a `str`.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {'role': 'system', 'content': prompt},
            {'role': 'user', 'content': text},
        ],
        max_tokens=max_tokens,
    )
    # `content` is optional in the response object; normalise a missing value to ''.
    return completion.choices[0].message.content or ''

# Run one hand-picked sentence through the first prompt to check the parsing end to end.
prediction = predict(
    text="Cek urang mah jelema goblog teh nyaeta, jelema nu menta udud ngomong teu boga, padahal Aya sabungkus dinu pesak 😂🤣",
    prompt=get_prompt(prompt_paths[0]),
)

keywords_pred = get_pred_tag_content(prediction, 'keywords')
print("Precicted keywords and their analyses:")
print(keywords_pred)
print()

# get_pred_tag_content returns None when the reply has no <emotions> tag (e.g. a
# truncated answer); fall back to '' so the normalisation chain does not raise.
emotions_pred = (get_pred_tag_content(prediction, 'emotions') or '').lower().replace("." , "").replace(" ", "").split(",")
print("Precicted emotions:", emotions_pred, "->", one_hot_encode_emotions(emotions_pred))

Precicted keywords and their analyses:
<keyword>
- Keyword: goblog
- Analysis: The term "goblog" in Sundanese and Indonesian is often used as an insult, which translates to "stupid" or "fool" in English. According to the guidelines, insults typically indicate both `anger` and `disgust`. This suggests that the use of "goblog" conveys a certain level of contempt or frustration, possibly towards the person described as such or the situation. The speaker might be expressing their irritation or annoyance with the actions of the individual they are describing, which aligns with a sense of anger. Simultaneously, the term could imply a sense of disdain or scorn towards what they perceive as foolish behavior, aligning with the emotion of disgust. The choice of the word "goblog" is not just casual criticism; it carries a weight of emotional intensity that usually stems from underlying negative feelings like anger or disgust towards the subject in question.
- Emotions: anger, disgust
</keyword>



In [66]:
def _parse_emotions_pred(prediction):
    """Return the normalised emotion labels found in the `<emotions>` tag of `prediction`."""
    emotions_raw = get_pred_tag_content(prediction, 'emotions')
    if emotions_raw is None:
        # No <emotions> tag in the reply (e.g. truncated answer): nothing was predicted.
        return []
    # Lower-case and drop dots/spaces so "Joy, Anger." still matches the label names.
    cleaned = emotions_raw.lower().replace(".", "").replace(" ", "")
    return [emotion for emotion in cleaned.split(",") if emotion]


def eval_sampled(n, prompt_path, max_retries=5, random_state=None):
    """Evaluate one prompt on `n` rows sampled from the notebook-level `df`.

    Each sampled text is sent to the model. A prediction counts as valid when at
    least one known emotion label is recognised; invalid predictions are retried.

    Args:
        n: Number of rows to sample from `df`.
        prompt_path: Path of the prompt file to evaluate.
        max_retries: Maximum number of model calls per text. After the last failed
            attempt the all-zero prediction is kept, so the loop cannot run (and
            bill API calls) forever.
        random_state: Forwarded to `DataFrame.sample`; pass a fixed value to draw
            the same rows on every call. `None` keeps the sampling random.

    Returns:
        A dict with 'prompt_path', 'n', 'f1_macro', 'f1_micro' and one
        'f1_label_<emotion>' entry per column in `emotion_cols`.

    Raises:
        ValueError: If `max_retries` is smaller than 1.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    prompt = get_prompt(prompt_path)

    df_sampled = df.sample(n=n, random_state=random_state)
    y_true = df_sampled[emotion_cols].to_numpy()

    y_pred = []
    for i, (_, row) in enumerate(df_sampled.iterrows(), start=1):
        print("=" * 64)
        print(f"{i} OF {len(df_sampled)}")
        print("=" * 64)

        for _attempt in range(max_retries):
            print("Predicting...")

            prediction = predict(row['text'], prompt)
            keywords_pred = get_pred_tag_content(prediction, 'keywords')
            emotions_pred = _parse_emotions_pred(prediction)
            emotions_pred_one_hot = one_hot_encode_emotions(emotions_pred, emotion_cols)

            # Valid as soon as at least one known emotion label was recognised.
            if any(emotions_pred_one_hot):
                break

            print("Error! Prediction:", prediction)
            print()
        else:
            # Every attempt failed: keep the all-zero vector from the last attempt.
            print(f"Giving up after {max_retries} attempts; recording an empty prediction.")

        y_pred.append(emotions_pred_one_hot)

        print()
        print("# TEXT")
        print(row['text'])
        print()
        print("# KEYWORDS")
        print(keywords_pred)
        print()
        print("# PREDICTED EMOTIONS")
        print(emotions_pred)
        print()
        print("# TRUE EMOTIONS")
        print(row['emotion'])
        print()

    # zero_division=1.0 is the value f1_score substitutes when a score is undefined
    # (division by zero), instead of the default 0 with a warning.
    f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=1.0)
    f1_micro = f1_score(y_true, y_pred, average='micro', zero_division=1.0)
    f1_labels = f1_score(y_true, y_pred, average=None, zero_division=1.0)
    f1_labels_dict = {
        f'f1_label_{emotion_col}': float(f1_label)
        for emotion_col, f1_label in zip(emotion_cols, f1_labels)
    }

    return {
        'prompt_path': prompt_path,
        'n': n,
        'f1_macro': float(f1_macro),
        'f1_micro': float(f1_micro),
        **f1_labels_dict,
    }

# Evaluate every configured prompt on 20 sampled rows and collect the metric dicts
prompt_eval = []
for prompt_path in prompt_paths:
    for banner_line in ("=" * 128, None, "=" * 128):
        if banner_line is None:
            print("PROMPT PATH:", prompt_path)
        else:
            print(banner_line)

    evaluation = eval_sampled(n=20, prompt_path=prompt_path)
    print("Evaluation:", evaluation, sep="\n")

    prompt_eval += [evaluation]

    print()

PROMPT PATH: prompts/prompt_with_keywords_and_analysis.txt
1 OF 20
Predicting...

# TEXT
Mang pas mang dana nyebutkeun "tah urang inget" langsung aya iklan gojek mang, saya ge jadi kainget naha teu make gofood, 😁

# KEYWORDS
<keyword>
- Keyword: 😁
- Analysis: Hmm, let's take a closer look at this emoji. The emoji 😁 is generally associated with a wide, beaming smile that conveys happiness or a good-natured feeling. According to the guidelines provided, smile or laugh emojis such as 😁 typically indicate joy. This smile could result from recalling something pleasant, funny, or light-hearted. The context indicates that the user is recalling a situation where the mention of "tah urang inget" instantly brought to mind a Gojek advertisement, which in turn led to an amusing thought about not using GoFood. This sequence of thoughts could easily provoke a smile or laughter, hence the use of this emoji. Therefore, based on the guidelines and the surrounding context, 😁 is strongly indicative of jo

In [14]:
# Display the evaluation results collected for each prompt.
# NOTE(review): the saved output below nests the metrics under the prompt path and has
# no 'n' entry, unlike the flat dict eval_sampled returns — presumably left over from
# an earlier run of this cell; re-run to refresh.
prompt_eval

[{'prompts/prompt_with_keywords_and_analysis.txt': {'f1_macro': 0.45359848484848486,
   'f1_micro': 0.6545454545454545,
   'f1_label_anger': 0.0,
   'f1_label_disgust': 1.0,
   'f1_label_fear': 0.0,
   'f1_label_joy': 0.8125,
   'f1_label_sadness': 0.9090909090909091,
   'f1_label_surprise': 0.0}},
 {'prompts/prompts_with_keywords.txt': {'f1_macro': 0.4454286454286455,
   'f1_micro': 0.6909090909090909,
   'f1_label_anger': 0.8,
   'f1_label_disgust': 0.0,
   'f1_label_fear': 0.0,
   'f1_label_joy': 0.9230769230769231,
   'f1_label_sadness': 0.7272727272727273,
   'f1_label_surprise': 0.2222222222222222}}]

## Save Evaluation

In [68]:
# Create the output directory on first use; otherwise open() fails with FileNotFoundError.
os.makedirs(prompt_evaluations_dir, exist_ok=True)

# One file per run, named by the current timestamp so earlier runs are not overwritten.
with open(
    os.path.join(prompt_evaluations_dir, f'prompt_eval_{datetime.now().strftime("%Y%m%d%H%M%S")}.json'),
    'w',
) as f:
    json.dump(prompt_eval, f, indent=4)

## View Saved Evaluations

In [70]:
# Only read the JSON result files: os.listdir also returns any other entry in the
# directory (hidden files, checkpoint folders), which pd.read_json cannot parse.
# Sorting makes the load order deterministic.
prompt_eval_names = sorted(
    name for name in os.listdir(prompt_evaluations_dir) if name.endswith('.json')
)
prompt_eval_dfs = [pd.read_json(os.path.join(prompt_evaluations_dir, prompt_eval_name)) for prompt_eval_name in prompt_eval_names]

# Average every metric per prompt across all saved runs.
grouped_df = pd.concat(prompt_eval_dfs).groupby('prompt_path', as_index=False).mean()
grouped_df

Unnamed: 0,prompt_path,n,f1_macro,f1_micro,f1_label_anger,f1_label_disgust,f1_label_fear,f1_label_joy,f1_label_sadness,f1_label_surprise
0,prompts/prompt_with_keywords_and_analysis.txt,20.0,0.517154,0.70622,0.5,0.666667,0.0,0.851478,0.779221,0.305556
1,prompts/prompts_with_keywords.txt,20.0,0.529844,0.725659,0.766667,0.333333,0.0,0.899267,0.849832,0.329966
