In [None]:
from openai import OpenAI
import pandas as pd
import os
import json

### Load Evaluation Questions (Rubrics) and Template

In [None]:
# Load the rubrics for the general part of the dataset.
FILE_PATH = os.path.join('eval_rubric', 'rubrics_general.xlsx')
df_rubrics_general = pd.read_excel(FILE_PATH)

# Map each category to its ordered list of rubric questions.
# A row with a non-empty "Category" starts a new category; rows whose
# "Category" is empty are appended to the most recently seen category.
rubrics_general = {}
current_category = None

for _, row in df_rubrics_general.iterrows():
    if pd.notna(row["Category"]):
        current_category = row["Category"]
        rubrics_general[current_category] = []
    elif current_category is None:
        # A question row that precedes every category header has nowhere to
        # go; fail with a clear message instead of an opaque `KeyError: None`.
        raise ValueError(
            f"{FILE_PATH}: found a question row before any 'Category' value"
        )
    rubrics_general[current_category].append(f"{row['Question']}")

# Uncomment to inspect the mapping:
# rubrics_general

In [None]:
# Load the rubrics for the Indonesian-specific part of the dataset.
FILE_PATH = os.path.join('eval_rubric', 'rubrics_indonesia.xlsx')
df_rubrics_indonesian = pd.read_excel(FILE_PATH)

# Map each category to its ordered list of rubric questions.
# A row with a non-empty "Category" starts a new category; rows whose
# "Category" is empty are appended to the most recently seen category.
rubrics_indonesian = {}
current_category = None

for _, row in df_rubrics_indonesian.iterrows():
    if pd.notna(row["Category"]):
        current_category = row["Category"]
        rubrics_indonesian[current_category] = []
    elif current_category is None:
        # A question row that precedes every category header has nowhere to
        # go; fail with a clear message instead of an opaque `KeyError: None`.
        raise ValueError(
            f"{FILE_PATH}: found a question row before any 'Category' value"
        )
    rubrics_indonesian[current_category].append(f"{row['Question']}")

# Uncomment to inspect the mapping:
# rubrics_indonesian

In [None]:
# Load the evaluation prompt template.
# The file is read explicitly as UTF-8 so the result does not depend on the
# platform's default encoding, which could corrupt non-ASCII template text.
FILE_PATH = os.path.join('prompt_templates', 'eval_template.txt')
with open(FILE_PATH, "r", encoding="utf-8") as file:
    template = file.read()
print(template)

### Judge Responses With GPT-4o

In [None]:
# Read the API key from the environment instead of hard-coding it in the
# notebook, so the secret never ends up in version control or shared outputs.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this cell."
    )
client = OpenAI(api_key=api_key)

# Input and output directories.
INPUT_DIR = '1_responses'
OUTPUT_DIR = '2_responses_judged'

# List all .xlsx files to process.
# - sorted(): os.listdir returns entries in arbitrary order; sorting makes
#   re-runs process the files in the same order.
# - '~$' prefix: lock files Excel creates next to an open workbook also end in
#   .xlsx but are not readable workbooks, so they are skipped.
files = sorted(
    f for f in os.listdir(INPUT_DIR)
    if f.endswith('.xlsx') and not f.startswith('~$')
)
files

In [None]:
# Build one chat-completion request per (row, rubric) pair for every input
# workbook. Result: file_to_requests[file] -> list of Batch API request dicts.
file_to_requests = dict()

for file in files:
    requests = []
    file_path = os.path.join(INPUT_DIR, file)

    print(f"Processing file: {file}")

    # load the excel file
    df = pd.read_excel(file_path)

    # read_excel without index_col yields the default 0..n-1 index, so `index`
    # is also the row position that is later decoded from custom_id.
    for index, row in df.iterrows():
        # Assumes column 4 is the prompt and column 5 is the response.
        # Use .iloc for positional access: plain `row[4]` on a label-indexed
        # Series relies on a deprecated integer-position fallback.
        question = str(row.iloc[4])
        response = str(row.iloc[5])

        # For region-specific sensitivity the rubrics are keyed by specific
        # harm; for everything else they are keyed by risk area.
        if row['risk_area'] == 'Region-specific Sensitivity':
            category = row['specific_harms']
            rubrics = rubrics_indonesian[category]
        else:
            category = row['risk_area']
            rubrics = rubrics_general[category]

        for i, rubric in enumerate(rubrics, start=1):
            # Fill the three placeholders of the evaluation template.
            filled_template = template.replace("[Q_EVALUASI]", rubric) \
                                      .replace("[PERTANYAAN]", question) \
                                      .replace("[JAWABAN]", response)
            message = [
                {"role": "system", "content": "You are a helpful AI assistant. Answer questions according to the language used by the user."},
                {"role": "user", "content": filled_template}
            ]

            # custom_id encodes "<row index>_<rubric number>" so each batch
            # result can be matched back to its row and rubric column.
            requests.append({
                "custom_id": f'{index}_{i}',
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": "gpt-4o",
                    "messages": message,
                }
            })
    file_to_requests[file] = requests

# Show only how many requests were built per file; displaying the full
# dictionary would dump every prompt and response into the cell output.
{name: len(reqs) for name, reqs in file_to_requests.items()}

In [None]:
# Write a temporary .jsonl file per input workbook and submit it as a batch.
# Result: file_to_batches[file] -> id of the batch created for that workbook.
requests_dir = 'temp_jsonl'
os.makedirs(requests_dir, exist_ok=True)  # open(..., "w") fails if the folder is missing
file_to_batches = dict()

for file, file_requests in file_to_requests.items():
    if not file_requests:
        # Nothing to judge for this workbook, so no batch is created for it.
        print(f"Skipping {file}: no requests to submit")
        continue

    # Swap the extension with splitext instead of slicing off a fixed number
    # of characters, so the name stays correct whatever the extension length.
    filename_jsonl = os.path.splitext(file)[0] + ".jsonl"
    path_jsonl = os.path.join(requests_dir, filename_jsonl)
    with open(path_jsonl, "w", encoding="utf-8") as f:
        for req in file_requests:
            f.write(json.dumps(req) + "\n")

    # Upload the file for batch processing; the context manager closes the
    # handle once the upload call returns.
    with open(path_jsonl, "rb") as jsonl_file:
        uploaded_file = client.files.create(
            file=jsonl_file,
            purpose="batch"
        )

    # send batch request
    batch = client.batches.create(
        input_file_id=uploaded_file.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={
            "description": f"Using GPT-4o as a judge for: {file}"
        }
    )

    # keep track of each file's corresponding batch id
    file_to_batches[file] = batch.id

file_to_batches

In [None]:
# Print the current status of every submitted batch.
for filename, batch_id in file_to_batches.items():
    batch_status = client.batches.retrieve(batch_id)
    # The status prefix was mojibake ("‚è≥"): the UTF-8 bytes of the hourglass
    # emoji decoded with the wrong codec. Restored to the intended character.
    print(f"⏳ Status {filename}: {batch_status.status}")

In [None]:
# WARNING: only run this part after all batch processing is completed.
# For each workbook: download the batch output, write every judge answer into
# its "Rubric <n>" column, and save the result under OUTPUT_DIR.
os.makedirs(OUTPUT_DIR, exist_ok=True)  # to_excel fails if the folder is missing

for filename, batch_id in file_to_batches.items():
    batch_status = client.batches.retrieve(batch_id)
    output_file_id = batch_status.output_file_id
    if not output_file_id:
        # No output file is attached to this batch, so there is nothing to
        # download; report it and move on rather than failing on a None id.
        print(f"Skipping {filename}: no output file yet (batch status: {batch_status.status})")
        continue

    file_response = client.files.content(output_file_id)
    # One JSON document per line; blank lines are ignored so an empty output
    # file yields an empty list instead of a JSON decode error.
    results = [json.loads(line) for line in file_response.text.splitlines() if line.strip()]
    file_path = os.path.join(INPUT_DIR, filename)

    df_result = pd.read_excel(file_path)
    # These three columns are always present in the output; further rubric
    # columns are created on demand in the loop below.
    for rubric_column in ("Rubric 1", "Rubric 2", "Rubric 3"):
        df_result[rubric_column] = None

    failed = 0
    for result in results:
        # custom_id is "<row position>_<rubric number>"
        prompt_number, rubric_number = result['custom_id'].split("_")

        # A result without a usable response body/choices is counted as
        # failed and its cell is left empty.
        body = (result.get('response') or {}).get('body') or {}
        choices = body.get('choices')
        if not choices:
            failed += 1
            continue
        content = choices[0]['message']['content']

        rubric_column = f"Rubric {rubric_number}"
        if rubric_column not in df_result.columns:
            df_result[rubric_column] = None
        df_result.iloc[int(prompt_number), df_result.columns.get_loc(rubric_column)] = content

    if failed:
        print(f"Warning: {failed} request(s) for {filename} returned no answer; their cells are left empty")

    # create output file
    output_file_name = f"result_{filename}"
    output_path = os.path.join(OUTPUT_DIR, output_file_name)
    df_result.to_excel(output_path, index=False)
    print(f"Finished processing {filename}")