In [1]:
import json
from pathlib import Path
from tqdm import tqdm   # 진행바

# Maps each (Korean) category label found in a sample's input to the English
# question that is attached to samples of that category.
CATEGORY_QUESTION_MAP = {
    # "원인" — cause
    "원인": "Select the option that best explains why the event occurred in the preceding context.",
    # "후행사건" — subsequent event
    "후행사건": "Choose the next event that is most likely to happen after the dialogue above.",
    # "전제" — prerequisite
    "전제": "What condition must be in place first for the event to occur?",
    # "동기" — motivation
    "동기": "Based on the dialogue, select the option that best represents the speaker's inner motive or desire.",
    # "반응" — reaction
    "반응": "Select the emotional state the speaker is displaying in the dialogue above.",
}

In [2]:
# Source dataset and the destination for the question-augmented copy.
input_path = Path("../dataset/dev_english.json")
output_path = Path("../dataset/dev_eng_question_added.json")

# Read the whole file as UTF-8 text and parse it as a single JSON document.
dataset = json.loads(input_path.read_text(encoding="utf-8"))

In [3]:
# Attach the category-specific question to every sample whose category is a
# key of CATEGORY_QUESTION_MAP, then write the augmented dataset to output_path.
added_cnt = 0
unmatched = {}  # category value -> number of samples left without a question

# Using tqdm as a context manager guarantees the bar is closed even if a
# malformed sample raises part-way through the loop.
with tqdm(dataset, desc="Adding questions", unit="sample") as pbar:
    for sample in pbar:
        category = sample.get("input", {}).get("category", "")
        question = CATEGORY_QUESTION_MAP.get(category)

        if question:
            sample["input"]["question"] = question
            added_cnt += 1
        else:
            # Count samples that could not be labelled instead of skipping
            # them silently, so unexpected category values become visible.
            unmatched[category] = unmatched.get(category, 0) + 1

        # Show the running count of added questions on the right of the bar.
        pbar.set_postfix({"added": added_cnt})

with output_path.open("w", encoding="utf-8") as f:
    json.dump(dataset, f, ensure_ascii=False, indent=2)

if unmatched:
    # Only emitted when at least one sample had no matching category.
    print(f"⚠️ {sum(unmatched.values())} sample(s) left without a question; unmatched categories: {unmatched}")

print(f"✅ 질문 {added_cnt}개 추가 완료 · 저장 경로: {output_path.resolve()}")


Adding questions:   0%|          | 0/151 [00:00<?, ?sample/s, added=6]

Adding questions: 100%|██████████| 151/151 [00:00<00:00, 4187.59sample/s, added=151]

✅ 질문 151개 추가 완료 · 저장 경로: /Users/taeyoonkwack/Documents/HCLT-KACL-2025/dataset/dev_eng_question_added.json



