## 1. 환경 설정

In [13]:
import os, json, openai
from tqdm import tqdm
from dotenv import load_dotenv
import pandas as pd
from collections import defaultdict
from openai import OpenAI

# Load variables from a .env file into the process environment
# (presumably this is where the OpenAI API key comes from; verify locally).
load_dotenv()
# Shared client used by the chat-completion calls later in this notebook.
client = OpenAI()

# Make sure the output directory exists.
os.makedirs("results", exist_ok=True)

## 2. 데이터 로드

In [27]:
import json, os
from collections import defaultdict

# Number of cases taken from the head of the JSONL file for this run.
MAX_CASES = 10

# Load the input files: cases, pseudocode, question checklists and full statutes.
with open("../../../../dataset/PIPA/cases/illegal.jsonl", "r", encoding="utf-8") as f:
    # One JSON object per line. Blank lines (e.g. an extra newline at EOF) are
    # skipped so json.loads never sees an empty record.
    cases = [json.loads(line) for line in f if line.strip()]
cases = cases[:MAX_CASES]

# Per-node pseudocode for the law and for the enforcement decree.
with open("pseudocode_law.json", "r", encoding="utf-8") as f:
    pseudocode_law = json.load(f)
with open("pseudocode_decree.json", "r", encoding="utf-8") as f:
    pseudocode_decree = json.load(f)

# Question checklists; the main loop reads "variable" and "question" from each item.
with open("accumulated_variables_law.json", "r", encoding="utf-8") as f:
    checklist_law = json.load(f)
with open("accumulated_variables_decree.json", "r", encoding="utf-8") as f:
    checklist_decree = json.load(f)

# Full statute node lists, used to initialise the per-variable state.
with open("../../../../dataset/PIPA/law/law.json", "r", encoding="utf-8") as f:
    full_law = json.load(f)
with open("../../../../dataset/PIPA/law/decree.json", "r", encoding="utf-8") as f:
    full_decree = json.load(f)

# Build the initial state for every variable.
def init_vars(full_json):
    """Create the default state record for every named node.

    Args:
        full_json: Iterable of node mappings; only the "var_name" entry of
            each node is read.

    Returns:
        Dict mapping each truthy "var_name" to its own fresh
        {"applicability": False, "legal": True} record. Nodes whose
        "var_name" is missing or falsy are left out.
    """
    names = (node.get("var_name") for node in full_json)
    return {
        name: {"applicability": False, "legal": True}
        for name in names
        if name
    }

# Default per-variable state for the law and for the decree. The main loop
# deep-copies these for each case so that cases do not share state.
base_vars_law_full = init_vars(full_law)
base_vars_decree_full = init_vars(full_decree)

## 3. 트리 생성 함수

In [28]:
def build_tree_with_root(pseudocode_data, root_id, root_name):
    """Index pseudocode items by id and arrange them under a synthetic root.

    Args:
        pseudocode_data: Sequence of item dicts. Each needs an "id"; an
            optional "parent" names the id of its parent item.
        root_id: Id (also used as "var_name") of the synthetic root node.
        root_name: Title stored on the synthetic root node.

    Returns:
        A (tree, nodes) pair. tree is a defaultdict(list) mapping a parent id
        to its child ids in input order. nodes maps every id, including
        root_id, to its item dict.
    """
    nodes = {}
    for entry in pseudocode_data:
        nodes[entry["id"]] = entry

    tree = defaultdict(list)
    for entry in pseudocode_data:
        parent_id = entry.get("parent")
        # Items with no parent, or with a parent id that is not in the data,
        # are attached directly to the synthetic root.
        has_known_parent = bool(parent_id) and parent_id in nodes
        attach_to = parent_id if has_known_parent else root_id
        tree[attach_to].append(entry["id"])

    # The root is always applicable ("True") and carries no action.
    root_pseudocode = {
        "applicability_pseudocode": "True",
        "legal_pseudocode": "True",
        "action_pseudocode": "",
    }
    nodes[root_id] = {
        "id": root_id,
        "var_name": root_id,
        "class": "root",
        "title": root_name,
        "parent": None,
        "pseudocode": root_pseudocode,
    }
    return tree, nodes

# Build one tree per statute, each hung under its own synthetic root node.
tree_law, nodes_law = build_tree_with_root(pseudocode_law, "LAW_ROOT", "개인정보보호법")
tree_decree, nodes_decree = build_tree_with_root(pseudocode_decree, "DECREE_ROOT", "시행령")


## 4. GPT-4o로 변수 확정 함수

In [29]:
def ask_gpt_true_false(case_content: str, question: str) -> bool:
    """Ask GPT-4o a true/false question about a business document.

    Args:
        case_content: Text of the business document, embedded in the prompt.
        question: Question to be judged against the document.

    Returns:
        True when the first standalone "true"/"false" token in the reply is
        "true" (case-insensitive). False otherwise, including when the reply
        is empty or contains neither word.
    """
    import re  # function-scope import keeps this cell self-contained

    prompt = f"""
다음은 비즈니스 활동에 대한 설명입니다.

[비즈니스 문서]
{case_content}

[질문]
{question}

위 문서의 내용을 바탕으로 질문의 진위 여부를 판단하세요.
'True' 또는 'False' 중 하나만 답변하세요.
그 외 어떤 설명도 하지 마세요.
"""

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a compliance reasoning assistant. Output must be either True or False."},
            {"role": "user", "content": prompt}
        ],
        temperature=0  # keep the judgement as repeatable as the API allows
    )
    # Guard against a missing message body so parsing never calls a method on None.
    reply = completion.choices[0].message.content or ""

    # Decide on the FIRST standalone true/false token. A plain substring test
    # would read "False - it is not True that ..." as True and would miss a
    # lowercase "true". ASCII-letter lookarounds are used instead of \b so a
    # reply such as "True입니다" is still recognised.
    verdict = re.search(r"(?<![A-Za-z])(true|false)(?![A-Za-z])", reply, flags=re.IGNORECASE)
    return verdict is not None and verdict.group(1).lower() == "true"

## 5. 트리 순회 함수 (오류 무시 + try/except)

In [30]:
def safe_eval(expr, context):
    """Evaluate a pseudocode expression, treating any failure as False.

    Args:
        expr: Expression source passed to eval().
        context: Mapping used as the local namespace for the evaluation.

    Returns:
        The value of the expression, or False if evaluation raises any
        Exception (syntax error, unknown name, bad lookup, ...).
    """
    # NOTE(review): eval() executes arbitrary code and the empty globals dict
    # does not remove builtins; only feed it trusted pseudocode.
    try:
        value = eval(expr, {}, context)
    except Exception:
        value = False
    return value

def traverse(tree, nodes, variables, root_id):
    """Run the applicability pass, then the legality/action pass, over a tree.

    Args:
        tree: Mapping from a node id to the list of its child ids.
        nodes: Mapping from a node id to its node dict; "var_name" and
            "pseudocode" are read from each node.
        variables: Evaluation namespace. It is mutated in place and is also
            the object returned.
        root_id: Id of the node at which both passes start.

    Returns:
        The same variables mapping after both passes.
    """

    def mark_applicability(current_id):
        # Pre-order: evaluate the node first and descend only when it applies.
        current = nodes[current_id]
        condition = current["pseudocode"].get("applicability_pseudocode", "")
        name = current["var_name"]
        variables.setdefault(name, {"applicability": False, "legal": True})
        try:
            outcome = safe_eval(condition, variables)
        except Exception:
            outcome = False
        variables[name]["applicability"] = bool(outcome)
        if not outcome:
            # Not applicable: the whole subtree is skipped in this pass.
            return
        for child_id in tree.get(current_id, []):
            mark_applicability(child_id)

    def settle_legality(current_id):
        # Post-order: children are handled before their parent. This pass
        # visits every node under the root, including those the applicability
        # pass skipped.
        for child_id in tree.get(current_id, []):
            settle_legality(child_id)
        current = nodes[current_id]
        name = current["var_name"]
        pseudo = current["pseudocode"]
        try:
            # NOTE(review): a truthy legal_pseudocode sets "legal" to False;
            # confirm the pseudocode is meant to encode the violation condition.
            flagged = safe_eval(pseudo.get("legal_pseudocode", ""), variables)
            if flagged:
                variables[name]["legal"] = False
        except Exception:
            pass
        try:
            action = pseudo.get("action_pseudocode", "")
            if action:
                # The action runs with variables as its local namespace, so
                # any assignment it makes lands in variables.
                exec(action, {}, variables)
        except Exception:
            pass

    mark_applicability(root_id)
    settle_legality(root_id)
    return variables

## 6. 메인 루프 (GPT 개별 질문 + 트리 순회)

In [33]:
# ==============================
# 📊 6. 메인 루프 (한 tqdm 바에 전체 질문 진행률 표시)
# ==============================
import os, json, pandas as pd
from copy import deepcopy
from tqdm import tqdm

def _ask_checklist(content, checklist, pbar):
    """Ask every checklist question about one case and collect the answers.

    Args:
        content: Case text handed to ask_gpt_true_false.
        checklist: Iterable of items with "variable" and "question" keys.
        pbar: tqdm bar shared by all checklists of the case; advanced by one
            per question.

    Returns:
        Dict mapping each item's "variable" to the boolean answer.
    """
    answers = {}
    for item in checklist:
        answers[item["variable"]] = ask_gpt_true_false(content, item["question"])
        # Show "answered/total" next to the bar, then advance it.
        pbar.set_postfix_str(f"{pbar.n + 1}/{pbar.total}")
        pbar.update(1)
    return answers


def _write_json(path, payload):
    """Write payload to path as indented UTF-8 JSON without ASCII escaping."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)


summary = []
os.makedirs("results", exist_ok=True)

# Total question count = law checklist + decree checklist. Both checklists are
# fixed for the whole run, so this is computed once.
total_questions = len(checklist_law) + len(checklist_decree)

for case in cases:
    case_id = case["case_id"]
    content = case["case_content"]
    # Split the ground-truth articles by statute using the name in the label.
    gt_law = [a for a in case["violated_articles"] if "보호법" in a]
    gt_decree = [a for a in case["violated_articles"] if "시행령" in a]

    case_dir = os.path.join("results", case_id)
    os.makedirs(case_dir, exist_ok=True)

    # --- Initialise from the full variable set; deep copies keep cases independent ---
    law_vars = deepcopy(base_vars_law_full)
    decree_vars = deepcopy(base_vars_decree_full)

    # --- Ask all questions under one progress bar. The context manager
    # closes the bar even if an API call raises part-way through. ---
    with tqdm(total=total_questions, desc=f"[{case_id}] 질문 진행", unit="질문", leave=True) as pbar:
        law_answers = _ask_checklist(content, checklist_law, pbar)
        decree_answers = _ask_checklist(content, checklist_decree, pbar)

    # --- Save the raw GPT answers ---
    _write_json(os.path.join(case_dir, "law_answers.json"), law_answers)
    _write_json(os.path.join(case_dir, "decree_answers.json"), decree_answers)

    # --- Merge contexts: on a name clash the GPT answer replaces the default state ---
    law_context = {**law_vars, **law_answers}
    decree_context = {**decree_vars, **decree_answers}

    # --- Traverse the trees ---
    law_result = traverse(tree_law, nodes_law, law_context, "LAW_ROOT")
    decree_result = traverse(tree_decree, nodes_decree, decree_context, "DECREE_ROOT")

    # --- Predicted violations: variables whose state dict has a falsy "legal" ---
    pred_law = [v for v, d in law_result.items() if isinstance(d, dict) and not d.get("legal", True)]
    pred_decree = [v for v, d in decree_result.items() if isinstance(d, dict) and not d.get("legal", True)]

    # --- Save the final variable state ---
    _write_json(os.path.join(case_dir, "law_variables.json"), law_result)
    _write_json(os.path.join(case_dir, "decree_variables.json"), decree_result)

    # --- Add the case to the summary ---
    summary.append({
        "case_id": case_id,
        "content": content[:200] + "...",
        "gt_law": gt_law,
        "gt_decree": gt_decree,
        "pred_law": pred_law,
        "pred_decree": pred_decree
    })

# --- Save the CSV (utf-8-sig writes a BOM) ---
pd.DataFrame(summary).to_csv("results/summary.csv", index=False, encoding="utf-8-sig")
print("\n✅ 모든 케이스 처리 완료 → results/summary.csv 생성됨")


[2023-012-135] 질문 진행:   0%|          | 0/334 [00:00<?, ?질문/s]

[2023-012-135] 질문 진행: 100%|██████████| 334/334 [04:28<00:00,  1.25질문/s, 334/334]
[2023-012-132] 질문 진행: 100%|██████████| 334/334 [03:45<00:00,  1.48질문/s, 334/334]
[2023-012-127] 질문 진행: 100%|██████████| 334/334 [03:39<00:00,  1.52질문/s, 334/334]
[2023-012-120] 질문 진행: 100%|██████████| 334/334 [04:26<00:00,  1.25질문/s, 334/334]
[2023-012-121] 질문 진행: 100%|██████████| 334/334 [03:53<00:00,  1.43질문/s, 334/334]
[2023-012-126] 질문 진행: 100%|██████████| 334/334 [04:47<00:00,  1.16질문/s, 334/334]
[2023-012-133] 질문 진행: 100%|██████████| 334/334 [04:12<00:00,  1.32질문/s, 334/334]
[2023-012-134] 질문 진행: 100%|██████████| 334/334 [04:01<00:00,  1.38질문/s, 334/334]
[2023-012-117] 질문 진행: 100%|██████████| 334/334 [03:33<00:00,  1.56질문/s, 334/334]
[2023-012-124] 질문 진행: 100%|██████████| 334/334 [03:50<00:00,  1.45질문/s, 334/334]


✅ 모든 케이스 처리 완료 → results/summary.csv 생성됨



