코드 경로: ./environments/CONTRACT/main/0817_make_fusion_graphs.ipynb

## 경로 설정

In [2]:
# Make the project root importable and use it as the working directory, so the
# relative data paths used later in this notebook resolve against it.
import sys
import os

# The root can be overridden without editing the notebook by setting the
# COMPLIANCE_PROJECT_ROOT environment variable; the default keeps the
# previously hard-coded checkout location so existing setups behave the same.
PROJECT_ROOT = os.environ.get(
    "COMPLIANCE_PROJECT_ROOT",
    "/Users/taeyoonkwack/Documents/compliance_checking",
)

# Fail early with an actionable message instead of a bare chdir error.
if not os.path.isdir(PROJECT_ROOT):
    raise FileNotFoundError(
        f"PROJECT_ROOT does not exist: {PROJECT_ROOT!r}. "
        "Set the COMPLIANCE_PROJECT_ROOT environment variable to your checkout."
    )

# Guarded so that re-running the cell does not add duplicate sys.path entries.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

# Report the working directory before and after switching to the project root.
print(f"Current working directory: {os.getcwd()}")
os.chdir(PROJECT_ROOT)
print(f"Changed working directory to: {os.getcwd()}")

# 표준 임포트
import re
import json
from typing import Dict, List, Optional
import pandas as pd
from tqdm.auto import tqdm

# Import the utils pipeline (document -> eventic graph -> fusion graph).
# First try the package-qualified modules; if that raises ImportError, fall
# back to importing the same modules as top-level names from the utils folder.
try:
    from utils.fusion_graph_builder import build_fusion_graph
    import utils.eventic_graph_builder as evg
    print("✓ Successfully imported utils modules")
except ImportError as e:
    print(f"❌ Import error: {e}")
    # Fallback: expose the utils directory itself on sys.path. The membership
    # check keeps sys.path from growing by one duplicate entry every time this
    # cell is re-run in the same kernel.
    utils_dir = os.path.join(PROJECT_ROOT, 'utils')
    if utils_dir not in sys.path:
        sys.path.insert(0, utils_dir)
    try:
        from fusion_graph_builder import build_fusion_graph
        import eventic_graph_builder as evg
        print("✓ Successfully imported from utils directory")
    except ImportError as e2:
        print(f"❌ Alternative import also failed: {e2}")
        # Re-raise so the notebook stops here instead of failing later with a
        # NameError on build_fusion_graph / evg.
        raise

# Data locations. Both are relative paths, so they resolve against the current
# working directory at the time they are used.
INPUT_CSV = "environments/CONTRACT/data/manipulated/selected_original_data.csv"
# NOTE(review): no visible cell reads OUTPUT_JSON; the batch-processing cell
# writes to a differently named file (contract_fusion_graphs.json). Confirm
# which output path is intended.
OUTPUT_JSON = "environments/CONTRACT/data/manipulated/contract_norms_fusion_graph.json"

# Keyword arguments handed to build_fusion_graph for every norm.
FUSION_PARAMS = {
    "rounds": 2,
    "use_concept_graph": True,
    "use_entity_graph": True,
    "use_term_definition_graph": True,
    "include_tdg_edges": True,
}

Current working directory: /Users/taeyoonkwack/Documents/compliance_checking/environments/CONTRACT/main
Changed working directory to: /Users/taeyoonkwack/Documents/compliance_checking


  from .autonotebook import tqdm as notebook_tqdm


❌ Import error: No module named 'eventic_graph_builder'
✓ Successfully imported from utils directory


In [3]:
# Load the norm-pair CSV, then show its first record, its column names and
# the total number of rows.
df = pd.read_csv(INPUT_CSV)

first_row = df.iloc[0]
column_names = df.columns.tolist()
row_count = len(df)

# sep="\n" puts each heading on its own line directly above its value.
print("첫 번째 행 데이터:", first_row, sep="\n")
print("\n컬럼명:", column_names, sep="\n")
print(f"\n총 {row_count}개의 행이 있습니다.")

첫 번째 행 데이터:
contract_id                                                  466
norm_id_1                                                  82524
norm_id_2                                                  83696
norm1          To this end, subject to any confidentiality ag...
norm2          Solectron can choose not to inform new industr...
conflict                                                       1
Name: 0, dtype: object

컬럼명:
['contract_id', 'norm_id_1', 'norm_id_2', 'norm1', 'norm2', 'conflict']

총 198개의 행이 있습니다.


In [4]:
# Smoke-test the pipeline on a single norm before processing the whole dataset.
# NOTE(review): these imports repeat names already bound by the setup cell;
# they are kept so this cell binds the same objects it did before. Consider
# consolidating all imports in the first cell.
from utils import build_fusion_graph
import utils.eventic_graph_builder as evg
import json

# 1) Norm text -> Eventic Graph
sample_norm = df.iloc[0]["norm1"]
eventic = evg.build_eventic_graph(sample_norm)
print("\n=== Eventic Graph ===")
print(json.dumps(eventic, ensure_ascii=False, indent=2))
if not eventic:
    # Surface the empty case explicitly so it is not mistaken for a valid run.
    print("⚠️ Eventic graph is empty; the fusion graph below is built from an empty input.")

# 2) Eventic Graph -> Fusion Graph
# Reuse the shared FUSION_PARAMS so this demo cannot drift from the settings
# used by the batch-processing cell.
fusion = build_fusion_graph(eventic, **FUSION_PARAMS)
print("\n=== Fusion Graph ===")
print(json.dumps(fusion, ensure_ascii=False, indent=2))

[VERBOSE] Sending request to OpenAI API...
[VERBOSE] Raw response: ```json
[
  {"Agent": "Solectron", "Deontic": "will", "Action": "inform acquisition of new and emerging Solectron and industry technology subject to confidentiality agreements Solectron may have"},
  {"Agent": "Solectron", "Deontic": "will", "Action": "provide opportunity acquisition of new and emerging Solectron and industry technology subject to confidentiality agreements Solectron may have"}
]
```
[VERBOSE] Extracted JSON block:
 [
  {"Agent": "Solectron", "Deontic": "will", "Action": "inform acquisition of new and emerging Solectron and industry technology subject to confidentiality agreements Solectron may have"},
  {"Agent": "Solectron", "Deontic": "will", "Action": "provide opportunity acquisition of new and emerging Solectron and industry technology subject to confidentiality agreements Solectron may have"}
]

=== Eventic Graph ===
[]
[VERBOSE] Build fusion graph start
[VERBOSE] Graphs enabled → CG=True, EG=True

In [None]:
# Build a fusion graph for both norms of every row in df and persist the
# accumulated results to JSON after each successfully processed row.
import os
import time

output_file = "environments/CONTRACT/data/manipulated/contract_fusion_graphs.json"
API_PAUSE_SECONDS = 0.5  # short pause between rows to stay under API rate limits
results = []
failures = []  # one {"contract_id", "error"} entry per row that raised


def _norm_to_fusion_graph(norm_text, label):
    """Run one norm through the eventic -> fusion pipeline.

    Args:
        norm_text: Text of the norm, passed to evg.build_eventic_graph.
        label: Short name ("norm1" / "norm2") used only in progress messages.

    Returns:
        Whatever build_fusion_graph returns for the eventic graph, called with
        the shared FUSION_PARAMS.
    """
    print(f"  Building eventic graph for {label}...")
    eventic = evg.build_eventic_graph(norm_text)
    print(f"  Building fusion graph for {label}...")
    return build_fusion_graph(eventic, **FUSION_PARAMS)


def _save_results(records, path):
    """Write records to path as JSON without clobbering the previous file.

    The data is first written to a sibling temporary file and then moved over
    the target with os.replace, so an interruption during the write leaves the
    previously saved file intact instead of a truncated one.
    """
    tmp_path = f"{path}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(records, f, ensure_ascii=False, indent=2)
    os.replace(tmp_path, path)


total_rows = len(df)
print(f"Processing {total_rows} contracts...")

# enumerate() supplies the progress counter, so the label is correct even when
# df does not have a default 0..n-1 integer index.
for position, (_, row) in enumerate(
    tqdm(df.iterrows(), total=total_rows, desc="Processing contracts"), start=1
):
    # Bound before the try block so the error handler can always report it.
    contract_id = row.get('contract_id', '<unknown>')
    try:
        norm_id_1 = row['norm_id_1']
        norm_id_2 = row['norm_id_2']
        norm1_text = row['norm1']
        norm2_text = row['norm2']
        conflict = row['conflict']

        print(f"\n[{position}/{total_rows}] Processing contract {contract_id}")

        fusion1 = _norm_to_fusion_graph(norm1_text, "norm1")
        fusion2 = _norm_to_fusion_graph(norm2_text, "norm2")

        # Cast to plain int so json.dump does not receive pandas/numpy scalars.
        result = {
            "contract_id": int(contract_id),
            "norm1": {
                "norm_id": int(norm_id_1),
                "norm_text": norm1_text,
                "fusion_graph": fusion1
            },
            "norm2": {
                "norm_id": int(norm_id_2),
                "norm_text": norm2_text,
                "fusion_graph": fusion2
            },
            "conflict": int(conflict)
        }

        results.append(result)

        # Save after every row so a crash loses at most the row in progress.
        _save_results(results, output_file)

        print(f"  ✓ Processed and saved contract {contract_id}")

        time.sleep(API_PAUSE_SECONDS)

    except Exception as e:
        # Best-effort batch: report and remember the failure, then move on.
        print(f"  ❌ Error processing contract {contract_id}: {str(e)}")
        failures.append({"contract_id": contract_id, "error": str(e)})
        continue

print(f"\n✓ Processing complete! Saved {len(results)} contracts to {output_file}")
if failures:
    print(f"⚠️ {len(failures)} contract(s) failed and were skipped; see `failures` for details.")

In [None]:
# Sanity check: report how many rows were processed and, if there are any,
# preview the first 500 characters of the first record's JSON.
print(f"Total processed contracts: {len(results)}")
if results:
    first_record_json = json.dumps(results[0], ensure_ascii=False, indent=2)
    print("\nFirst result structure:", first_record_json[:500] + "...", sep="\n")