## Ablation 스위치

In [1]:
TERM_DEFINITION_ON = True
ENTITY_ON = True
CONCEPT_ON = True
EVENTIC_ON = True
FULL_GRAPH_ON = True # 이게 True면 위의 모든 스위치를 무시하고 모든 내용이 포함됨

## 런타임 준비 & 필수 라이브러리 설치 & 허깅페이스 로그인

In [2]:
!pip -q install "transformers>=4.43.0" accelerate bitsandbytes "scikit-learn>=1.3.0" pandas tqdm
!apt-get -qq update
!apt-get -qq install -y git-lfs
!git lfs install
!rm -rf compliance_checking
!git clone https://github.com/beefed-up-geek/compliance_checking.git
!pip -q install -U "huggingface_hub[cli]" transformers accelerate

import torch, sys, os, re, json, math, random, pandas as pd
from tqdm import tqdm
from sklearn.metrics import accuracy_score, matthews_corrcoef
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

!huggingface-cli login

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Git LFS initialized.
Cloning into 'compliance_checking'...
remote: Enumerating objects: 109, done.[K
remote: Counting objects: 100% (109/109), done.[K
remote: Compressing objects: 100% (93/93), done.[K
remote: Total 109 (delta 20), reused 98 (delta 10), pack-reused 0 (from 0)[K
Receiving objects: 100% (109/109), 9.36 MiB | 17.94 MiB/s, done.
Resolving deltas: 100% (20/20), done.
Filtering content: 100% (7/7), 530.53 MiB | 18.33 MiB/s, done.

    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|  

## 모델 로드 (fp16)

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Qwen/Qwen2-7B-Instruct, google/gemma-7b-it, meta-llama/Llama-3.1-8B-Instruct
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# pad 토큰 설정(경고 방지)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Loaded:", MODEL_ID)
print("Model dtype:", model.dtype)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loaded: meta-llama/Llama-3.1-8B-Instruct
Model dtype: torch.float16


## 데이터 불러오기

In [4]:
import json, os

LOCAL_PATH = "compliance_checking/environments/CONTRACT/data/manipulated/contract_norms_fusion_graph.json"
assert os.path.exists(LOCAL_PATH), "파일 경로를 확인하세요."

with open(LOCAL_PATH, "r") as f:
    data = json.load(f)

print(f"총 샘플 수: {len(data)}")
print(json.dumps(data[0], ensure_ascii=False, indent=2)[:1200], "...\n")


총 샘플 수: 198
{
  "contract_id": 466,
  "norm1": {
    "norm_id": 82524,
    "norm_text": "To this end, subject to any confidentiality agreements Solectron may have, Solectron will both inform and provide a commercially reasonable opportunity for acquisition of new and emerging Solectron and industry technology.",
    "fusion_graph": {
      "edges": [
        {
          "source": "Solectron",
          "relation": "will",
          "target": "inform acquisition of new and emerging Solectron and industry technology subject to confidentiality agreements",
          "source_graph": "eventic"
        },
        {
          "source": "Solectron",
          "relation": "will",
          "target": "provide opportunity for acquisition of new and emerging Solectron and industry technology subject to confidentiality agreements",
          "source_graph": "eventic"
        },
        {
          "source": "Solectron",
          "relation": "successor",
          "target": "Flextronics",
         

## 프롬프트 빌더 & 생성 함수

In [5]:
SYSTEM_PROMPT = """You are a precise contract-compliance analyst.
Given two norms from the same contract and their fused graphs, decide if they CONFLICT.
Output strictly one digit: 1 if they conflict (mutually unsatisfiable under the same conditions), 0 otherwise.
Do not add any words or punctuation. Only '1' or '0'."""

def filter_edges_by_flags(edges):
    """
    Ablation 스위치에 따라 edge 리스트를 필터링.
    - FULL_GRAPH_ON=True면 그대로 반환 (모든 source_graph 포함).
    - False면 각 스위치(EVENTIC/ENTITY/TERM_DEFINITION/CONCEPT)에 따라 필터링.
    """
    if edges is None:
        return []
    if FULL_GRAPH_ON:
        return edges

    allowed = set()
    if EVENTIC_ON:
        allowed.add("eventic")
    if ENTITY_ON:
        allowed.add("entity")
    if TERM_DEFINITION_ON:
        allowed.add("term_definition")
    if CONCEPT_ON:
        allowed.add("concept")

    out = []
    for e in edges:
        sg = str(e.get("source_graph", "")).strip().lower()
        if sg in allowed:
            out.append(e)
    return out

def edges_to_lines(edges, max_edges=None):
    lines = []
    filt = filter_edges_by_flags(edges)
    use = filt if (max_edges is None) else filt[:max_edges]
    for e in use:
        s = str(e.get("source","")).strip()
        r = str(e.get("relation","")).strip()
        t = str(e.get("target","")).strip()
        lines.append(f"[{s}] [{r}] [{t}]")
    return "\n".join(lines)

def build_user_prompt(sample, max_edges_per_norm=60):
    n1 = sample.get("norm1", {})
    n2 = sample.get("norm2", {})
    n1_id = n1.get("norm_id", "")
    n2_id = n2.get("norm_id", "")
    n1_text = (n1.get("norm_text") or "").strip()
    n2_text = (n2.get("norm_text") or "").strip()
    n1_edges = (n1.get("fusion_graph", {}) or {}).get("edges", [])
    n2_edges = (n2.get("fusion_graph", {}) or {}).get("edges", [])

    # 프롬프트 상단에 어떤 그래프 컴포넌트를 포함했는지 명시
    if FULL_GRAPH_ON:
        components_note = "Included graph components: ALL (full graph: eventic, entity, term_definition, concept)"
    else:
        included = []
        if EVENTIC_ON: included.append("eventic")
        if ENTITY_ON: included.append("entity")
        if TERM_DEFINITION_ON: included.append("term_definition")
        if CONCEPT_ON: included.append("concept")
        if not included:
            components_note = "Included graph components: NONE (edges omitted by ablation)"
        else:
            components_note = "Included graph components: " + ", ".join(included)

    n1_lines = edges_to_lines(n1_edges, max_edges=max_edges_per_norm)
    n2_lines = edges_to_lines(n2_edges, max_edges=max_edges_per_norm)

    user = f"""Decide whether the following two norms conflict.
{components_note}

Norm 1 (id={n1_id}):
{n1_text}

Norm 2 (id={n2_id}):
{n2_text}

Fusion Graph (Norm 1) — one per line: [source] [relation] [target]
{n1_lines}

Fusion Graph (Norm 2) — one per line: [source] [relation] [target]
{n2_lines}

Labeling rule:
- Output 1 if the two norms impose incompatible duties/permissions about the same agent/action under the same conditions such that both cannot be satisfied at once.
- Output 0 otherwise (e.g., different scope/actors/timing, or both can be satisfied).

Answer with ONLY one digit: 1 or 0.
"""
    return user


import re
import torch

@torch.inference_mode()
def predict_label(sample, max_new_tokens=3, temperature=0.0):
    messages = [
        {"role":"system", "content": SYSTEM_PROMPT},
        {"role":"user", "content": build_user_prompt(sample)}
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([text], return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False if temperature==0.0 else True,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
    )
    gen_ids = outputs[0, inputs["input_ids"].shape[1]:]
    out = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()

    # 첫 번째로 보이는 0/1만 사용
    m = re.search(r"[01]", out)
    if m:
        return int(m.group(0)), out
    return 0, out


In [6]:
# True False
TERM_DEFINITION_ON = True
ENTITY_ON = True
CONCEPT_ON = True
EVENTIC_ON = True
FULL_GRAPH_ON = True # 이게 True면 위의 모든 스위치를 무시하고 모든 내용이 포함됨

## 전수 추론 루프 (tqdm 진행바)

In [7]:
golds, preds, raw_outputs = [], [], []

# fp16에서 메모리 여유가 없다면 60 → 40/30으로 낮추세요.
max_edges_per_norm = 60

from tqdm import tqdm

for sample in tqdm(data, desc="LLM inference"):
    if "norm1" not in sample or "norm2" not in sample or "conflict" not in sample:
        continue

    y_pred, raw = predict_label(sample, max_new_tokens=3, temperature=0.0)
    y_true = int(sample["conflict"])

    preds.append(y_pred)
    golds.append(y_true)
    raw_outputs.append(raw)

print("샘플 수(유효):", len(golds))


LLM inference:   0%|          | 0/198 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
LLM inference:   1%|          | 1/198 [00:00<02:25,  1.36it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
LLM inference:   1%|          | 2/198 [00:00<01:11,  2.75it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
LLM inference:   2%|▏         | 3/198 [00:00<00:49,  3.98it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
LLM inference:   2%|▏         | 4/198 [00:01<00:37,  5.21it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` f

샘플 수(유효): 198





## 평가지표 계산 (Accuracy & MCC)

In [8]:
from sklearn.metrics import accuracy_score, matthews_corrcoef

acc = accuracy_score(golds, preds)
mcc = matthews_corrcoef(golds, preds)
print(f"Accuracy: {acc:.4f}")
print(f"Matthews Correlation Coefficient: {mcc:.4f}")

Accuracy: 0.6818
Matthews Correlation Coefficient: 0.4714
