In [7]:
import argparse
import json
import os
import faiss
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer

INDEX_PATH = "rag_index_retriever/faiss.index"
META_PATH  = "rag_index/meta.pkl"          
SFT_DIR    = "models/llama31-8b-sft-fold1"        
#EMB_MODEL  = "nlpai-lab/KURE-v1"
EMB_MODEL = "models/kure-law-retriever"                     


def load_index_and_meta():
    import pickle
    print("FAISS 인덱스 로딩 중...") 
    if not os.path.exists(INDEX_PATH) or not os.path.exists(META_PATH):
        print(f"오류: 인덱스 파일을 찾을 수 없습니다. ({INDEX_PATH}, {META_PATH})")
        return None, None
        
    index = faiss.read_index(INDEX_PATH)
    with open(META_PATH, "rb") as f:
        meta = pickle.load(f)
    print(" 인덱스/메타데이터 로드 완료.")
    return index, meta

def load_sft_model():
    import os
    from peft import PeftModel
    from transformers import BitsAndBytesConfig
    
    print("LLM 모델 로딩 중...") 

    HF_TOKEN = os.environ.get("HF_TOKEN")  
    BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    ADAPTER_DIR = SFT_DIR  # 상단 상수 사용

    bnb_cfg = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )

    tok = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True, token=HF_TOKEN)
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token

    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=bnb_cfg,
        attn_implementation="sdpa",
        device_map="auto",
        torch_dtype=torch.float16,
        token=HF_TOKEN,
    )

    model = PeftModel.from_pretrained(base, ADAPTER_DIR)

    print(" LLM 모델 로드 완료.")
    return tok, model

def load_embedder():
    print("임베딩 모델 로딩 중...") 
    return SentenceTransformer(EMB_MODEL) 

@torch.no_grad()
def run_sft(tok, mdl, clause_text: str) -> str:
    system = (
        "당신은 약관의 공정성을 분석하는 법률 전문가입니다.\n"
        "문맥상 주체 (고객/ 사업자) 를 명확히 구분하세요.\n"
        "반드시 아래 한 줄 포맷만 출력하세요:\n"
        "분야: <정수> / 불공정여부: <유리|불리> / 근거: <간결한 문장 또는 '해당 없음'>"
    )
    user = f"다음 약관 조항의 문맥을 이해하여 분야 분류, 불공정 여부 판단, 판단 근거를 요약하시오.\n\n입력:\n{clause_text}"

    chat = [
        {"role": "system", "content": system},
        {"role": "user", "content": user}
    ]
    prompt = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tok(prompt, return_tensors="pt").to(mdl.device)
    out_ids = mdl.generate(
        **inputs, 
        max_new_tokens=256, 
        do_sample=False
    )
    out_txt = tok.decode(out_ids[0], skip_special_tokens=True)
    ans = out_txt.split("assistant\n")[-1].strip()
    return ans

def parse_reason(answer_line: str) -> str:
    parts = [p.strip() for p in answer_line.split("/") if p.strip()]
    reason = ""
    for p in parts:
        if p.startswith("근거:"):
            reason = p.replace("근거:", "").strip()
            break
    return reason

def embed(embedder, texts):
    embs = embedder.encode(texts, normalize_embeddings=True)
    return np.asarray(embs, dtype="float32")

def search(index, query_vec, topk=5):
    D, I = index.search(query_vec, topk)
    return I[0], D[0]

def build_report(clause_text, sft_answer, meta, hits=None, similarities=None):
    answer_str = sft_answer.strip()
    is_unfair = "불공정여부: 불리" in answer_str

    report = {
        "input_clause": clause_text,
        "llm_output": answer_str,
    }

    laws = []
    if is_unfair and hits is not None and len(hits) > 0:
        if similarities is not None and len(similarities) == len(hits):
            hit_data = zip(hits, similarities)
        else:
            hit_data = zip(hits, [None] * len(hits))

        for idx, sim in hit_data:
            if idx < 0 or idx >= len(meta): continue 
            
            rec = meta[int(idx)]
            law_entry = {
                "clauseField": rec.get("clauseField"),
                "law_text": rec.get("law_text"),
                "similarity": float(sim) if sim is not None else None
            }
            laws.append(law_entry)
            
    report["retrieved_laws"] = laws
    return report


def main():
    print("===" * 20)
    print("   Law RAG Console Application 시작")
    print("===" * 20)

    #  모델 및 데이터 로드
    try:
        index, meta = load_index_and_meta()
        if index is None: return

        tok, mdl = load_sft_model()
        embedder = load_embedder()
        print("\n모든 모델 로드 완료\n")
    except Exception as e:
        print(f"\n[치명적 오류] 모델 로딩 실패: {e}")
        return

    #  사용자 입력 루프
    while True:
        print("-" * 60)
        clause = input("분석할 약관 조항을 입력하세요 (종료하려면 'q' 또는 'exit' 입력):\n>> ")
        
        if clause.lower() in ['q', 'exit', 'quit']:
            print("프로그램을 종료합니다.")
            break
        
        if not clause.strip():
            print("! 내용을 입력해주세요.")
            continue

        print("\n... 분석 중입니다 ...\n")
        
        try:
            # SFT 추론
            answer = run_sft(tok, mdl, clause)
            reason = parse_reason(answer)

            # 불공정(불리) 판정 시에만 RAG 검색 수행
            if "불공정여부: 유리" in answer:
                report = build_report(clause, answer, meta, hits=None, similarities=None)
            else:
                fused_query = f"{clause}\n\n판단근거: {reason}" if reason else clause
                qv = embed(embedder, [fused_query])
                ids, similarities = search(index, qv, topk=5)
                report = build_report(clause, answer, meta, hits=ids, similarities=similarities)

            # 결과 출력 
            print("\n[분석 결과]")
            print(json.dumps(report, indent=2, ensure_ascii=False))
            
        except Exception as e:
            print(f"분석 중 오류 발생: {e}")

if __name__ == "__main__":
    main()

   Law RAG Console Application 시작
FAISS 인덱스 로딩 중...
 인덱스/메타데이터 로드 완료.
LLM 모델 로딩 중...

[치명적 오류] 모델 로딩 실패: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 


In [None]:
import torch, os, json, pandas as pd
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer, SFTConfig

MY_TOKEN = os.environ.get("HF_TOKEN")            
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct"
MAX_SEQ_LEN = 512
OUTPUT_DIR = "models/llama31-8b-sft-fold10"

os.environ["TORCH_COMPILE_DISABLE"] = "1"

bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,    
    bnb_4bit_use_double_quant=True,
)

print(f"'{BASE_MODEL}' 로드 중 ...")
tok = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True, token=MY_TOKEN)
if tok.pad_token is None:
    tok.pad_token = tok.eos_token

mdl = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_cfg,
    attn_implementation="sdpa",             
    device_map="auto",
    dtype=torch.float16,
    token=MY_TOKEN,
)
print("--- 모델 로딩 완료 ---")

mdl = prepare_model_for_kbit_training(mdl)

lora_cfg = LoraConfig(
    r=8, lora_alpha=16, lora_dropout=0.05,
    target_modules=["q_proj","k_proj","v_proj",
                    "o_proj","gate_proj","up_proj","down_proj"],
    bias="none", task_type="CAUSAL_LM",
)
mdl = get_peft_model(mdl, lora_cfg)

TRAIN_PATH = "data/kfold_data/train_fold_10.jsonl"
VAL_PATH   = "data/kfold_data/val_fold_10.jsonl"

def load_jsonl(path):
    with open(path, encoding="utf-8") as f:
        return [json.loads(l) for l in f if l.strip()]

train_rows = load_jsonl(TRAIN_PATH)
val_rows = load_jsonl(VAL_PATH)

train_df = pd.DataFrame(train_rows)
val_df = pd.DataFrame(val_rows)

print(f"Train 샘플 수: {len(train_df)}, Val 샘플 수: {len(val_df)}")

SYSTEM_PROMPT = (
    "당신은 약관의 공정성을 분석하는 법률 전문가입니다.\n"
    "문맥상 주체 (고객/ 사업자) 를 명확히 구분하세요.\n"
    "반드시 아래 한 줄 포맷만 출력하세요:\n"
    "분야: <정수> / 불공정여부: <유리|불리> / 근거: <간결한 문장 또는 '해당 없음'>"
)

def to_messages(r):
    inst, inp, out = r.get("instruction",""), r.get("input",""), r.get("output","")
    user_text = inst if not inp else f"{inst}\n\n입력:\n{inp}"
    return [
        {"role":"system","content":SYSTEM_PROMPT},
        {"role":"user","content":user_text},
        {"role":"assistant","content":out.strip()},
    ]
def format_example(ex):
    text = tok.apply_chat_template(
        to_messages(ex), tokenize=False, add_generation_prompt=False
    )
    return {"text": text}

train_ds = Dataset.from_pandas(train_df)
train_ds = train_ds.map(format_example, remove_columns=list(train_df.columns))

val_ds = Dataset.from_pandas(val_df)
val_ds = val_ds.map(format_example, remove_columns=list(val_df.columns))


print(f"데이터셋: train {len(train_ds)}, val {len(val_ds)}")

sft_cfg = SFTConfig(
    output_dir=OUTPUT_DIR,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    logging_strategy="steps",
    logging_steps=10,
    eval_strategy="epoch",             
    save_strategy="epoch",             
    save_total_limit=1,
    max_grad_norm=0.3,
    gradient_checkpointing=True,
    report_to="none",
    fp16=True, bf16=False,
    dataloader_num_workers=0,          
    dataloader_pin_memory=False,
    dataset_text_field="text",
    max_length=MAX_SEQ_LEN,
    packing=False,
    group_by_length=True,               
    seed=42,
)

trainer = SFTTrainer(
    model=mdl,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    args=sft_cfg,
    processing_class=tok,
)

print("--- 파인튜닝 시작 ---")
trainer.train()
print("--- 파인튜닝 완료 ---")

trainer.save_model(OUTPUT_DIR)
tok.save_pretrained(OUTPUT_DIR)
print(f"저장 완료: {OUTPUT_DIR}")


  from .autonotebook import tqdm as notebook_tqdm


NameError: name 'transformers' is not defined

In [1]:
import os
import re
import json
import torch
import gc
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tqdm import tqdm

# 로깅 설정
transformers.logging.set_verbosity_error()

# =============================================================================
# 1. 설정 및 상수
# =============================================================================

# 평가할 모델과 데이터 입력 (필요에 따라 수정하세요)
EVAL_TARGETS = [
    ("models/llama31-8b-sft-fold10", "data/kfold_data/val_fold_10.jsonl"),
]

BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct"

CATEGORY_MAP = {
    "1": "가맹계약", "2": "공급계약", "3": "분양계약", "4": "신탁계약",
    "5": "임대차계약", "6": "입소, 입주, 입점계약", "7": "신용카드", "8": "은행여신",
    "9": "은행전자금융서비스", "10": "전자결제수단", "11": "전자금융거래",
    "12": "상해보험", "13": "손해보험", "14": "질병보험", "15": "연금보험",
    "16": "자동차보험", "17": "책임보험", "18": "화재보험", "19": "증권사1",
    "20": "증권사2", "21": "증권사3", "22": "여객운송", "23": "화물운송",
    "24": "개인정보취급방침", "25": "게임", "26": "국내·외 여행",
    "27": "결혼정보서비스", "28": "렌트(자동차 이외)", "29": "마일리지/포인트",
    "30": "보증", "31": "사이버몰", "32": "산후조리원", "33": "상조서비스",
    "34": "상품권", "35": "생명보험", "36": "예식업", "37": "온라인서비스",
    "38": "자동차 리스 및 렌트", "39": "체육시설", "40": "택배",
    "41": "통신, 방송서비스", "42": "교육", "43": "매매계약"
}

CATEGORY_HINT_TEXT = "\n".join([f"{k}: {v}" for k, v in CATEGORY_MAP.items()])


# =============================================================================
# 2. 유틸리티 함수
# =============================================================================

def parse_output_line(line: str):
    field_id = None
    unfair = None
    parts = [p.strip() for p in line.split("/")]
    
    # 1. 분야 파싱
    head1 = parts[0] if len(parts) >= 1 else line
    m = re.search(r"(\d+)", head1)
    if m:
        try: field_id = int(m.group(1))
        except ValueError: field_id = None
            
    for k, v in CATEGORY_MAP.items():
        if v in head1:
            try: 
                field_id = int(k)
                break
            except ValueError: continue

    # 2. 불공정여부 파싱
    if len(parts) >= 2: head2 = " / ".join(parts[:2])
    else: head2 = head1

    if "불리" in head2: unfair = 1
    elif "유리" in head2: unfair = 0

    return field_id, unfair


def build_prompt(tok, clause: str, with_category_hint: bool):
    system = (
        "당신은 약관의 공정성을 분석하는 법률 전문가입니다.\n"
        "문맥상 주체 (고객/사업자)를 명확히 구분하세요.\n"
        "반드시 아래 한 줄 포맷만 출력하세요:\n"
        "분야: <정수> / 불공정여부: <유리|불리> / 근거: <간결한 문장 또는 '해당 없음'>"
    )
    if with_category_hint:
        system += (
            "\n\n[분야 번호 안내]\n"
            "분야는 반드시 아래 번호 중 하나여야 합니다.\n"
            f"{CATEGORY_HINT_TEXT}\n"
        )
    user = (
        "다음 약관 조항의 문맥을 이해하여 분야 분류, 불공정 여부 판단, "
        "판단 근거를 요약하세요.\n\n입력:\n" + clause
    )
    chat = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    return tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)


def extract_answer_text(raw_txt: str) -> str:
    if "assistant\n" in raw_txt:
        return raw_txt.split("assistant\n")[-1].strip()
    return raw_txt.strip()


# =============================================================================
# 3. 모델 로딩 및 추론 함수
# =============================================================================

def load_llama_base_4bit():
    print("Llama 3.1 Base 4bit 로딩 중...")
    HF_TOKEN = os.environ.get("HF_TOKEN")
    
    bnb_cfg = BitsAndBytesConfig(
        load_in_4bit=True, 
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16, 
        bnb_4bit_use_double_quant=True,
    )
    
    tok = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True, token=HF_TOKEN)
    if tok.pad_token is None: tok.pad_token = tok.eos_token
    
    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL, 
        quantization_config=bnb_cfg, 
        attn_implementation="sdpa",
        device_map="auto", 
        torch_dtype=torch.float16, 
        token=HF_TOKEN,
    )
    base.eval()
    return tok, base


@torch.no_grad()
def run_model(tok, model, clause: str, with_category_hint: bool) -> str:
    prompt = build_prompt(tok, clause, with_category_hint=with_category_hint)
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    out_ids = model.generate(
        **inputs, max_new_tokens=256, do_sample=False,
    )
    return tok.decode(out_ids[0], skip_special_tokens=True)


def evaluate_model(tok, model, data, tag: str, with_category_hint: bool, debug: bool = False):
    y_true_unfair, y_pred_unfair = [], []
    y_true_field, y_pred_field = [], []

    pbar = tqdm(data, desc=f"{tag} 평가 중")
    for idx, ex in enumerate(pbar, start=1):
        clause = ex["input"]
        gold_out = ex["output"]

        g_field, g_unfair = parse_output_line(gold_out)
        if g_unfair is None: continue

        raw_txt = run_model(tok, model, clause, with_category_hint=with_category_hint)
        pred_line = extract_answer_text(raw_txt)
        p_field, p_unfair = parse_output_line(pred_line)

        if debug:
            tqdm.write(f"\n====== [{tag}] Sample {idx} ======")
            tqdm.write(f"GOLD: {gold_out}")
            tqdm.write(f"PRED: {pred_line}")
            tqdm.write(f" -> P_Field: {p_field}, P_Unfair: {p_unfair}")

        # 불공정 여부 후처리 (None인 경우 반대로 예측 처리 or 0 처리, 여기선 기존 로직 유지)
        if p_unfair is None:
            p_unfair = 1 - g_unfair 

        y_true_unfair.append(g_unfair)
        y_pred_unfair.append(p_unfair)

        if g_field is not None:
            y_true_field.append(g_field)
            y_pred_field.append(-1 if p_field is None else p_field)

    # 지표 계산
    unfair_acc = accuracy_score(y_true_unfair, y_pred_unfair)
    unfair_prec, unfair_rec, unfair_f1, _ = precision_recall_fscore_support(
        y_true_unfair, y_pred_unfair, average="binary", pos_label=1,
    )
    field_acc = accuracy_score(y_true_field, y_pred_field)
    field_prec, field_rec, field_f1, _ = precision_recall_fscore_support(
        y_true_field, y_pred_field, average="macro", zero_division=0,
    )

    return {
        "unfair_acc": unfair_acc, "unfair_prec": unfair_prec,
        "unfair_rec": unfair_rec, "unfair_f1": unfair_f1,
        "field_acc": field_acc, "field_f1": field_f1,
    }


# =============================================================================
# 4. 메인 실행 함수
# =============================================================================

def main():
    if not EVAL_TARGETS:
        return

    # [수정됨] Base 모델을 루프 밖에서 한 번만 로드하여 GPU 메모리 문제 방지
    print(">>> 전체 평가 프로세스 시작...")
    tok, base_model = load_llama_base_4bit()

    try:
        for sft_path, val_path in EVAL_TARGETS:
            print(f"\n####################################################")
            print(f"### 평가 대상: {sft_path}")
            print(f"### 검증 파일: {val_path}")
            print(f"####################################################")

            if not os.path.exists(sft_path) or not os.path.exists(val_path):
                print(f" 경로가 존재하지 않습니다. 건너뜁니다.")
                continue

            with open(val_path, "r", encoding="utf-8") as f:
                data = [json.loads(line) for line in f if line.strip()]

            # -------------------------------------------------------
            # [1/2] Base Model 평가 (순정 상태)
            # -------------------------------------------------------
            print("\n[1/2] Base Model 평가 시작...")
            
            # 혹시 이전 루프에서 어댑터가 남아있다면 제거 (안전장치)
            if hasattr(base_model, "disable_adapter_layers"):
                 base_model.disable_adapter_layers()
            
            base_res = evaluate_model(
                tok, base_model, data, tag="Base", with_category_hint=True, debug=False
            )

            # -------------------------------------------------------
            # [2/2] SFT Model 평가 (어댑터 장착)
            # -------------------------------------------------------
            print("\n[2/2] SFT Model 평가 시작...")
            print(f"[SFT] 어댑터 로딩 중... {sft_path}")
            
            # [핵심 수정] 기존 base_model 위에 어댑터만 씌움 (메모리 재사용)
            sft_model = PeftModel.from_pretrained(base_model, sft_path)
            sft_model.eval()

            sft_res = evaluate_model(
                tok, sft_model, data, tag="SFT", with_category_hint=True, debug=False
            )

            # 결과 출력
            print(f"\n====== 최종 결과 비교: {sft_path} ======")
            print("[불공정여부 (불리=Positive)]")
            print(f"Accuracy : Base={base_res['unfair_acc']:.4f}  -> SFT={sft_res['unfair_acc']:.4f}")
            print(f"Precision: Base={base_res['unfair_prec']:.4f}  -> SFT={sft_res['unfair_prec']:.4f}")
            print(f"Recall   : Base={base_res['unfair_rec']:.4f}  -> SFT={sft_res['unfair_rec']:.4f}")
            print(f"F1 Score : Base={base_res['unfair_f1']:.4f}  -> SFT={sft_res['unfair_f1']:.4f}")

            print("\n[분야 분류]")
            print(f"Accuracy : Base={base_res['field_acc']:.4f}  -> SFT={sft_res['field_acc']:.4f}")
            print(f"Macro F1 : Base={base_res['field_f1']:.4f}  -> SFT={sft_res['field_f1']:.4f}")
            print("===================================================\n")
            
            # [중요] 다음 루프나 종료를 위해 어댑터 연결 해제 (메모리 정리)
            # del sft_model 만으로는 부족할 수 있으므로 명시적 제거 시도
            del sft_model
            torch.cuda.empty_cache()
            
            # base_model은 다음 루프를 위해 유지됩니다.

    finally:
        # 프로그램 완전 종료 시 정리
        print(">>> 평가 종료. 메모리 정리 중...")
        del base_model
        del tok
        gc.collect()
        torch.cuda.empty_cache()

if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm


>>> 전체 평가 프로세스 시작...
Llama 3.1 Base 4bit 로딩 중...


Loading checkpoint shards: 100%|██████████| 4/4 [00:28<00:00,  7.12s/it]



####################################################
### 평가 대상: models/llama31-8b-sft-fold10
### 검증 파일: data/kfold_data/val_fold_10.jsonl
####################################################

[1/2] Base Model 평가 시작...


Base 평가 중: 100%|██████████| 900/900 [1:03:31<00:00,  4.23s/it]



[2/2] SFT Model 평가 시작...
[SFT] 어댑터 로딩 중... models/llama31-8b-sft-fold10


SFT 평가 중: 100%|██████████| 900/900 [1:15:25<00:00,  5.03s/it]



[불공정여부 (불리=Positive)]
Accuracy : Base=0.7022  -> SFT=0.9789
Precision: Base=0.5568  -> SFT=0.9891
Recall   : Base=0.3403  -> SFT=0.9444
F1 Score : Base=0.4224  -> SFT=0.9663

[분야 분류]
Accuracy : Base=0.3100  -> SFT=0.4322
Macro F1 : Base=0.2812  -> SFT=0.4507

>>> 평가 종료. 메모리 정리 중...
