In [9]:
# ===============================
# 0) 라이브러리 로딩 & 환경 고정
# ===============================
import os
import re
import random
import numpy as np
import pandas as pd
import torch

from datasets import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    classification_report,
    confusion_matrix,
    f1_score,
)

from transformers import (
    AutoTokenizer,
    RobertaForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
    set_seed,
)
from google.colab import drive
drive.mount('/content/drive')

# 재현성 고정
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
set_seed(SEED)

device = 0 if torch.cuda.is_available() else -1
print(f"[INFO] device: {'cuda' if device == 0 else 'cpu'}")

# ===============================
# 1) 데이터 로딩 & 전처리
# ===============================
CSV_PATH = "/content/drive/MyDrive/KDH/dataset/cleaned_datasets (0814)_utf8.csv"
df = pd.read_csv(CSV_PATH)
df["label"] = df["label"].astype(int)

def clean_text(text: str) -> str:
    # 연속 마침표 → 하나로
    text = re.sub(r"\.{2,}", ".", text)
    # 영어/숫자와 한글 사이 공백
    text = re.sub(r"([A-Za-z0-9])([가-힣])", r"\1 \2", text)
    text = re.sub(r"([가-힣])([A-Za-z0-9])", r"\1 \2", text)
    # 특수문자 제거 (.,!?는 유지)
    text = re.sub(r"[^\w\s.,!?]", "", text)
    # 다중 공백 제거
    text = re.sub(r"\s+", " ", text).strip()
    return text

X_train, X_test, y_train, y_test = train_test_split(
    df["text"].apply(clean_text),
    df["label"],
    test_size=0.2,
    stratify=df["label"],
    random_state=SEED,
)

# ===============================
# 2) 토크나이저/데이터셋 (동적 패딩)
# ===============================
MODEL_NAME = "klue/roberta-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.model_max_length = 512  # 안전한 최대 길이

def tokenize_fn(batch):
    # padding은 collator가 담당 → 여기선 truncation만
    return tokenizer(batch["text"], truncation=True)

# Hugging Face Dataset 생성 (라벨 키는 'labels'로 통일!)
train_ds = Dataset.from_dict({"text": X_train.tolist(), "labels": y_train.tolist()})
test_ds  = Dataset.from_dict({"text": X_test.tolist(),  "labels": y_test .tolist()})

# 토큰화 적용
train_ds = train_ds.map(tokenize_fn, batched=True, remove_columns=["text"])
test_ds  = test_ds.map(tokenize_fn, batched=True, remove_columns=["text"])

# 포맷 지정
train_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# 동적 패딩 collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

print(f"[INFO] Train size: {len(train_ds)}, Test size: {len(test_ds)}")
print("[INFO] Label dist (train):\n", pd.Series(y_train).value_counts())
print("[INFO] Label dist (test):\n",  pd.Series(y_test).value_counts())

# ===============================
# ===============================
# 3) 모델 로딩 (warning 제거: eager attention 명시)
# ===============================
from transformers import AutoConfig

try:
    model = RobertaForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=2,
        attn_implementation="eager",  # output_attentions=True와 함께 안전
    )
except TypeError:
    # 구버전 호환: config에 주입
    cfg = AutoConfig.from_pretrained(MODEL_NAME)
    setattr(cfg, "attn_implementation", "eager")
    model = RobertaForSequenceClassification.from_pretrained(
        MODEL_NAME,
        config=cfg,
        num_labels=2,
    )

# id2label/label2id 세팅(해석/로깅 편의)
id2label = {0: "정상", 1: "보이스피싱"}
label2id = {"정상": 0, "보이스피싱": 1}
model.config.id2label = id2label
model.config.label2id = label2id

# id2label/label2id 세팅(해석/로깅 편의)
id2label = {0: "정상", 1: "보이스피싱"}
label2id = {"정상": 0, "보이스피싱": 1}
model.config.id2label = id2label
model.config.label2id = label2id

# ===============================
# ===============================
# ===============================
# # ===============================
# 4) 평가지표 & 학습 설정 — 버전 호환 안전 패치
# ===============================
import transformers, inspect

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = logits.argmax(axis=-1)
    p, r, f1, _ = precision_recall_fscore_support(labels, preds, average="binary", pos_label=1)
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "precision": p, "recall": r, "f1": f1}

_desired_kwargs = dict(
    output_dir="./klue-roberta-voice",
    evaluation_strategy="epoch",   # 신버전용
    save_strategy="epoch",         # 신버전용
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_steps=50,
    load_best_model_at_end=True,   # 전략 불일치/구버전이면 자동 끔
    metric_for_best_model="f1",
    greater_is_better=True,
    fp16=torch.cuda.is_available(),
    warmup_ratio=0.1,
    report_to="none",
    # 구버전 대체(step 기반)
    do_eval=True,
    eval_steps=500,
    save_steps=500,
    logging_dir="./logs",
    evaluate_during_training=True,
)

sig_params = set(inspect.signature(TrainingArguments.__init__).parameters.keys()) - {"self"}
safe_kwargs = {k: v for k, v in _desired_kwargs.items() if k in sig_params}

# evaluation_strategy 미지원 → 구버전: step 기반으로, load_best는 끔
if "evaluation_strategy" not in sig_params:
    for k in ("eval_steps", "save_steps", "do_eval"):
        if k in _desired_kwargs and k in sig_params:
            safe_kwargs[k] = _desired_kwargs[k]
    if "load_best_model_at_end" in sig_params:
        safe_kwargs["load_best_model_at_end"] = False
    safe_kwargs.pop("metric_for_best_model", None)
    safe_kwargs.pop("greater_is_better", None)

# load_best가 켜졌는데 전략 키가 없으면 안전하게 끔
if "load_best_model_at_end" in sig_params and safe_kwargs.get("load_best_model_at_end", False):
    if ("evaluation_strategy" not in sig_params) or ("save_strategy" not in sig_params):
        safe_kwargs["load_best_model_at_end"] = False

training_args = TrainingArguments(**safe_kwargs)

print("[INFO] transformers version:", getattr(transformers, "__version__", "unknown"))
print("[INFO] TrainingArguments accepted keys ->", list(safe_kwargs.keys()))

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=test_ds,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)


# ===============================
# 5) 학습
# ===============================
trainer.train()

# ===============================
# 6) 예측 + 확률 보정(Temperature Scaling)
# ===============================
model.eval()
pred_output = trainer.predict(test_ds)
logits = pred_output.predictions        # (N, 2)
y_true = pred_output.label_ids

# ----- Temperature Scaling -----
class _TempScale(torch.nn.Module):
    def __init__(self, init_T: float = 1.0):
        super().__init__()
        self.T = torch.nn.Parameter(torch.ones(1) * init_T)

    def forward(self, logits_tensor: torch.Tensor) -> torch.Tensor:
        # logits / T
        return logits_tensor / self.T.unsqueeze(1)

def fit_temperature(logits_np: np.ndarray, labels_np: np.ndarray, max_iter=50, lr=0.01) -> float:
    """
    간단한 LBFGS로 NLL 최소화하여 온도 T 학습
    """
    logits_t = torch.tensor(logits_np, dtype=torch.float32)
    labels_t = torch.tensor(labels_np, dtype=torch.long)

    scaler = _TempScale()
    nll = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.LBFGS([scaler.T], lr=lr, max_iter=max_iter, line_search_fn="strong_wolfe")

    def closure():
        optimizer.zero_grad()
        loss = nll(scaler(logits_t), labels_t)
        loss.backward()
        return loss

    optimizer.step(closure)
    T_value = float(scaler.T.detach().cpu().item())
    print(f"[INFO] Fitted Temperature: {T_value:.4f}")
    return max(T_value, 1e-3)

# T 학습(실무에선 별도 검증셋 권장)
T = fit_temperature(logits, y_true)
logits_cal = logits / T

probs_full = torch.softmax(torch.tensor(logits_cal), dim=1).numpy()
p1_cal = probs_full[:, 1]   # 보이스피싱 calibrated 확률

THRESH = 0.7
y_pred = (p1_cal >= THRESH).astype(int)

print("[INFO] Classification report (calibrated p):")
print(classification_report(y_true, y_pred, target_names=["정상", "보이스피싱"], digits=4))

# ===============================
# ===============================
# (필수) 품사 태깅 도구 설치/로딩
# ===============================
!pip -q install konlpy==0.6.0
from konlpy.tag import Okt
okt = Okt()

# 원한다면 여기서 불용어(명사지만 제외하고 싶은 단어) 추가하세요
STOP_NOUNS = {
    "쪽", "부분", "경우", "상황", "발생", "문제", "고객", "안내", "확인", "처리",
    "연락", "상담", "번호", "내용", "시스템", "담당", "사용자", "현재", "오늘",
}
# ===============================
# 7) 증거 윈도우 + Δp1(마스킹 영향도) 기반 키워드 — ROLLOUT ONLY
# ===============================
import re
import torch
from torch.nn.functional import softmax
import numpy as np

# 문장 분할 & 윈도우 선택 — 가변 길이 look-behind 제거
_SENT_SPLIT = re.compile(r'(?:(?<=[.?!])|(?<=[가-힣]\))|(?<=요))\s+')

def split_sentences_ko(text: str):
    sents = [s.strip() for s in _SENT_SPLIT.split(text) if s and s.strip()]
    return sents if sents else [text.strip()]


def join_window(sents, i, radius=1, max_chars=400):
    left = max(0, i - radius)
    right = min(len(sents), i + radius + 1)
    cand = " ".join(sents[left:right])
    return cand if len(cand) <= max_chars else cand[:max_chars]

@torch.no_grad()
def p1_text(model, tokenizer, text: str):
    enc = tokenizer(text, return_tensors="pt", truncation=True,
                    max_length=getattr(tokenizer, "model_max_length", 512),
                    padding=False).to(model.device)
    logits = model(**enc).logits
    return float(softmax(logits, dim=-1)[0, 1].item())

def pick_best_window(text: str, model, tokenizer, radius=1, max_chars=400):
    sents = split_sentences_ko(text)
    if len(sents) == 1:
        return sents[0], [sents[0]]
    best_p, best_w = -1.0, None
    for i in range(len(sents)):
        w = join_window(sents, i, radius=radius, max_chars=max_chars)
        p = p1_text(model, tokenizer, w)
        if p > best_p:
            best_p, best_w = p, w
    return best_w, sents

@torch.no_grad()
def _token_scores_rollout(model, tokenizer, text, head_reduction="mean"):
    enc = tokenizer(text, return_tensors="pt", truncation=True,
                    max_length=getattr(tokenizer, "model_max_length", 512),
                    padding=False).to(model.device)
    out = model(**enc, output_attentions=True)

    mats=[]
    for A in out.attentions:                 # tuple(L) of (B,H,S,S)
        A = A.mean(dim=1) if head_reduction=="mean" else A.max(dim=1).values
        A = A[0]
        A = A + torch.eye(A.size(-1), device=A.device)   # residual
        A = A / (A.sum(dim=-1, keepdim=True)+1e-6)
        mats.append(A)
    R = mats[0]
    for k in range(1,len(mats)):
        R = R @ mats[k]
    scores = R[0]                            # CLS -> tokens
    ids = enc["input_ids"][0].cpu()
    tokens = tokenizer.convert_ids_to_tokens(ids)
    return tokens, scores.detach().cpu().numpy(), enc

def _merge_subwords(tokens, scores, specials):
    words, word_scores, spans = [], [], []
    cur_w, cur_s, cur_span = "", 0.0, []
    def flush():
        nonlocal cur_w, cur_s, cur_span
        if cur_w != "":
            words.append(cur_w.replace("▁",""))
            word_scores.append(cur_s)
            spans.append(cur_span[:])
            cur_w, cur_s, cur_span = "", 0.0, []
    keep_idx = [i for i,t in enumerate(tokens) if t not in specials]
    toks = [tokens[i] for i in keep_idx]
    scs  = [scores[i] for i in keep_idx]
    for i,(t,s) in enumerate(zip(toks, scs)):
        if t.startswith("▁") or not t.startswith("##"):
            flush(); cur_w = t; cur_s = float(s); cur_span=[i]
        else:
            cur_w += t[2:]; cur_s += float(s); cur_span.append(i)
    flush()
    return keep_idx, toks, np.array(scs), words, np.array(word_scores), spans

@torch.no_grad()
def extract_keywords_with_delta_rollout(
    text, model, tokenizer, head_reduction="mean",
    top_k=5, prelim_k=20, min_delta=0.03
):
    tokens, tok_scores, enc = _token_scores_rollout(model, tokenizer, text, head_reduction)
    specials = set(tokenizer.all_special_tokens) | {"<s>","</s>","<pad>"}
    keep_idx, kept_tokens, kept_scores, words, word_scores, spans = _merge_subwords(tokens, tok_scores, specials)
    if len(words)==0 or word_scores.sum()==0:
        base_p = p1_text(model, tokenizer, text)
        return [], base_p

    # 길이 보정 후 후보 확장
    word_scores = word_scores * len(kept_tokens)
    idx_pre = word_scores.argsort()[::-1][:max(prelim_k, top_k*4)]
    base_p = p1_text(model, tokenizer, text)

    mask_id = tokenizer.mask_token_id
    ids0 = enc["input_ids"][0].clone()
    results=[]
    for j in idx_pre:
        orig_span = [keep_idx[s] for s in spans[j]]
        ids_m = ids0.clone()
        for pos in orig_span:
            ids_m[pos] = mask_id
        enc_m = {"input_ids": ids_m.unsqueeze(0).to(model.device),
                 "attention_mask": enc["attention_mask"]}
        p1_m = float(softmax(model(**enc_m).logits, dim=-1)[0,1].item())
        delta = base_p - p1_m
        if delta >= min_delta:
            results.append({"word": words[j], "delta": float(delta), "attn": float(word_scores[j])})

    results.sort(key=lambda x: (-x["delta"], -x["attn"], -len(x["word"])))
    return results[:top_k], base_p

@torch.no_grad()
def evidential_check_rollout(text, model, tokenizer,
                             top_k=5, min_delta=0.03, suff_th=0.55, comp_th=0.08):
    kw, base_p = extract_keywords_with_delta_rollout(
        text, model, tokenizer, top_k=top_k,
        prelim_k=max(20, top_k*4), min_delta=min_delta
    )
    if base_p < suff_th or not kw:
        return False, kw, base_p, 0.0

    masked = text
    for w in sorted([k["word"] for k in kw], key=len, reverse=True):
        masked = re.sub(re.escape(w), tokenizer.mask_token, masked)
    p1_m = p1_text(model, tokenizer, masked)
    comp = base_p - p1_m
    return comp >= comp_th, kw, base_p, comp

def get_keywords_from_text_rollout(
    text, model, tokenizer, radius=1, max_chars=400,
    top_k=5, min_delta=0.03, suff_th=0.55, comp_th=0.08
):
    win, _ = pick_best_window(text, model, tokenizer, radius=radius, max_chars=max_chars)
    ok, kw, base_p, comp = evidential_check_rollout(
        win, model, tokenizer, top_k=top_k,
        min_delta=min_delta, suff_th=suff_th, comp_th=comp_th
    )
    if not ok:
        return {"window": win, "base_p": base_p, "comp": comp, "keywords": []}
    return {"window": win, "base_p": base_p, "comp": comp, "keywords": kw}


# ===============================
# 8) 결과 CSV 저장 (명사 top_k=3, 전역 0~100점)
# ===============================
# ===============================
# 8) 결과 저장 — ROLLOUT ONLY
# ===============================
TOP_K   = 5
THRESH  = 0.7        # calibrated 확률 임계값(위에서 쓰던 값 그대로)
SUFF_TH = 0.55       # 윈도우 충분성
COMP_TH = 0.08       # 포괄성(동시 마스킹 Δ)
MIN_DELTA = 0.03     # 키워드 개별 Δ 임계값

def save_predictions_with_evidence_rollout(
    top_k=TOP_K, thresh=THRESH,
    suff_th=SUFF_TH, comp_th=COMP_TH, min_delta=MIN_DELTA,
    out_prefix="prediction_with_keywords_v2_rollout"
):
    rows=[]
    model.eval()
    for i in range(len(X_test)):
        sent = X_test.iloc[i]
        label_true = int(y_true[i])
        prob1 = float(p1_cal[i])              # temperature scaling된 최종 확률
        label_pred = int(prob1 >= thresh)

        info = get_keywords_from_text_rollout(
            sent, model, tokenizer,
            radius=1, max_chars=400,
            top_k=top_k, min_delta=min_delta,
            suff_th=suff_th, comp_th=comp_th
        )
        kw_str   = ", ".join([f"{k['word']}" for k in info["keywords"]]) if info["keywords"] else ""
        kw_delta = "; ".join([f"{k['word']}:{k['delta']:.3f}" for k in info["keywords"]]) if info["keywords"] else ""

        rows.append({
            "문장(원본)": sent,
            "실제 라벨": label_true,
            "예측 라벨": label_pred,
            "보이스피싱 확률(p1_cal)": prob1,
            "증거윈도우": info["window"],
            "윈도우 p(보피)": info["base_p"],
            "윈도우 포괄성Δ": info["comp"],
            "근거 키워드(Rollout)": kw_str,
            "키워드Δ(각)": kw_delta,
            "임계값(최종)": thresh,
        })

    out_path = f"/content/drive/MyDrive/KDH/dataset/{out_prefix}_top{top_k}_thr{int(thresh*100)}.csv"
    pd.DataFrame(rows).to_csv(out_path, index=False, encoding="utf-8-sig")
    print(f"✅ '{out_path}' 저장 완료 (rollout only, top_k={top_k}, thr={thresh}, suff≥{suff_th}, comp≥{comp_th})")

# 실행
save_predictions_with_evidence_rollout()




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[INFO] device: cuda


Map:   0%|          | 0/4131 [00:00<?, ? examples/s]

Map:   0%|          | 0/1033 [00:00<?, ? examples/s]

[INFO] Train size: 4131, Test size: 1033
[INFO] Label dist (train):
 label
1    2805
0    1326
Name: count, dtype: int64
[INFO] Label dist (test):
 label
1    702
0    331
Name: count, dtype: int64


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[INFO] transformers version: 4.55.2
[INFO] TrainingArguments accepted keys -> ['output_dir', 'save_strategy', 'learning_rate', 'per_device_train_batch_size', 'per_device_eval_batch_size', 'num_train_epochs', 'weight_decay', 'logging_steps', 'load_best_model_at_end', 'fp16', 'warmup_ratio', 'report_to', 'do_eval', 'eval_steps', 'save_steps', 'logging_dir']


Step,Training Loss
50,0.552
100,0.1583
150,0.2451
200,0.1262
250,0.2148
300,0.1613
350,0.1083
400,0.0984
450,0.0746
500,0.0485


[INFO] Fitted Temperature: 1.3158
[INFO] Classification report (calibrated p):
              precision    recall  f1-score   support

          정상     0.9940    0.9940    0.9940       331
       보이스피싱     0.9972    0.9972    0.9972       702

    accuracy                         0.9961      1033
   macro avg     0.9956    0.9956    0.9956      1033
weighted avg     0.9961    0.9961    0.9961      1033

✅ '/content/drive/MyDrive/KDH/dataset/prediction_with_keywords_v2_rollout_top5_thr70.csv' 저장 완료 (rollout only, top_k=5, thr=0.7, suff≥0.55, comp≥0.08)


In [None]:
!pip -q install --upgrade "transformers>=4.42" "datasets>=2.19" "accelerate>=0.33"


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m126.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m100.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m56.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m44.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━