## 환경설정

In [2]:
!pip -q install \
  "transformers==4.44.2" \
  "tokenizers==0.19.1" \
  "huggingface-hub>=0.24,<0.26" \
  "accelerate>=0.34,<0.35" \
  "peft==0.12.0" \
  "datasets>=2.20.0" \
  "scikit-learn>=1.3.0" \
  "evaluate>=0.4.0" \
  "sentencepiece" \
  "protobuf<6" \
  "pandas" "numpy" "tqdm"


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m108.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m121.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.4/296.4 kB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m436.6/436.6 kB[0m [31m38.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m324.4/324.4 kB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following

## 실험설정

In [3]:
import os, time, json, math, shutil, subprocess, gc, sys
from pathlib import Path
import torch
from datasets import load_dataset
from transformers import (AutoTokenizer, AutoModelForSequenceClassification,
                          TrainingArguments, Trainer, set_seed)
from sklearn.metrics import accuracy_score
from peft import LoraConfig, get_peft_model, TaskType
print("Torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))

Torch: 2.8.0+cu126
CUDA available: True
GPU name: Tesla T4


In [4]:
# ===== 사용자 조정 파트 =====
MODEL_NAME = "klue/roberta-base"   # 한국어에 강한 베이스 모델
MAX_LENGTH = 128                    # 제목 길이가 짧아 128로 충분
BATCH_SIZE = 32                     # GPU 여유에 따라 16~64 조정
LR = 2e-5
EPOCHS = 3
WEIGHT_DECAY = 0.01
WARMUP_RATIO = 0.1
SEED = 42

# 전체(=None) 또는 임의의 N개만 사용
SUBSET_SIZE = None   # 예: 500으로 두면 각 split에서 랜덤 500개만 사용

OUTPUT_DIR_BASE = "/content/ynat_runs"
BASELINE_DIR = f"{OUTPUT_DIR_BASE}/baseline_roberta"
LORA_DIR = f"{OUTPUT_DIR_BASE}/lora_roberta"

os.makedirs(OUTPUT_DIR_BASE, exist_ok=True)
set_seed(SEED)


## 데이터셋 로드 & 전처리

In [5]:
# 0) 로드
raw = load_dataset("klue", "ynat")
print("Available splits:", list(raw.keys()))  # ['train', 'validation'] 만 있을 것

label_names = raw["train"].features["label"].names
num_labels  = len(label_names)
label2id    = {n:i for i,n in enumerate(label_names)}
id2label    = {i:n for n,i in label2id.items()}

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

def preprocess(batch):
    enc = tokenizer(batch["title"], truncation=True, max_length=MAX_LENGTH)
    enc["labels"] = batch["label"]
    return enc

def maybe_subsample(ds, n=None, seed=SEED):
    if n is None or n >= len(ds):
        return ds
    return ds.shuffle(seed=seed).select(range(n))

# 1) 기본 split 구성
# - validation: 제공된 검증 세트 그대로 사용
# - test: train에서 10%를 라벨 분포 유지(stratify)하며 분리
train_split = raw["train"].train_test_split(
    test_size=0.1, seed=SEED, stratify_by_column="label"
)
ds_train = train_split["train"]
ds_test  = train_split["test"]
ds_val   = raw["validation"]

# (옵션) 샘플 수 축소
ds_train = maybe_subsample(ds_train, SUBSET_SIZE)
ds_val   = maybe_subsample(ds_val,   SUBSET_SIZE)
ds_test  = maybe_subsample(ds_test,  SUBSET_SIZE)

# 2) 전처리
enc_train = ds_train.map(preprocess, batched=True, remove_columns=ds_train.column_names)
enc_val   = ds_val.map(preprocess,   batched=True, remove_columns=ds_val.column_names)
enc_test  = ds_test.map(preprocess,  batched=True, remove_columns=ds_test.column_names)

for ds in (enc_train, enc_val, enc_test):
    ds.set_format(type="torch")

print(enc_train, enc_val, enc_test, sep="\n")
print("Labels:", label_names)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/4.17M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/847k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/45678 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/9107 [00:00<?, ? examples/s]

Available splits: ['train', 'validation']


tokenizer_config.json:   0%|          | 0.00/375 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/173 [00:00<?, ?B/s]



Map:   0%|          | 0/41110 [00:00<?, ? examples/s]

Map:   0%|          | 0/9107 [00:00<?, ? examples/s]

Map:   0%|          | 0/4568 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 41110
})
Dataset({
    features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 9107
})
Dataset({
    features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 4568
})
Labels: ['IT과학', '경제', '사회', '생활문화', '세계', '스포츠', '정치']


## 유틸(정확도, 시간/메모리, 모델/폴더 크기)

In [6]:
from datetime import timedelta

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = logits.argmax(axis=-1)
    return {"accuracy": accuracy_score(labels, preds)}

def human_size(num_bytes: int) -> str:
    units = ["B","KB","MB","GB","TB"]
    i = 0
    n = float(num_bytes)
    while n >= 1024 and i < len(units)-1:
        n /= 1024.0
        i += 1
    return f"{n:.2f} {units[i]}"

def get_dir_size(path: str) -> int:
    total = 0
    p = Path(path)
    if not p.exists():
        return 0
    for f in p.rglob("*"):
        if f.is_file():
            total += f.stat().st_size
    return total

def gpu_mem_peak_bytes():
    if not torch.cuda.is_available():
        return 0
    torch.cuda.synchronize()
    return torch.cuda.max_memory_reserved()  # reserved가 보수적으로 기록됨

def reset_gpu_peak():
    if torch.cuda.is_available():
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.empty_cache()

def print_nvidia_smi():
    if not torch.cuda.is_available():
        print("No CUDA.")
        return
    try:
        out = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.used,memory.total,name",
                                       "--format=csv,noheader"], text=True)
        print(out)
    except Exception as e:
        print("nvidia-smi error:", e)


## fine-Tuning

In [7]:
# 이전 결과 삭제(재실행 대비)
shutil.rmtree(BASELINE_DIR, ignore_errors=True)

model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

fp16 = torch.cuda.is_available()  # GPU면 자동으로 fp16 켜기
args = TrainingArguments(
    output_dir=BASELINE_DIR,
    learning_rate=LR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    weight_decay=WEIGHT_DECAY,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    warmup_ratio=WARMUP_RATIO,
    fp16=fp16,
    report_to="none",
    seed=SEED,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=enc_train,
    eval_dataset=enc_val,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

reset_gpu_peak()
t0 = time.time()
print("== Baseline Training Started ==")
train_result = trainer.train()
t1 = time.time()

baseline_train_seconds = t1 - t0
baseline_val = trainer.evaluate(enc_val)
baseline_test = trainer.evaluate(enc_test)
baseline_gpu_peak = gpu_mem_peak_bytes()

# 저장
trainer.save_model(BASELINE_DIR)
tokenizer.save_pretrained(BASELINE_DIR)

baseline_size = get_dir_size(BASELINE_DIR)

print("\n== Baseline Results ==")
print("Validation:", baseline_val)
print("Test      :", baseline_test)
print("Train time:", str(timedelta(seconds=int(baseline_train_seconds))))
print("GPU peak  :", human_size(baseline_gpu_peak))
print("Model dir :", BASELINE_DIR, human_size(baseline_size))
print("\n[nvidia-smi snapshot]")
print_nvidia_smi()


config.json:   0%|          | 0.00/546 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/443M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


== Baseline Training Started ==


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3819,0.402128,0.863951
2,0.3329,0.401755,0.862523
3,0.2163,0.386923,0.871527



== Baseline Results ==
Validation: {'eval_loss': 0.3869233727455139, 'eval_accuracy': 0.8715273965081806, 'eval_runtime': 7.614, 'eval_samples_per_second': 1196.082, 'eval_steps_per_second': 37.431, 'epoch': 3.0}
Test      : {'eval_loss': 0.36889177560806274, 'eval_accuracy': 0.8861646234676007, 'eval_runtime': 6.6462, 'eval_samples_per_second': 687.307, 'eval_steps_per_second': 21.516, 'epoch': 3.0}
Train time: 0:08:35
GPU peak  : 2.40 GB
Model dir : /content/ynat_runs/baseline_roberta 4.13 GB

[nvidia-smi snapshot]
2600 MiB, 15360 MiB, Tesla T4



In [12]:
# fine-tuning 모델 테스트 (평가 + 예측)
# =========================

# ==== 설정 ====
MODEL_NAME   = "klue/roberta-base"
BASELINE_DIR = "./ynat_runs/baseline_roberta"
MAX_LENGTH   = 128
SEED         = 42

import os, warnings
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    Trainer, TrainingArguments, set_seed
)
from sklearn.metrics import accuracy_score

warnings.filterwarnings("ignore")
set_seed(SEED)

device = 0 if torch.cuda.is_available() else -1

# ===== compute_metrics =====
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    # logits: (N, num_labels), labels: (N,)
    preds = logits.argmax(axis=-1)
    return {"accuracy": accuracy_score(labels, preds)}

# ===== enc_test가 없으면 재구성 =====
def ensure_test_enc(tokenizer, max_len=MAX_LENGTH, seed=SEED):
    """
    - KLUE/YNAT: 공개 test split이 없으므로 train에서 10% stratified split으로 test 생성
    - enc_test만 만들고, 평가에만 사용
    """
    raw = load_dataset("klue", "ynat")
    label_names = raw["train"].features["label"].names
    label2id = {n:i for i,n in enumerate(label_names)}
    id2label = {i:n for n,i in label2id.items()}

    def preprocess(batch):
        out = tokenizer(batch["title"], truncation=True, max_length=max_len)
        out["labels"] = batch["label"]
        return out

    split = raw["train"].train_test_split(
        test_size=0.1, seed=seed, stratify_by_column="label"
    )
    ds_test = split["test"]
    enc_test = ds_test.map(preprocess, batched=True, remove_columns=ds_test.column_names)
    enc_test.set_format(type="torch")
    return enc_test, label_names, id2label, label2id

# ===== 로드 & enc_test 준비 =====
# 토크나이저/모델은 학습 저장 디렉토리에서 로드 (라벨 매핑 일관성 보장)
try:
    tokenizer = AutoTokenizer.from_pretrained(BASELINE_DIR, use_fast=True)
except Exception:
    # 혹시 fast tokenizer가 환경에서 문제되면 slow로 fallback
    tokenizer = AutoTokenizer.from_pretrained(BASELINE_DIR, use_fast=False)

model = AutoModelForSequenceClassification.from_pretrained(BASELINE_DIR)

enc_test, label_names, id2label, label2id = ensure_test_enc(tokenizer)

# ===== 평가 =====
args = TrainingArguments(
    output_dir="./tmp_eval_baseline",
    report_to="none",
    per_device_eval_batch_size=64
)
trainer = Trainer(
    model=model,
    args=args,
    eval_dataset=enc_test,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

print("== Baseline Test Metrics ==")
metrics = trainer.evaluate(enc_test)
print(metrics)

# ===== 샘플 예측 (정답 라벨과 확률만 출력) =====
from transformers import pipeline

clf = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    device=device,   # GPU: 0, CPU: -1
    top_k=None       # 최상위 1개만 받기
)

samples = [
    "삼성전자, 반도체 업황 개선 기대감에 주가 급등",
    "정부, 부동산 정책 추가 발표… 전월세 시장 안정화 목표",
    "프로야구 한국시리즈 1차전, 연장 끝에 LG 승리",
    "구글, 차세대 AI 모델 공개… 개발자 행사서 발표",
]

print("\n== Baseline Predictions ==")
for s in samples:
    out = clf(s)
    # pipeline 반환 형식 방어적 처리:
    # - 보통 [{'label': '...', 'score': ...}]
    # - top_k 사용/환경에 따라 [[{...}, {...}, ...]]가 올 수도 있음
    if isinstance(out, list) and len(out) > 0 and isinstance(out[0], dict):
        pred = out[0]
    elif isinstance(out, list) and len(out) > 0 and isinstance(out[0], list):
        pred = out[0][0]
    else:
        # 혹시 다른 포맷이면 그대로 출력하고 다음으로
        print("- 예측 형식을 해석할 수 없어 raw 출력:", out)
        continue

    label = pred.get("label", "N/A")
    score = float(pred.get("score", 0.0)) * 100.0
    print(f"- {label} ({score:.1f}%)")


Map:   0%|          | 0/4568 [00:00<?, ? examples/s]

== Baseline Test Metrics ==


{'eval_loss': 0.36888638138771057, 'eval_model_preparation_time': 0.007, 'eval_accuracy': 0.8861646234676007, 'eval_runtime': 5.5237, 'eval_samples_per_second': 826.982, 'eval_steps_per_second': 13.035}

== Baseline Predictions ==
- 경제 (99.4%)
- 정치 (89.2%)
- 스포츠 (99.7%)
- IT과학 (98.5%)


## Lora Fine Tuning

In [13]:
# 이전 결과 삭제(재실행 대비)
shutil.rmtree(LORA_DIR, ignore_errors=True)

# 베이스 모델: 동일
base_model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

# 로베르타 계열의 어텐션 모듈명(query/key/value/dense)에 LoRA 적용
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["query", "key", "value", "dense"],
    bias="none",
)

lora_model = get_peft_model(base_model, lora_config)
lora_model.print_trainable_parameters()  # 몇 % 파라미터가 학습되는지 확인용

lora_args = TrainingArguments(
    output_dir=LORA_DIR,
    learning_rate=LR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    weight_decay=WEIGHT_DECAY,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    warmup_ratio=WARMUP_RATIO,
    fp16=fp16,
    report_to="none",
    seed=SEED,
)

lora_trainer = Trainer(
    model=lora_model,
    args=lora_args,
    train_dataset=enc_train,
    eval_dataset=enc_val,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

reset_gpu_peak()
t0 = time.time()
print("== LoRA Training Started ==")
lora_train_result = lora_trainer.train()
t1 = time.time()

lora_train_seconds = t1 - t0
lora_val = lora_trainer.evaluate(enc_val)
lora_test = lora_trainer.evaluate(enc_test)
lora_gpu_peak = gpu_mem_peak_bytes()

# 저장(LoRA 어댑터 + 헤드만 저장됨)
lora_trainer.save_model(LORA_DIR)
tokenizer.save_pretrained(LORA_DIR)

lora_size = get_dir_size(LORA_DIR)

print("\n== LoRA Results ==")
print("Validation:", lora_val)
print("Test      :", lora_test)
print("Train time:", str(timedelta(seconds=int(lora_train_seconds))))
print("GPU peak  :", human_size(lora_gpu_peak))
print("Model dir :", LORA_DIR, human_size(lora_size))
print("\n[nvidia-smi snapshot]")
print_nvidia_smi()


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,935,367 || all params: 112,571,150 || trainable%: 1.7192
== LoRA Training Started ==


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4197,0.445804,0.84737
2,0.4135,0.43674,0.844954
3,0.3825,0.411144,0.854398



== LoRA Results ==
Validation: {'eval_loss': 0.4111443758010864, 'eval_accuracy': 0.8543977160426046, 'eval_runtime': 10.5019, 'eval_samples_per_second': 867.173, 'eval_steps_per_second': 27.138, 'epoch': 3.0}
Test      : {'eval_loss': 0.3622466027736664, 'eval_accuracy': 0.8771891418563923, 'eval_runtime': 5.2465, 'eval_samples_per_second': 870.684, 'eval_steps_per_second': 27.257, 'epoch': 3.0}
Train time: 0:07:26
GPU peak  : 4.23 GB
Model dir : /content/ynat_runs/lora_roberta 78.59 MB

[nvidia-smi snapshot]
1820 MiB, 15360 MiB, Tesla T4



In [16]:
# =========================
# LoRA 어댑터 로드 오류(dense) 패치 + 테스트 전체 코드
# =========================

# ---- 설정 ----
MODEL_NAME = "klue/roberta-base"
LORA_DIR   = "./ynat_runs/lora_roberta"
MAX_LENGTH = 128
SEED       = 42

import os, json, warnings
warnings.filterwarnings("ignore")

import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    Trainer, TrainingArguments, set_seed
)
from sklearn.metrics import accuracy_score

set_seed(SEED)
device = 0 if torch.cuda.is_available() else -1

# ---- metrics ----
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = logits.argmax(axis=-1)
    return {"accuracy": accuracy_score(labels, preds)}

# ---- YNAT: test 분리 구성 ----
def build_test_enc(tokenizer, max_len=MAX_LENGTH, seed=SEED):
    raw = load_dataset("klue", "ynat")
    label_names = raw["train"].features["label"].names
    label2id = {n:i for i,n in enumerate(label_names)}
    id2label = {i:n for n,i in label2id.items()}

    def preprocess(batch):
        out = tokenizer(batch["title"], truncation=True, max_length=max_len)
        out["labels"] = batch["label"]
        return out

    split = raw["train"].train_test_split(test_size=0.1, seed=seed, stratify_by_column="label")
    ds_test = split["test"]
    enc_test = ds_test.map(preprocess, batched=True, remove_columns=ds_test.column_names)
    enc_test.set_format(type="torch")
    return enc_test, label_names, id2label, label2id

# ---- 토크나이저 로드 ----
try:
    tokenizer = AutoTokenizer.from_pretrained(LORA_DIR, use_fast=True)
except Exception:
    tokenizer = AutoTokenizer.from_pretrained(LORA_DIR, use_fast=False)

enc_test, label_names, id2label, label2id = build_test_enc(tokenizer)
num_labels = len(label_names)

# ---- 1) adapter_config.json 패치: target_modules에서 'dense' 제거 ----
cfg_path = os.path.join(LORA_DIR, "adapter_config.json")
if os.path.exists(cfg_path):
    with open(cfg_path, "r", encoding="utf-8") as f:
        cfg = json.load(f)
    tmods = cfg.get("target_modules", [])
    if any(m == "dense" for m in tmods):
        cfg["target_modules"] = [m for m in tmods if m != "dense"]
        with open(cfg_path, "w", encoding="utf-8") as f:
            json.dump(cfg, f, ensure_ascii=False, indent=2)
        print("[PATCH] Removed 'dense' from target_modules in adapter_config.json:", tmods, "->", cfg["target_modules"])
    else:
        print("[INFO] adapter_config.json target_modules:", tmods)
else:
    print("[WARN] adapter_config.json not found at", cfg_path)

# ---- 2) 베이스 + 어댑터 로드 (충돌 방지) ----
from peft import PeftModel
base = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

# 로딩 시, 패치된 target_modules에 맞춰 어댑터 주입
model = PeftModel.from_pretrained(base, LORA_DIR)
model.eval()
print("[INFO] LoRA adapter loaded on base model.")

# ---- 3) 평가 ----
args = TrainingArguments(
    output_dir="./tmp_eval_lora",
    report_to="none",
    per_device_eval_batch_size=64
)
trainer = Trainer(
    model=model,
    args=args,
    eval_dataset=enc_test,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

print("== LoRA Test Metrics ==")
print(trainer.evaluate(enc_test))

# ---- 4) 샘플 예측(라벨 + 확률 %) ----
from transformers import pipeline
clf = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    device=device,
    top_k=None
)

samples = [
    "국회, 내년도 예산안 쟁점 조율… 여야 막판 협상",
    "애플, 최신 아이패드 공개… 신형 칩셋·배터리 개선",
    "세계 선수권에서 한국 양궁 대표팀 금메달 획득",
    "금리 동결에 주식·채권 시장 혼조세",
]

print("\n== LoRA Predictions ==")
for s in samples:
    out = clf(s)
    # 다양한 반환 포맷 방어
    if isinstance(out, list) and len(out) > 0 and isinstance(out[0], dict):
        pred = out[0]
    elif isinstance(out, list) and len(out) > 0 and isinstance(out[0], list):
        pred = out[0][0]
    else:
        print("- raw:", out); continue

    label = pred.get("label", "N/A")
    score = float(pred.get("score", 0.0)) * 100.0
    print(f"- {label} ({score:.1f}%)")




Map:   0%|          | 0/4568 [00:00<?, ? examples/s]

[PATCH] Removed 'dense' from target_modules in adapter_config.json: ['value', 'query', 'key', 'dense'] -> ['value', 'query', 'key']


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[INFO] LoRA adapter loaded on base model.
== LoRA Test Metrics ==


The model 'PeftModelForSequenceClassification' is not supported for text-classification. Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceClassification', 'ErnieForSequenceClassification', 'ErnieMForSequenceClassification', 'EsmForSequenceClassification', 'FalconForSequenceClassification', 'FlaubertForSequenceClassification', 'FNetForSequenceClassification', 'FunnelForSequenceClassification', 'GemmaForSequenceClassification'

{'eval_loss': 1.9565249681472778, 'eval_model_preparation_time': 0.0094, 'eval_accuracy': 0.10529772329246935, 'eval_runtime': 4.3935, 'eval_samples_per_second': 1039.722, 'eval_steps_per_second': 16.388}

== LoRA Predictions ==
- IT과학 (17.4%)
- 정치 (16.7%)
- IT과학 (16.0%)
- 세계 (16.5%)


## 결과 비교

In [17]:
summary = {
    "settings": {
        "model": MODEL_NAME,
        "subset_size_each_split": SUBSET_SIZE,
        "epochs": EPOCHS,
        "batch_size": BATCH_SIZE,
        "max_length": MAX_LENGTH,
        "lr": LR,
    },
    "labels": label_names,
    "Fine-tuning": {
        "val_accuracy": float(baseline_val["eval_accuracy"]),
        "test_accuracy": float(baseline_test["eval_accuracy"]),
        "train_time_sec": int(baseline_train_seconds),
        "gpu_peak_bytes": int(baseline_gpu_peak),
        "gpu_peak_human": human_size(baseline_gpu_peak),
        "model_dir": BASELINE_DIR,
        "model_size_bytes": int(baseline_size),
        "model_size_human": human_size(baseline_size),
    },
    "lora": {
        "val_accuracy": float(lora_val["eval_accuracy"]),
        "test_accuracy": float(lora_test["eval_accuracy"]),
        "train_time_sec": int(lora_train_seconds),
        "gpu_peak_bytes": int(lora_gpu_peak),
        "gpu_peak_human": human_size(lora_gpu_peak),
        "model_dir": LORA_DIR,
        "model_size_bytes": int(lora_size),
        "model_size_human": human_size(lora_size),
    }
}

print(json.dumps(summary, ensure_ascii=False, indent=2))


{
  "settings": {
    "model": "klue/roberta-base",
    "subset_size_each_split": null,
    "epochs": 3,
    "batch_size": 32,
    "max_length": 128,
    "lr": 2e-05
  },
  "labels": [
    "IT과학",
    "경제",
    "사회",
    "생활문화",
    "세계",
    "스포츠",
    "정치"
  ],
  "Fine-tuning": {
    "val_accuracy": 0.8715273965081806,
    "test_accuracy": 0.8861646234676007,
    "train_time_sec": 515,
    "gpu_peak_bytes": 2581594112,
    "gpu_peak_human": "2.40 GB",
    "model_dir": "./ynat_runs/baseline_roberta",
    "model_size_bytes": 4429659502,
    "model_size_human": "4.13 GB"
  },
  "lora": {
    "val_accuracy": 0.8543977160426046,
    "test_accuracy": 0.8771891418563923,
    "train_time_sec": 446,
    "gpu_peak_bytes": 4544528384,
    "gpu_peak_human": "4.23 GB",
    "model_dir": "./ynat_runs/lora_roberta",
    "model_size_bytes": 82409486,
    "model_size_human": "78.59 MB"
  }
}
