# Qwen2.5-3B-Instruct LoRA Fine-tuning (KoAlpaca)

Intel Arc GPU(XPU) 환경에서 한국어 KoAlpaca 데이터셋을 사용해 Qwen/Qwen2.5-3B-Instruct 모델을 LoRA 방식으로 미세 조정하는 워크플로입니다. 각 셀을 순서대로 실행하세요.


In [2]:
import torch
from contextlib import nullcontext

# Choose the compute device: Intel XPU is preferred, CUDA is the second choice,
# and CPU is the last resort.
if not hasattr(torch, "xpu") or not torch.xpu.is_available():
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print("Using CUDA device:", torch.cuda.get_device_name(0))
    else:
        device = torch.device("cpu")
        print("Using CPU (Arc GPU가 인식되지 않았습니다)")
else:
    device = torch.device("xpu")
    try:
        device_name = torch.xpu.get_device_name(torch.xpu.current_device())
    except Exception:
        # Best effort only: use a generic label if the name query fails.
        device_name = "Intel XPU"
    print("Using XPU device:", device_name)

# Accelerators train in bfloat16; the CPU path stays in float32.
train_dtype = torch.float32 if device.type == "cpu" else torch.bfloat16
print("Training dtype:", train_dtype)


Using XPU device: Intel(R) Graphics [0x7d55]
Training dtype: torch.bfloat16


In [3]:
from dataclasses import dataclass
from typing import Dict, List

from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    get_linear_schedule_with_warmup,
)
from transformers.dynamic_module_utils import get_class_from_dynamic_module


In [4]:
import os
# Request that Hugging Face Hub progress bars be disabled via the environment.
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"

from huggingface_hub.utils import disable_progress_bars
# Also disable them through the library call; presumably needed because the hub
# library may already have been imported by an earlier cell -- TODO confirm.
disable_progress_bars()


In [5]:
import math

# Identifiers passed to from_pretrained() / load_dataset() for the base model
# and the instruction dataset.
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
DATASET_NAME = "beomi/KoAlpaca-v1.1a"
# System message placed first in every chat prompt (training and demo).
SYSTEM_PROMPT = "당신은 유용한 한국어 AI 어시스턴트입니다."
# Directory where the LoRA adapter and tokenizer are saved after training.
OUTPUT_DIR = "outputs/qwen25_3b_koalpaca_lora"

# Cap on training examples; the evaluation split is capped at MAX_SAMPLES // 20.
MAX_SAMPLES = 2000
# Token limit used for truncation when tokenizing each example.
MAX_LENGTH = 2048
# DataLoader batch size, and the number of batches whose gradients are
# accumulated before each optimizer step.
BATCH_SIZE = 1
GRADIENT_ACCUMULATION = 8
# Number of passes over the training set.
EPOCHS = 2
# AdamW learning rate.
LEARNING_RATE = 2e-4
# Fraction of the total optimizer steps used for warmup.
WARMUP_RATIO = 0.05
# Maximum gradient norm used for clipping before each optimizer step.
MAX_GRAD_NORM = 1.0

# Seed torch's RNG, plus the CUDA or XPU RNGs when that device is in use.
torch.manual_seed(42)
if device.type == "cuda":
    torch.cuda.manual_seed_all(42)
elif device.type == "xpu" and hasattr(torch.xpu, "manual_seed_all"):
    torch.xpu.manual_seed_all(42)


In [None]:
# Load the tokenizer and pad on the right-hand side of each sequence.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.padding_side = "right"
# Reuse the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Try the standard auto class first; a ValueError triggers the manual
# class-resolution fallback below.
try:
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        dtype=train_dtype,
    )
except ValueError as exc:
    print("Falling back to dynamic module loader for Qwen2.5-3B-Instruct (AutoModelForCausalLM not mapped).")
    config = AutoConfig.from_pretrained(MODEL_NAME, trust_remote_code=True)
    # Prefer an explicit class reference from the config's auto_map, if any.
    auto_map = getattr(config, "auto_map", {}) or {}
    model_ref = auto_map.get("AutoModelForCausalLM") or auto_map.get("AutoModel")
    if model_ref is None:
        # No auto_map entry: build "modeling_<model_type>.<first architecture>"
        # from the config, or give up if no architecture is listed.
        architectures = getattr(config, "architectures", []) or []
        if not architectures:
            raise RuntimeError(
                "Unable to resolve model class automatically. Update transformers to the latest version."
            ) from exc
        model_cls_name = architectures[0]
        module_name = f"modeling_{config.model_type}"
        model_ref = f"{module_name}.{model_cls_name}"
    # Resolve the class from the model repository's code, then load weights with it.
    ModelClass = get_class_from_dynamic_module(
        model_ref,
        MODEL_NAME,
        trust_remote_code=True,
    )
    model = ModelClass.from_pretrained(
        MODEL_NAME,
        config=config,
        trust_remote_code=True,
        dtype=train_dtype,
    )

# LoRA adapter settings: rank 16, alpha 32, dropout 0.05, no bias training,
# applied to the projection modules named in target_modules.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "down_proj", "gate_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the base model with the adapters, move it to the selected device, and
# print the trainable-parameter summary.
model = get_peft_model(model, lora_config)
model.to(device)
model.print_trainable_parameters()


Cancellation requested; stopping current tasks.


In [None]:
def build_chat_prompt(example: Dict[str, str]) -> str:
    """Render one dataset record as a complete chat transcript string.

    The user turn is the record's ``instruction``; when the record has a
    non-empty ``input`` it is appended under an "입력:" heading. The system
    turn is ``SYSTEM_PROMPT`` and the assistant turn is the record's ``output``.

    Args:
        example: Record with ``instruction`` and ``output`` keys and an
            optional ``input`` key.

    Returns:
        The untokenized text produced by the tokenizer's chat template, with
        no generation prompt appended.
    """
    question = example["instruction"]
    extra_input = example.get("input")
    if extra_input:
        question = question + f"\n\n입력:\n{extra_input}"

    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": question}
    assistant_turn = {"role": "assistant", "content": example["output"]}
    return tokenizer.apply_chat_template(
        [system_turn, user_turn, assistant_turn],
        tokenize=False,
        add_generation_prompt=False,
    )


def tokenize_example(example: Dict[str, str]) -> Dict[str, List[int]]:
    """Tokenize one dataset record after rendering it as a chat transcript.

    Args:
        example: Record accepted by ``build_chat_prompt``.

    Returns:
        A dict holding the ``input_ids`` and ``attention_mask`` lists for the
        record, truncated to ``MAX_LENGTH`` tokens and left unpadded.
    """
    encoding = tokenizer(
        build_chat_prompt(example),
        truncation=True,
        max_length=MAX_LENGTH,
        padding=False,
    )
    # Keep only the two fields the collator consumes.
    return {key: encoding[key] for key in ("input_ids", "attention_mask")}


In [None]:
# Load the dataset and hold out 5% of its "train" split for evaluation
# (fixed seed so the split is repeatable).
raw_dataset = load_dataset(DATASET_NAME)
split_dataset = raw_dataset["train"].train_test_split(test_size=0.05, seed=42)

# When MAX_SAMPLES is set, keep only the first MAX_SAMPLES training rows and at
# most MAX_SAMPLES // 20 (minimum 1) evaluation rows.
if MAX_SAMPLES:
    split_dataset["train"] = split_dataset["train"].select(range(min(MAX_SAMPLES, len(split_dataset["train"]))))
    eval_cap = max(1, MAX_SAMPLES // 20)
    split_dataset["test"] = split_dataset["test"].select(range(min(eval_cap, len(split_dataset["test"]))))

# Tokenize both splits, dropping the original columns so only the tokenizer
# outputs remain.
train_dataset = split_dataset["train"].map(tokenize_example, remove_columns=split_dataset["train"].column_names)
eval_dataset = split_dataset["test"].map(tokenize_example, remove_columns=split_dataset["test"].column_names)

print("Train samples:", len(train_dataset))
print("Eval samples:", len(eval_dataset))


In [None]:
@dataclass
class DataCollator:
    """Pad a list of tokenized examples into one causal-LM training batch.

    Attributes:
        tokenizer: Tokenizer whose ``pad_token_id`` fills padded input positions.
        device: Optional target device for the returned tensors. When left as
            ``None`` the module-level ``device`` is used.
    """

    # Annotations are quoted so they are not evaluated when the class is created.
    tokenizer: "AutoTokenizer"
    device: "torch.device | None" = None

    def __call__(self, features: List[Dict[str, List[int]]]):
        """Collate ``features`` into padded tensors on the target device.

        Args:
            features: Examples, each with ``input_ids`` and ``attention_mask``
                integer lists of equal length.

        Returns:
            A dict with ``input_ids``, ``attention_mask`` and ``labels``
            tensors, padded on the right to the longest example in the batch.
            ``labels`` copies ``input_ids`` with padded positions set to -100.
        """
        input_ids = pad_sequence(
            [torch.tensor(f["input_ids"], dtype=torch.long) for f in features],
            batch_first=True,
            padding_value=self.tokenizer.pad_token_id,
        )
        attention_mask = pad_sequence(
            [torch.tensor(f["attention_mask"], dtype=torch.long) for f in features],
            batch_first=True,
            padding_value=0,
        )

        labels = input_ids.clone()
        # Mask by attention mask rather than by token id: when the pad token
        # shares an id with a real token (e.g. pad falls back to EOS), matching
        # on the id would also hide genuine tokens from the loss.
        labels[attention_mask == 0] = -100

        target = device if self.device is None else self.device
        return {
            "input_ids": input_ids.to(target),
            "attention_mask": attention_mask.to(target),
            "labels": labels.to(target),
        }


# Build the collator and the two loaders: training batches are shuffled,
# evaluation batches keep dataset order.
data_collator = DataCollator(tokenizer)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=data_collator)
eval_dataloader = DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=data_collator)

# Notebook cell result: number of batches in each loader.
len(train_dataloader), len(eval_dataloader)


In [None]:
# AdamW over the model's parameters, with a linear warmup/decay schedule.
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, betas=(0.9, 0.999), weight_decay=0.01)
# train_epoch() steps the optimizer every GRADIENT_ACCUMULATION batches and once
# more for a trailing partial group at the end of each epoch. The schedule
# length is therefore ceil(batches / accumulation) per epoch times EPOCHS;
# rounding once over all epochs would under-count it whenever the batch count
# is not a multiple of GRADIENT_ACCUMULATION.
total_training_steps = math.ceil(len(train_dataloader) / GRADIENT_ACCUMULATION) * EPOCHS
# Always warm up for at least one step.
warmup_steps = max(1, int(total_training_steps * WARMUP_RATIO))
scheduler = get_linear_schedule_with_warmup(optimizer, warmup_steps, total_training_steps)

def autocast_context():
    """Return the mixed-precision context manager for the active device.

    Returns:
        A bfloat16 ``torch.autocast`` context when the module-level ``device``
        is CUDA or XPU, otherwise a no-op ``nullcontext``.
    """
    if device.type in ("cuda", "xpu"):
        # Use the device-agnostic entry point: the per-backend
        # torch.cuda.amp.autocast wrapper is deprecated, and relying on a
        # torch.xpu.amp attribute silently disabled autocast on builds that do
        # not provide it.
        return torch.autocast(device_type=device.type, dtype=torch.bfloat16)
    return nullcontext()

def train_epoch(epoch: int) -> float:
    """Run one pass over the training loader with gradient accumulation.

    Args:
        epoch: Zero-based epoch index; used only in the progress message.

    Returns:
        The mean of the unscaled per-batch losses seen during the epoch.
    """
    model.train()
    optimizer.zero_grad()

    num_batches = len(train_dataloader)
    loss_total = 0.0
    batches_seen = 0
    for step, batch in enumerate(train_dataloader, start=1):
        with autocast_context():
            outputs = model(**batch)
            # Scale so the accumulated gradient averages over the group.
            scaled_loss = outputs.loss / GRADIENT_ACCUMULATION
        scaled_loss.backward()
        loss_total += outputs.loss.item()
        batches_seen += 1

        # Update on every full accumulation group and on the final batch.
        group_complete = step % GRADIENT_ACCUMULATION == 0
        if group_complete or step == num_batches:
            torch.nn.utils.clip_grad_norm_(model.parameters(), MAX_GRAD_NORM)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        if step % 50 == 0:
            mean_so_far = loss_total / batches_seen
            print(f"Epoch {epoch + 1} | Step {step}/{num_batches} | Loss {mean_so_far:.4f}")

    return loss_total / max(batches_seen, 1)


def evaluate() -> float:
    """Compute the mean loss over the evaluation loader and print a summary.

    Returns:
        The mean of the per-batch losses (0.0 when the loader is empty).
    """
    model.eval()
    loss_sum = 0.0
    batch_count = 0
    with torch.no_grad():
        for batch in eval_dataloader:
            with autocast_context():
                outputs = model(**batch)
            loss_sum += outputs.loss.item()
            batch_count += 1

    mean_loss = loss_sum / max(batch_count, 1)
    # Cap the exponent at 20 before computing the printed perplexity.
    capped_loss = min(mean_loss, 20)
    perplexity = math.exp(capped_loss)
    print(f"Eval | Loss {mean_loss:.4f} | Perplexity {perplexity:.2f}")
    return mean_loss


In [None]:
# Train for EPOCHS epochs, printing the average training loss and running an
# evaluation pass after each one.
for epoch in range(EPOCHS):
    train_loss = train_epoch(epoch)
    print(f"Epoch {epoch + 1} complete | Avg train loss {train_loss:.4f}")
    evaluate()


In [None]:
# Save the PEFT-wrapped model and the tokenizer to OUTPUT_DIR.
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"LoRA 어댑터와 토크나이저를 '{OUTPUT_DIR}'에 저장했습니다.")


In [None]:
# Quick qualitative check: sample one answer from the fine-tuned model.
model.eval()

demo_question = "고려 시대의 대표적인 문화유산 한 가지를 소개하고 특징을 설명해줘."
messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": demo_question},
]
# Unlike the training prompts, this one has no assistant turn and is rendered
# with add_generation_prompt=True.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Sampled decoding (temperature 0.7, top-p 0.9) of up to 256 new tokens, with
# gradient tracking disabled.
with torch.no_grad():
    generated = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )

# Decode the first returned sequence, dropping special tokens, and print it.
decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
print(decoded)
