In [None]:
# 패키지 설치
!pip install trl peft accelerate bitsandbytes datasets transformers wandb

In [None]:
import os
import torch
import wandb
from datasets import load_dataset, Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig,
)
from huggingface_hub import login

# 0. Log in to the Hugging Face Hub.
# The access token is read from the HF_TOKEN environment variable so that no
# credential is stored in the notebook itself. When the variable is unset,
# `token` is None and login() asks for the token interactively instead.
login(token=os.environ.get("HF_TOKEN"))

# 1. Start the Weights & Biases run that tracks this fine-tuning job.
wandb.init(project="GenBot", name="exaone-finetuning1")

# 2. Model and tokenizer setup
model_name = "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct"
dataset_path = "card_consult_finetune_messages.jsonl"

# 4-bit NF4 quantization with double quantization and a bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized base model and hand it straight to the k-bit training
# preparation step.
model = prepare_model_for_kbit_training(
    AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        device_map="auto",
        quantization_config=bnb_config,
    )
)
# Turn off `use_cache` on the model config for the training run.
model.config.use_cache = False

# The end-of-sequence token is reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

# 3. LoRA configuration
# Adapters of rank 16 (alpha 32, dropout 0.1) are attached to the modules
# named q_proj and v_proj of the causal-LM model.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)
# Wrap the prepared base model with the adapters defined above.
model = get_peft_model(model, peft_config)

# 4. Data formatting function
def format_data(input):
    """Normalize one raw dialogue record.

    A fixed system prompt is placed first. After it, consecutive messages
    that share a role are merged into a single message whose content is the
    individual (stripped) contents joined by one space. A merged message
    whose buffered text is empty is dropped.

    Args:
        input: Mapping with a "messages" list; every message is a mapping
            with "role" and "content" (string) entries.

    Returns:
        A new dict ``{"messages": [...]}`` holding the system prompt followed
        by the merged messages.
    """
    merged = [{"role": "system", "content": "당신은 친절하고 정확한 고객상담 챗봇입니다."}]

    # `pieces` holds the stripped contents of the run currently being
    # collected. It starts with one empty string so that the buffer is empty
    # (and therefore never flushed) before the first message is seen.
    current_role = None
    pieces = [""]

    for turn in input["messages"]:
        speaker = turn["role"]
        text = turn["content"].strip()

        if speaker == current_role:
            # Same speaker as before: extend the current run.
            pieces.append(text)
            continue

        # Speaker changed: emit the finished run (if it has any text) and
        # start a new one with this message.
        buffered = " ".join(pieces)
        if buffered:
            merged.append({"role": current_role, "content": buffered.strip()})
        current_role = speaker
        pieces = [text]

    # Emit whatever run is still open after the last message.
    buffered = " ".join(pieces)
    if buffered:
        merged.append({"role": current_role, "content": buffered.strip()})

    return {"messages": merged}

# 5. Load the JSONL file as the "train" split and normalize every dialogue,
# one record at a time, with format_data.
dataset = load_dataset("json", data_files=dataset_path, split="train").map(
    format_data,
    batched=False,
)

def preprocess(example, tokenizer):
    """Build one training example per assistant turn of a dialogue.

    For every assistant message the prompt is the concatenation of all
    earlier messages, each rendered as ``<|role|>`` + content + EOS, followed
    by an opening ``<|assistant|>`` tag. The target is the assistant content
    plus EOS. Prompt positions are masked with -100 in the labels so only the
    response contributes to the loss.

    Each role is rendered with its own tag. Previously every non-user role,
    including the system prompt, was written as ``<|assistant|>``, which
    presented the system prompt to the model as an assistant utterance.

    NOTE(review): these tags are plain text built here, not the tokenizer's
    chat template — confirm that inference formats prompts the same way.

    Args:
        example: Mapping with a "messages" list of ``{"role", "content"}``
            mappings.
        tokenizer: Callable as ``tokenizer(text, add_special_tokens=False)``
            returning an object with an ``input_ids`` list, and exposing
            ``eos_token``.

    Returns:
        ``{"input_ids": [...], "labels": [...]}`` with one token-id list per
        assistant turn in each; both lists are empty when the dialogue has no
        assistant turn.
    """
    eos = tokenizer.eos_token
    in_ids, lbls = [], []

    # Rendered text of all messages seen so far; grown incrementally instead
    # of being rebuilt from scratch for every assistant turn.
    history = ""
    for msg in example["messages"]:
        role, content = msg["role"], msg["content"]

        if role == "assistant":
            prompt_ids = tokenizer(
                history + "<|assistant|>", add_special_tokens=False
            ).input_ids
            resp_ids = tokenizer(content + eos, add_special_tokens=False).input_ids
            in_ids.append(prompt_ids + resp_ids)
            lbls.append([-100] * len(prompt_ids) + resp_ids)

        # This message becomes context for any later assistant turn.
        history += f"<|{role}|>" + content + eos

    return {"input_ids": in_ids, "labels": lbls}

# Tokenize every dialogue. Each resulting row holds a list of examples, one
# per assistant turn, in both "input_ids" and "labels".
proc = dataset.map(
    preprocess,
    fn_kwargs={"tokenizer": tokenizer},
    remove_columns=["messages"],
    batched=False,
)

# 6. Flatten: one dataset row per training example instead of per dialogue.
all_inputs = [seq for row in proc["input_ids"] for seq in row]
all_labels = [seq for row in proc["labels"] for seq in row]
train_ds = Dataset.from_dict({"input_ids": all_inputs, "labels": all_labels})

# 7. Collate function
def collate_fn(batch):
    """Pad a list of tokenized examples into a single tensor batch.

    Only ``input_ids`` are handed to ``tokenizer.pad`` (which also creates the
    attention mask). The labels are padded here, with -100 so that padded
    positions are ignored by the loss. Passing ragged ``labels`` through
    ``tokenizer.pad`` leaves them unpadded, so the tensor conversion fails as
    soon as a batch contains sequences of different lengths.

    Args:
        batch: List of examples, each with "input_ids" and "labels" lists of
            equal length.

    Returns:
        The padded batch with "input_ids", the attention mask produced by
        ``tokenizer.pad``, and "labels" as a ``torch.long`` tensor.
    """
    padded = tokenizer.pad(
        {"input_ids": [ex["input_ids"] for ex in batch]},
        padding=True,
        return_tensors="pt",
    )

    # Pad labels to the same width and on the same side as the inputs.
    width = padded["input_ids"].shape[1]
    pad_on_left = tokenizer.padding_side == "left"
    label_rows = []
    for ex in batch:
        row = list(ex["labels"])
        filler = [-100] * (width - len(row))
        label_rows.append(filler + row if pad_on_left else row + filler)

    padded["labels"] = torch.tensor(label_rows, dtype=torch.long)
    return padded

# 8. Training configuration (wandb logging + Hub upload)
args = TrainingArguments(
    # Output location, schedule and checkpointing
    output_dir="./lora_exaone",
    num_train_epochs=3,
    save_strategy="epoch",
    # Optimisation: one sample per device, gradients accumulated over 4 steps
    learning_rate=2e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    # Precision and memory
    # NOTE(review): fp16 is enabled here while the quantization config uses a
    # bfloat16 compute dtype — confirm this combination is intended.
    fp16=True,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # Logging
    logging_steps=10,
    report_to="wandb",
    run_name="exaone-chatbot1",
    # Hub upload on every checkpoint save
    push_to_hub=True,
    hub_model_id="jangsukim/exaone_finetuning1",
    hub_strategy="every_save",
)

# 9. Build the Trainer
trainer = Trainer(
    args=args,
    model=model,
    data_collator=collate_fn,
    train_dataset=train_ds,
)

# 10. Run training, then upload the result to the Hub
trainer.train()
trainer.push_to_hub()
