In [None]:
# Environment setup for this notebook. `%pip` (rather than `!pip`) installs into
# the environment of the running kernel.
%pip install torch==2.1.1 torchvision==0.16.1 torchaudio==2.1.1
# Pinned to the versions the recorded install log resolved to.
%pip install transformers==4.51.3 accelerate==1.6.0 datasets==3.6.0
# NOTE(review): the peft version is not visible in the recorded log — pin it once confirmed.
%pip install peft

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting transformers
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting accelerate
  Downloading accelerate-1.6.0-py3-none-any.whl.metadata (19 kB)
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface-hub<1.0,>=0.30.0 (from transformers)
  Downloading huggingface_hub-0.31.1-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.5/40.5 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloadi

In [None]:
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model
import torch
import json
import random

# === Step 1: Load ChatML Format Dataset ===
# Read the UTF-8 JSON file in full, parse it, then wrap the parsed records in a Dataset.
with open("6_wayo_qna_data.json", mode="r", encoding="utf-8") as f:
    raw_data = json.loads(f.read())

dataset = Dataset.from_list(raw_data)

# === Step 2: Load Tokenizer & Base Model ===
model_name = "Qwen/Qwen3-8B"

# Tokenizer and model are loaded from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Base model weights are requested in float16.
base_model_kwargs = dict(trust_remote_code=True, torch_dtype=torch.float16)
model = AutoModelForCausalLM.from_pretrained(model_name, **base_model_kwargs)

# === Step 3: Apply LoRA Adapter ===
# Module names that receive LoRA adapters, grouped as attention and MLP projections.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.03,
    bias="none",
    target_modules=attention_projections + mlp_projections,
)

# Wrap the base model with the adapters, then move the wrapped model to the GPU.
model = get_peft_model(model, lora_config).to("cuda")

# === Step 4: Tokenize Chat Format ===
def _field_text(example, key):
    """Return the value stored under `key` as a string, or "" when it is absent or None."""
    value = example.get(key)
    return "" if value is None else str(value)


def tokenize(example, *, chat_tokenizer=None, rng=None):
    """Turn one Q&A record into chat-templated token ids for causal-LM training.

    The user turn is chosen at random: with probability 0.7 it is only the
    symptom/behaviour text, otherwise it is a three-line prompt that also carries
    the onset time and the guardian's reaction. The assistant turn always
    contains the cause analysis followed by the proposed solution.

    A field whose value is None (for example a JSON null) is treated as an empty
    string. `example.get(key, "")` alone does not cover that case, because the
    default only applies when the key is absent; a None value would crash
    `.strip()` or put the literal text "None" into the prompt.

    Args:
        example: Mapping holding the record's fields under their Korean keys.
        chat_tokenizer: Optional object exposing `apply_chat_template`; defaults
            to the notebook-level `tokenizer`.
        rng: Optional object exposing `random()`; defaults to the `random` module.

    Returns:
        A dict with "input_ids" (the result of `apply_chat_template`) and
        "labels" (a separate copy of the same ids).
    """
    chat_tokenizer = tokenizer if chat_tokenizer is None else chat_tokenizer
    rng = random if rng is None else rng

    symptoms = _field_text(example, "증상과 행동")
    if rng.random() < 0.7:
        # Short prompt: symptom text only.
        user_input = symptoms.strip()
    else:
        # Long prompt: symptom text plus onset time and guardian reaction.
        user_input = (
            f"📍 증상 및 행동: {symptoms}\n"
            f"🕒 시작 시점: {_field_text(example, '시작된 시점')}\n"
            f"👤 보호자 반응: {_field_text(example, '보호자님 반응')}"
        ).strip()

    assistant_response = (
        f"🔍 원인 분석:\n{_field_text(example, '원인 분석')}\n\n"
        f"💡 솔루션 제안:\n{_field_text(example, '솔루션 제안')}"
    ).strip()

    messages = [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": assistant_response}
    ]

    # Sequences are truncated to at most 2048 tokens.
    input_ids = chat_tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        truncation=True,
        max_length=2048
    )

    return {
        "input_ids": input_ids,
        "labels": input_ids.copy()
    }

# `tokenize` draws from the global `random` module to choose a prompt format for
# each row; seed it first so re-running the notebook rebuilds the same training set.
random.seed(42)
tokenized_dataset = dataset.map(tokenize)

# === Step 5: Training Arguments ===
training_args = TrainingArguments(
    output_dir="./qwen3-lora-output",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,  # gradients from 8 single-sample batches are accumulated per update
    num_train_epochs=3,
    learning_rate=1.5e-5,
    logging_steps=10,
    save_steps=100,
    save_total_limit=2,  # cap on the number of checkpoints kept in output_dir
    fp16=True,
    # The model handed to Trainer is a PEFT wrapper; Trainer warns that it cannot
    # detect label_names for it and falls back to an empty list. Name the label
    # key explicitly so that fallback is not used.
    label_names=["labels"],
    report_to="none"
)

# === Step 6: Trainer ===
# Language-modeling collator with masked-LM disabled, built separately from the Trainer call.
causal_lm_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=causal_lm_collator,
)

# === Step 7: Train ===
# Keep the training summary as the cell's final expression so it is still displayed.
train_result = trainer.train()
train_result


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Map:   0%|          | 0/2452 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,2.7026
20,2.6084
30,2.5327
40,2.4508
50,2.4498
60,2.4242
70,2.3407
80,2.2942
90,2.2857
100,2.2638


TrainOutput(global_step=918, training_loss=1.9744175524493448, metrics={'train_runtime': 2638.9497, 'train_samples_per_second': 2.787, 'train_steps_per_second': 0.348, 'total_flos': 3.137344340725801e+17, 'train_loss': 1.9744175524493448, 'epoch': 2.99184339314845})

### Qwen 모델 모듈 직접 확인하는 코드

In [None]:
# Print the dotted name of every module in `model`, one per line.
# NOTE(review): the recorded output lists names such as
# `model.layers.0.self_attn.q_proj` with no PEFT wrapper prefix — presumably this
# cell was run on the base model before `get_peft_model`; confirm the intended
# execution order.
print(*(module_name for module_name, _ in model.named_modules()), sep="\n")


model
model.embed_tokens
model.layers
model.layers.0
model.layers.0.self_attn
model.layers.0.self_attn.q_proj
model.layers.0.self_attn.k_proj
model.layers.0.self_attn.v_proj
model.layers.0.self_attn.o_proj
model.layers.0.self_attn.q_norm
model.layers.0.self_attn.k_norm
model.layers.0.mlp
model.layers.0.mlp.gate_proj
model.layers.0.mlp.up_proj
model.layers.0.mlp.down_proj
model.layers.0.mlp.act_fn
model.layers.0.input_layernorm
model.layers.0.post_attention_layernorm
model.layers.1
model.layers.1.self_attn
model.layers.1.self_attn.q_proj
model.layers.1.self_attn.k_proj
model.layers.1.self_attn.v_proj
model.layers.1.self_attn.o_proj
model.layers.1.self_attn.q_norm
model.layers.1.self_attn.k_norm
model.layers.1.mlp
model.layers.1.mlp.gate_proj
model.layers.1.mlp.up_proj
model.layers.1.mlp.down_proj
model.layers.1.mlp.act_fn
model.layers.1.input_layernorm
model.layers.1.post_attention_layernorm
model.layers.2
model.layers.2.self_attn
model.layers.2.self_attn.q_proj
model.layers.2.self_att

### 파인튜닝 진행 상황 확인 코드

In [None]:
# Show the epoch reached, the global step, and then the full state object, one per line.
training_state = trainer.state
print(training_state.epoch, training_state.global_step, training_state, sep="\n")


2.99184339314845
918
TrainerState(epoch=2.99184339314845, global_step=918, max_steps=918, logging_steps=10, eval_steps=500, save_steps=100, train_batch_size=1, num_train_epochs=3, num_input_tokens_seen=0, total_flos=3.137344340725801e+17, log_history=[{'loss': 2.7026, 'grad_norm': 0.41688981652259827, 'learning_rate': 1.4852941176470589e-05, 'epoch': 0.03262642740619902, 'step': 10}, {'loss': 2.6084, 'grad_norm': 0.5307020545005798, 'learning_rate': 1.4689542483660132e-05, 'epoch': 0.06525285481239804, 'step': 20}, {'loss': 2.5327, 'grad_norm': 0.6510335206985474, 'learning_rate': 1.4526143790849673e-05, 'epoch': 0.09787928221859707, 'step': 30}, {'loss': 2.4508, 'grad_norm': 0.41874781250953674, 'learning_rate': 1.4362745098039217e-05, 'epoch': 0.13050570962479607, 'step': 40}, {'loss': 2.4498, 'grad_norm': 0.3602053225040436, 'learning_rate': 1.4199346405228758e-05, 'epoch': 0.1631321370309951, 'step': 50}, {'loss': 2.4242, 'grad_norm': 0.3123202621936798, 'learning_rate': 1.40359477