In [1]:
from datasets import load_dataset
from transformers import TrainingArguments
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import prepare_model_for_kbit_training, get_peft_model, LoraConfig
from trl import SFTTrainer
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os

# Expose only GPU index 2 to this process.
# NOTE(review): torch is already imported by the first cell when this runs; the
# setting only takes effect if nothing has initialised CUDA yet -- confirm, or
# move this assignment ahead of the torch/transformers imports.
os.environ.update(CUDA_VISIBLE_DEVICES="2")

In [3]:
# Notebook-level random seed; passed to train_test_split when the dataset is split.
seed: int = 42

In [4]:
# Hub identifier of the checkpoint to fine-tune (the name indicates an
# AWQ-quantized Qwen2.5 32B instruct model).
model_name = "Qwen/Qwen2.5-32B-Instruct-AWQ"

# Load the causal-LM checkpoint, requesting float16 tensors.
# Author's note: device_map must always be "cuda" here.
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="cuda", torch_dtype=torch.float16
)

Loading checkpoint shards: 100%|██████████| 5/5 [00:14<00:00,  2.84s/it]


In [None]:
# Prepare the quantized model for LoRA training.
# NOTE: this cell rebinds `model` from itself, so re-running it without first
# re-running the model-loading cell would wrap an already-wrapped model.
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    inference_mode=False,
    r=2,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],  # the usual choice for Qwen-family models
)

# Attach the adapters and print the trainable / total parameter counts.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# Enable gradient checkpointing, then make the inputs require grads.
model.gradient_checkpointing_enable()
model.enable_input_require_grads()


trainable params: 2,097,152 || all params: 1,559,892,992 || trainable%: 0.1344


In [6]:

# JSON Lines file holding the training examples (path is relative to the notebook).
data_file = "../dataset/discription_svg.jsonl"

# Read the file as a single "train" split, then hold out 20% of the rows as "test".
# The split is seeded with the notebook-level `seed`.
dataset = (
    load_dataset("json", data_files=data_file, split="train")
    .train_test_split(seed=seed, test_size=0.2)
)
print(dataset)


DatasetDict({
    train: Dataset({
        features: ['messages'],
        num_rows: 799
    })
    test: Dataset({
        features: ['messages'],
        num_rows: 200
    })
})


In [None]:
# Build the supervised fine-tuning trainer for the LoRA-wrapped model, using the
# "train" split for optimisation and the "test" split for periodic evaluation.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,  # 1 sample x 8 accumulation steps per optimizer update
        gradient_checkpointing=True,
        num_train_epochs=1,
        logging_steps=10,
        # NOTE(review): newer transformers releases name this argument
        # `eval_strategy`; confirm against the installed version before upgrading.
        evaluation_strategy="steps",
        eval_steps=50,
        warmup_steps=10,
        save_steps=100,
        save_total_limit=2,
        max_steps=-1,  # -1: run length is governed by num_train_epochs
        learning_rate=2e-4,
        group_by_length=True,
        # Reuse the notebook-level seed (also used for the dataset split) instead of
        # repeating the literal, so the two cannot drift apart.
        seed=seed,
        output_dir="outputs",
        report_to="none",
        fp16=True,  # train in half precision
    ),
)


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [None]:
# Run fine-tuning with the SFTTrainer constructed in the preceding cell.
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
