# PEFT 高效微调
- 加载模型
- 使用peft model （lora配置）添加适配器（adapter）
- 训练数据准备
- 模型训练
- 模型保存
- 模型推理









In [9]:
# import os
# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [12]:
## 准备数据

from datasets import load_dataset

dataset = load_dataset("Abirate/english_quotes")

print(dataset["train"][100])








{'quote': '“I\'m in love with you," he said quietly."Augustus," I said."I am," he said. He was staring at me, and I could see the corners of his eyes crinkling. "I\'m in love with you, and I\'m not in the business of denying myself the simple pleasure of saying true things. I\'m in love with you, and I know that love is just a shout into the void, and that oblivion is inevitable, and that we\'re all doomed and that there will come a day when all our labor has been returned to dust, and I know the sun will swallow the only earth we\'ll ever have, and I am in love with you.”', 'author': 'John Green,', 'tags': ['doomed', 'inevitable', 'love', 'oblivion', 'pleasure', 'simple']}


In [13]:
from transformers import AutoTokenizer, AutoModelForCausalLM


model_id = f"/mnt/e/models/Qwen2.5-0.5B-Instruct"

model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)



The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now default to True since model is quantized.


In [6]:
# 为 8 位量化训练准备模型
# from peft import prepare_model_for_int8_training
from peft import prepare_model_for_kbit_training
model = prepare_model_for_kbit_training(model)

In [14]:
## from peft import prepare_model_for_int8_training
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=8,  # LoRA的秩，影响LoRA矩阵的大小
    lora_alpha=32,  # LoRA适应的比例因子
    # 指定将LoRA应用到的模型模块，通常是attention和全连接层的投影
    # target_modules = ["q_proj", "k_proj", "v_proj", "out_proj", "fc_in", "fc_out"],
    target_modules = ["q_proj"],
    lora_dropout=0.05,  # 在LoRA模块中使用的dropout率
    bias="none",  # 设置bias的使用方式，这里没有使用bias
    task_type="CAUSAL_LM"  # 任务类型，这里设置为因果(自回归）语言模型
)

model = get_peft_model(model,config)

# 打印出模型中可训练的参数
model.print_trainable_parameters()

# model = prepare_model_for_int8_training(model)

trainable params: 344,064 || all params: 494,376,832 || trainable%: 0.0696


In [15]:

def tokenize(example):
    return tokenizer(example["quote"], padding="max_length", truncation=True,max_length=512)

# train_dataset = dataset["train"]

tokenizer_dataset = dataset.map(tokenize, batched=True)






In [16]:
input = tokenizer(dataset["train"][100]["quote"])
print(input)

{'input_ids': [10168, 2776, 304, 2948, 448, 498, 1335, 566, 1053, 29566, 1189, 31459, 355, 1335, 358, 1053, 1189, 40, 1079, 1335, 566, 1053, 13, 1260, 572, 36774, 518, 752, 11, 323, 358, 1410, 1490, 279, 23462, 315, 806, 6414, 1560, 766, 2718, 13, 330, 40, 2776, 304, 2948, 448, 498, 11, 323, 358, 2776, 537, 304, 279, 2562, 315, 40466, 7037, 279, 4285, 16656, 315, 5488, 830, 2513, 13, 358, 2776, 304, 2948, 448, 498, 11, 323, 358, 1414, 429, 2948, 374, 1101, 264, 41123, 1119, 279, 737, 11, 323, 429, 66005, 290, 374, 30252, 11, 323, 429, 582, 2299, 678, 57637, 323, 429, 1052, 686, 2525, 264, 1899, 979, 678, 1039, 9327, 702, 1012, 5927, 311, 15797, 11, 323, 358, 1414, 279, 7015, 686, 41176, 279, 1172, 9393, 582, 3278, 3512, 614, 11, 323, 358, 1079, 304, 2948, 448, 498, 1987], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

In [18]:
from transformers import DataCollatorForLanguageModeling

# 数据收集器，用于处理语言模型的数据，这里设置为不使用掩码语言模型(MLM)
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

In [19]:
## 微调模型

from transformers import TrainingArguments, Trainer


trainingArgs = TrainingArguments(
        output_dir=f"/mnt/e/models/peft/qw-peft0.5",  # 指定模型输出和保存的目录
        per_device_train_batch_size=4,  # 每个设备上的训练批量大小
        learning_rate=2e-4,  # 学习率
        fp16=True,  # 启用混合精度训练，可以提高训练速度，同时减少内存使用
        logging_steps=20,  # 指定日志记录的步长，用于跟踪训练进度
        max_steps=100, # 最大训练步长
        # num_train_epochs=1  # 训练的总轮数
        gradient_accumulation_steps=4  # 梯度累积步数
)

train = Trainer(
    model=model,
    train_dataset=tokenizer_dataset["train"],
    args=trainingArgs,
    data_collator=data_collator
)


train.train()


model.save_pretrained("/mnt/e/models/peft/qw-peft0.5")
tokenizer.save_pretrained("/mnt/e/models/peft/qw-peft0.5")




Step,Training Loss
20,3.2032
40,3.306
60,3.1602
80,3.1169
100,3.1863


('/mnt/e/models/peft/qw-peft0.5/tokenizer_config.json',
 '/mnt/e/models/peft/qw-peft0.5/special_tokens_map.json',
 '/mnt/e/models/peft/qw-peft0.5/vocab.json',
 '/mnt/e/models/peft/qw-peft0.5/merges.txt',
 '/mnt/e/models/peft/qw-peft0.5/added_tokens.json',
 '/mnt/e/models/peft/qw-peft0.5/tokenizer.json')

In [25]:
newmodel = train.model

text = "No matter how careful you are"
inputs = tokenizer(text, return_tensors="pt").to(0)

out = newmodel.generate(**inputs, max_new_tokens=48)
print(tokenizer.decode(out[0], skip_special_tokens=True))

No matter how careful you are, there is always a chance that you will make a mistake. In fact, it's not uncommon for most people to make mistakes at some point in their lives. The key to avoiding these mistakes is to be prepared and take steps to avoid
