# Lora微调

In [22]:
from peft import LoraConfig,get_peft_model,PeftModel
from transformers import AutoModelForCausalLM,AutoTokenizer,TrainingArguments,Trainer,default_data_collator
from datasets import load_dataset
import os
import warnings
warnings.filterwarnings('ignore')
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

## 1、加载模型和分词器

In [5]:
model_name="Qwen/Qwen3-0.6B"
model=AutoModelForCausalLM.from_pretrained(model_name,device_map='auto',torch_dtype="auto",)
tokenizer=AutoTokenizer.from_pretrained(model_name)

Loading weights: 100%|██████████| 311/311 [00:01<00:00, 265.63it/s, Materializing param=model.norm.weight]                              


In [6]:
model

Qwen3ForCausalLM(
  (model): Qwen3Model(
    (embed_tokens): Embedding(151936, 1024)
    (layers): ModuleList(
      (0-27): 28 x Qwen3DecoderLayer(
        (self_attn): Qwen3Attention(
          (q_proj): Linear(in_features=1024, out_features=2048, bias=False)
          (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (v_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (o_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
          (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
        )
        (mlp): Qwen3MLP(
          (gate_proj): Linear(in_features=1024, out_features=3072, bias=False)
          (up_proj): Linear(in_features=1024, out_features=3072, bias=False)
          (down_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): Qwen3RMSNorm((1024,), eps=1e-06)
        (post_attention_layer

## 2、LoraConfig配置

In [7]:
lora_config = LoraConfig(
    r=16,  # LoRA矩阵的秩
    lora_alpha=32,  # LoRA alpha参数
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # 要应用LoRA的模块
    lora_dropout=0.05,  # Dropout概率
    bias="none",  # 是否训练偏置
    task_type="CAUSAL_LM",  # 任务类型
)

In [8]:
def print_module_names(model, prefix=""):
    for name, module in model.named_children():
        full_name = f"{prefix}.{name}" if prefix else name
        print(full_name)
        print_module_names(module, full_name)

print_module_names(model)

model
model.embed_tokens
model.layers
model.layers.0
model.layers.0.self_attn
model.layers.0.self_attn.q_proj
model.layers.0.self_attn.k_proj
model.layers.0.self_attn.v_proj
model.layers.0.self_attn.o_proj
model.layers.0.self_attn.q_norm
model.layers.0.self_attn.k_norm
model.layers.0.mlp
model.layers.0.mlp.gate_proj
model.layers.0.mlp.up_proj
model.layers.0.mlp.down_proj
model.layers.0.mlp.act_fn
model.layers.0.input_layernorm
model.layers.0.post_attention_layernorm
model.layers.1
model.layers.1.self_attn
model.layers.1.self_attn.q_proj
model.layers.1.self_attn.k_proj
model.layers.1.self_attn.v_proj
model.layers.1.self_attn.o_proj
model.layers.1.self_attn.q_norm
model.layers.1.self_attn.k_norm
model.layers.1.mlp
model.layers.1.mlp.gate_proj
model.layers.1.mlp.up_proj
model.layers.1.mlp.down_proj
model.layers.1.mlp.act_fn
model.layers.1.input_layernorm
model.layers.1.post_attention_layernorm
model.layers.2
model.layers.2.self_attn
model.layers.2.self_attn.q_proj
model.layers.2.self_attn

In [9]:
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # 打印可训练参数

trainable params: 4,587,520 || all params: 756,219,904 || trainable%: 0.6066


## 3、加载数据集

In [10]:
data=load_dataset('json',data_files='../../dataset/chinese_law_ft_dataset.json',split="train[:1000]")

In [11]:
dataset=data.train_test_split(
    train_size=0.7,
    shuffle=True,
    seed=7
)

In [12]:
dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'id'],
        num_rows: 700
    })
    test: Dataset({
        features: ['instruction', 'input', 'output', 'id'],
        num_rows: 300
    })
})

## 4、数据预处理

In [13]:
def process_fun(example):
    content=[]
    for instruction,input,output in zip(example['instruction'],example['input'],example['output']):
        if input.strip():
            text=f'Human:{instruction}\n{input}\nAI:{output}'
            content.append(text)
        else:
            text=f'Human:{instruction}\nAI:{output}'
            content.append(text)
    
    encoded = tokenizer(
        content,
        max_length=512,
        truncation=True,
        padding="max_length",
        return_tensors="pt"
    )
    labels = encoded["input_ids"].clone()
    for index,text in enumerate(content):
        answer_start=text.find('AI:')+len('AI:')
        question=text[:answer_start]
        question_ids=tokenizer.encode(question, add_special_tokens=False)
        question_length=len(question_ids)
        labels[index,:question_length]=-100
    return {
        "input_ids": encoded["input_ids"],
        "attention_mask": encoded["attention_mask"],
        "labels": labels
    }


In [14]:
train_process_data=dataset['train'].map(process_fun,batched=True,remove_columns=dataset['train'].column_names)
test_process_data=dataset['test'].map(process_fun,batched=True,remove_columns=dataset['test'].column_names)

## 5、模型训练配置

In [16]:
# 定义训练参数
training_args = TrainingArguments(
    output_dir="../../models/lora",
    logging_steps=10,
    logging_dir='./runs',
    eval_strategy='epoch',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    learning_rate=2e-5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss',
    gradient_accumulation_steps=4,  # 如果GPU内存有限
)

`logging_dir` is deprecated and will be removed in v5.2. Please set `TENSORBOARD_LOGGING_DIR` instead.


## 6、训练模型

In [17]:
trainer=Trainer(
    model=model,
    args=training_args,
    eval_dataset=test_process_data,
    train_dataset=train_process_data,
    data_collator=default_data_collator,
)

In [18]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,6.326041,4.737208
2,0.632712,0.484325
3,0.465811,0.438377


TrainOutput(global_step=132, training_loss=3.6078737260717335, metrics={'train_runtime': 14371.4837, 'train_samples_per_second': 0.146, 'train_steps_per_second': 0.009, 'total_flos': 3874832252928000.0, 'train_loss': 3.6078737260717335, 'epoch': 3.0})

## 7、保存模型

In [20]:
trainer.save_model('../../models/lora')

## 8、模型推理

In [49]:
model=AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B",device_map='auto',torch_dtype="auto",)
model = PeftModel.from_pretrained(model, '../../models/lora',device_map='auto')

Loading weights: 100%|██████████| 311/311 [00:00<00:00, 428.27it/s, Materializing param=model.norm.weight]                              


Human:我想知道如果当事人或其法定代理人提出回避申请，公安机关应该在什么时候作出决定并通知申请人？
AI:


In [56]:
text = "我想知道如果当事人或其法定代理人提出回避申请，公安机关应该在什么时候作出决定并通知申请人？"
prompt = f"Human:{text}\nAI:"
inputs = {k: v.to("cuda") for k, v in tokenizer(prompt, return_tensors="pt").items()}

# 生成
outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
    repetition_penalty=1.2,
)

# 输出结果
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Human:我想知道如果当事人或其法定代理人提出回避申请，公安机关应该在什么时候作出决定并通知申请人？
AI: 当事人或者其法定代理人、诉讼参与人的申请被受理后，在收到申请之日起5个工作日内应当由公安机关作出书面决定，并依照规定告知申请人。
