### 加载模型测试，并打印模型，确认最后一层分类层名字，lora微调参数设置

In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# 设置代理（替换为你的实际代理地址）
proxies = {
    "http": "http://127.0.0.1:7890",   # 例如：http://127.0.0.1:7890
    "https": "http://127.0.0.1:7890",  # 例如：http://127.0.0.1:7890
}
model_name = "qwen/Qwen2-0.5B-Instruct"

# 下载并加载模型和分词器（显式传递代理）
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    proxies=proxies
)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
    proxies=proxies
)
print(model)

  from .autonotebook import tqdm as notebook_tqdm
Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Some weights of Qwen2ForSequenceClassification were not initialized from the model checkpoint at qwen/Qwen2-0.5B-Instruct and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Qwen2ForSequenceClassification(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 896)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=896, out_features=896, bias=True)
          (k_proj): Linear(in_features=896, out_features=128, bias=True)
          (v_proj): Linear(in_features=896, out_features=128, bias=True)
          (o_proj): Linear(in_features=896, out_features=896, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=896, out_features=4864, bias=False)
          (up_proj): Linear(in_features=896, out_features=4864, bias=False)
          (down_proj): Linear(in_features=4864, out_features=896, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((896,), eps=1e-06)
    (rotary_emb): Q

### 完整的lora微调qwen代码

In [2]:
import torch
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType
)
import numpy as np
from sklearn.metrics import accuracy_score
import model_config_qwen  # 自定义配置文件，需根据实际情况修改
import os

# 环境配置
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
os.environ["TRUST_REMOTE_CODE"] = "true"
model_name = "qwen/Qwen2-0.5B-Instruct"
# 代理设置（根据需要配置）
proxies = {
    "http": "http://127.0.0.1:7890",
    "https": "http://127.0.0.1:7890",
}

# ========== 配置类 ==========
class ModelConfig:
    model_path = model_config_qwen.model_name_tokenizer_path   # 模型保存路径
    num_labels = model_config_qwen.num_labels                 # 分类类别数
    model_min_length = model_config_qwen.model_min_length     # 最小输入长度
    freeze_base_model = model_config_qwen.freeze_base_model   # 是否冻结基础模型参数（只训练 LoRA 参数）
    pooling_type = model_config_qwen.pooling_type             # 池化方式（如 last, first, mean）

class TrainingConfig:
    output_dir = model_config_qwen.output_dir                 # 输出目录
    max_epochs = model_config_qwen.max_epochs                 # 最大训练轮次
    batch_size = model_config_qwen.train_batch_size           # 训练批次大小
    eval_batch_size = model_config_qwen.test_batch_size         # 验证批次大小
    learning_rate = model_config_qwen.learning_rate           # 学习率
    weight_decay = model_config_qwen.weight_decay             # 权重衰减
    gradient_accumulation = model_config_qwen.gradient_accumulation_steps  # 梯度累积步数
    fp16 = model_config_qwen.use_fp16                         # 是否启用混合精度
    gradient_checkpointing = model_config_qwen.gradient_checkpointing  # 是否启用梯度检查点
    early_stop_patience = model_config_qwen.early_stop_patience           # 早停耐心值

class LoRAConfig:
    r = model_config_qwen.lora_r                  # LoRA 秩
    lora_alpha = model_config_qwen.lora_alpha     # LoRA alpha 值
    lora_dropout = model_config_qwen.lora_dropout # LoRA dropout 率
    target_modules = model_config_qwen.target_modules  # LoRA 所针对的目标模块列表

# ========== 工具函数 ==========
def analyze_text_length(texts, tokenizer):
    """分析文本长度分布，并动态确定 max_length"""
    lengths = []
    for text in texts:
        tokens = tokenizer.tokenize(text)
        lengths.append(len(tokens))
    print(f"\n📊 文本长度分析:")
    print(f"最小: {min(lengths)}, 最大: {max(lengths)}, 平均: {np.mean(lengths):.1f}")
    print(f"75% 分位数: {np.percentile(lengths, 75)}, 95% 分位数: {np.percentile(lengths, 95)}")
    max_length = min(int(np.percentile(lengths, 95)), tokenizer.model_max_length)
    max_length = max(max_length, ModelConfig.model_min_length)
    print(f"推荐 max_length: {max_length}")
    return max_length

def prepare_dataset(tokenizer, texts, labels, max_length):
    """数据集预处理，将文本 token 化，同时保留标签"""
    dataset = Dataset.from_dict({"text": texts, "label": labels})
    
    def tokenize_fn(examples):
        tokenized = tokenizer(
            examples["text"],
            padding="max_length",
            truncation=True,
            max_length=max_length,
            return_tensors="pt"
        )
        return {
            "input_ids": tokenized["input_ids"][0],
            "attention_mask": tokenized["attention_mask"][0],
            "label": examples["label"]
        }
    
    return dataset.map(
        tokenize_fn,
        batched=False,
        remove_columns=["text"],
        num_proc=4,
        desc="Tokenizing"
    )

# ========== 主流程 ==========
def main():
    # 硬件检查
    print(f"\n🖥️ 硬件配置检查:")
    print(f"PyTorch 版本: {torch.__version__}")
    print(f"CUDA 可用: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"显存: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f}GB")

    # 初始化配置
    model_config = ModelConfig()
    train_config = TrainingConfig()
    lora_config = LoRAConfig()

    # 加载数据
    print("\n📂 加载数据集...")
    train_df = pd.read_excel(model_config_qwen.train_data_path)
    valid_df = pd.read_excel(model_config_qwen.dev_data_path)
    print(f"训练集: {len(train_df)} 条, 验证集: {len(valid_df)} 条")

    # 初始化 Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        proxies=proxies,
        pad_token="<|endoftext|>"
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # 动态确定 max_length
    max_length = analyze_text_length(train_df.iloc[:, 0].tolist(), tokenizer)

    # 准备数据集
    train_dataset = prepare_dataset(
        tokenizer,
        train_df.iloc[:, 0].tolist(),
        train_df.iloc[:, 1].tolist(),
        max_length
    )
    valid_dataset = prepare_dataset(
        tokenizer,
        valid_df.iloc[:, 0].tolist(),
        valid_df.iloc[:, 1].tolist(),
        max_length
    )

    # 加载预训练模型
    print("\n🚀 加载预训练模型...")
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=model_config.num_labels,
        id2label={"0": "非垃圾短信", "1": "垃圾短信"},
        label2id={"非垃圾短信": 0, "垃圾短信": 1},
        # torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        proxies=proxies,
        use_cache=False  # 禁用缓存以兼容梯度检查点
    )
    if model.config.pad_token_id is None:
        model.config.pad_token_id = tokenizer.pad_token_id

    # 应用 LoRA 微调配置
    print("\n🎛️ 应用 LoRA 配置...")
    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        target_modules=lora_config.target_modules,
        inference_mode=False,
        r=lora_config.r,
        lora_alpha=lora_config.lora_alpha,
        lora_dropout=lora_config.lora_dropout,
        modules_to_save=["score"]  # 保留分类头参数可训练
    )
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()

    # 检查可训练参数
    print("\n🔍 可训练参数检查:")
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(f"可训练参数: {name}")

    # 配置训练参数
    training_args = TrainingArguments(
        output_dir=train_config.output_dir,
        eval_strategy="epoch",  # 修正参数名称为evaluation_strategy，报错就改为eval_strategy
        save_strategy="epoch",
        learning_rate=train_config.learning_rate,
        per_device_train_batch_size=train_config.batch_size,
        per_device_eval_batch_size=train_config.eval_batch_size,
        gradient_accumulation_steps=train_config.gradient_accumulation,
        num_train_epochs=train_config.max_epochs,
        weight_decay=train_config.weight_decay,
        fp16=train_config.fp16,
        gradient_checkpointing=train_config.gradient_checkpointing,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        logging_steps=50,
        save_total_limit=2,
        report_to="none",
        optim="adamw_torch",
        dataloader_num_workers=4,
        lr_scheduler_type="cosine",
        warmup_ratio=0.1
    )

    # 评估函数（计算准确率）
    def compute_metrics(p):
        preds = np.argmax(p.predictions, axis=1)
        return {"accuracy": accuracy_score(p.label_ids, preds)}

    # 构建 Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=valid_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=train_config.early_stop_patience)]
    )

    # 开始微调训练
    print("\n🚂 启动微调...")
    train_result = trainer.train()

    # 保存最佳模型和 LoRA 权重
    print("\n💾 保存最佳模型...")
    best_model_dir = os.path.join(train_config.output_dir, "best_model")
    model.save_pretrained(best_model_dir)
    tokenizer.save_pretrained(best_model_dir)
    lora_model_dir = model_config_qwen.model_path_lora
    model.save_pretrained(lora_model_dir)
    print(f"LoRA 权重保存至: {lora_model_dir}")

if __name__ == "__main__":
    torch.backends.cuda.matmul.allow_tf32 = True  # 启用 TF32 矩阵运算
    torch.backends.cudnn.benchmark = True         # 启用 cuDNN 基准优化
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"  # 减少显存碎片
    main()





🖥️ 硬件配置检查:
PyTorch 版本: 2.6.0+cu118
CUDA 可用: True
GPU: NVIDIA GeForce RTX 3050 Laptop GPU
显存: 4.0GB

📂 加载数据集...
训练集: 60 条, 验证集: 20 条

📊 文本长度分析:
最小: 5, 最大: 12, 平均: 8.0
75% 分位数: 9.0, 95% 分位数: 11.0
推荐 max_length: 128


Tokenizing (num_proc=4): 100%|██████████| 60/60 [00:07<00:00,  8.56 examples/s]
Tokenizing (num_proc=4): 100%|██████████| 20/20 [00:06<00:00,  3.05 examples/s]



🚀 加载预训练模型...


Some weights of Qwen2ForSequenceClassification were not initialized from the model checkpoint at qwen/Qwen2-0.5B-Instruct and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



🎛️ 应用 LoRA 配置...
trainable params: 1,083,136 || all params: 495,117,696 || trainable%: 0.2188

🔍 可训练参数检查:
可训练参数: base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight
可训练参数: base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight
可训练参数: base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight
可训练参数: base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight
可训练参数: base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight
可训练参数: base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight
可训练参数: base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight
可训练参数: base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight
可训练参数: base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight
可训练参数: base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight
可训练参数: base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.weight
可训练参数: base_model.model.model.layer

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.



🚂 启动微调...




Epoch,Training Loss,Validation Loss,Accuracy
1,No log,7.451737,0.15
2,No log,7.451737,0.15
3,No log,7.451737,0.15





💾 保存最佳模型...
LoRA 权重保存至: ./lora_weights/qwen


### 推理代码

In [4]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import PeftModel, PeftConfig
import torch
import model_config_qwen  # 自定义配置文件，需根据实际情况修改

# 加载模型配置
peft_model_id = model_config_qwen.model_path_lora  # LoRA 权重保存路径
config = PeftConfig.from_pretrained(peft_model_id)

# 初始化 Tokenizer
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, trust_remote_code=True)

# 加载基础模型
model = AutoModelForSequenceClassification.from_pretrained(
    config.base_model_name_or_path,
    num_labels=model_config_qwen.num_labels,
    id2label={"0": "非垃圾短信", "1": "垃圾短信"},
    label2id={"非垃圾短信": 0, "垃圾短信": 1},
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True
)

# 加载 LoRA 权重
model = PeftModel.from_pretrained(model, peft_model_id)

# 切换到推理模式
model.eval()

# 推理函数
def predict(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    predicted_class_id = torch.argmax(logits, dim=-1).item()
    return model.config.id2label[str(predicted_class_id)]  # 确保键是字符串类型

# 测试推理
text = "这是一个测试文本，判断是否为垃圾短信。"
result = predict(text)
print(f"预测结果: {result}")

Some weights of Qwen2ForSequenceClassification were not initialized from the model checkpoint at qwen/Qwen2-0.5B-Instruct and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


预测结果: 垃圾短信
