# 微信聊天角色扮演 - Qwen3-0.6B 微调

使用LoRA微调Qwen3-0.6B，让模型学习模仿聊天记录中对方的说话风格。

## 环境要求
- Google Colab (GPU)
- 免费版T4 GPU即可运行

In [5]:
# 安装依赖
!pip install -q transformers>=4.40.0 peft>=0.10.0 trl>=0.8.0 datasets accelerate bitsandbytes

In [10]:
import json
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
# Report whether a CUDA device is visible and, if so, its name and total memory.
has_gpu = torch.cuda.is_available()
print(f"GPU可用: {has_gpu}")
if has_gpu:
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU型号: {torch.cuda.get_device_name(0)}")
    print(f"显存: {gpu_props.total_memory / 1e9:.1f} GB")

GPU可用: True
GPU型号: Tesla T4
显存: 15.8 GB


## 1. 上传训练数据

将预处理好的 `train_data.json` 上传到Colab

In [8]:
from google.colab import files

# Upload the training data through the Colab file picker.
uploaded = files.upload()
if not uploaded:
    # Nothing was uploaded (e.g. the dialog was dismissed); stop here with a
    # clear message instead of failing on the lookup below.
    raise RuntimeError("未上传任何文件，请重新运行本单元并选择 train_data.json")

# The first uploaded file name is used as the training data path.
data_file = next(iter(uploaded))
print(f"已上传: {data_file}")

Saving train_data.json to train_data.json
已上传: train_data.json


In [11]:
# Parse the uploaded JSON file into memory.
with open(data_file, mode='r', encoding='utf-8') as data_fp:
    train_data = json.load(data_fp)

# Show the sample count, then pretty-print the first record as a sanity check.
print(f"训练样本数量: {len(train_data)}")
print("\n样本示例:")
first_sample = train_data[0]
print(json.dumps(first_sample, ensure_ascii=False, indent=2))

训练样本数量: 9

样本示例:
{
  "conversations": [
    {
      "role": "user",
      "content": "到家了没"
    },
    {
      "role": "assistant",
      "content": "今天蟹蟹啦\n把钱转给你\n到啦到啦\n你们到家了吗"
    },
    {
      "role": "user",
      "content": "我们在外面消消食"
    },
    {
      "role": "assistant",
      "content": "好滴好滴，那下回我请"
    }
  ]
}


## 2. 加载模型和Tokenizer

In [12]:
MODEL_ID = "Qwen/Qwen3-0.6B"

# Fetch the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Fall back to the EOS token for padding when no pad token is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

vocab_size = len(tokenizer)
print(f"词表大小: {vocab_size}")
print(f"特殊token: pad={tokenizer.pad_token}, eos={tokenizer.eos_token}")

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

词表大小: 151669
特殊token: pad=<|endoftext|>, eos=<|im_end|>


In [13]:
# 4-bit NF4 quantization with double quantization and float16 compute,
# used to reduce GPU memory usage.
quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.float16,
    "bnb_4bit_use_double_quant": True,
}
bnb_config = BitsAndBytesConfig(**quantization_options)

# Load the quantized base model and hand it straight to PEFT's k-bit
# training preparation helper.
model = prepare_model_for_kbit_training(
    AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )
)

# Report the parameter count in millions.
param_count_millions = model.num_parameters() / 1e6
print("模型加载完成")
print(f"模型参数量: {param_count_millions:.1f}M")

model.safetensors:   0%|          | 0.00/1.50G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/311 [00:00<?, ?it/s]



generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

模型加载完成
模型参数量: 751.6M


## 3. 配置LoRA

In [None]:
# Modules that receive LoRA adapters: the attention projections followed by
# the feed-forward projections.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
ffn_projections = ["gate_proj", "up_proj", "down_proj"]

# LoRA hyperparameters: rank 16, scaling factor 32, 5% dropout, no bias terms.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=attention_projections + ffn_projections,
)

# Wrap the model with the adapters and print the trainable-parameter summary.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

## 4. 准备数据集

In [None]:
def format_conversation(example):
    """Render one training example as a single chat-formatted string.

    Args:
        example: Mapping whose "conversations" entry holds the list of chat
            messages passed to the tokenizer's chat template.

    Returns:
        A dict ``{"text": ...}`` containing the rendered, untokenized
        conversation without a trailing generation prompt.
    """
    rendered = tokenizer.apply_chat_template(
        example["conversations"],
        add_generation_prompt=False,
        tokenize=False,
    )
    return {"text": rendered}

# `Dataset` comes from the `datasets` package installed in the setup cell;
# it is imported here because this is the only cell that uses it.
from datasets import Dataset

# Build a Dataset from the raw records and add the rendered chat text as a
# "text" column via format_conversation.
dataset = Dataset.from_list(train_data)
dataset = dataset.map(format_conversation)

# Preview: dataset size and the first 500 characters of the first sample.
print(f"数据集大小: {len(dataset)}")
print(f"\n格式化后的样本:")
print(dataset[0]["text"][:500])

## 5. 训练配置

In [None]:
# Hyperparameters for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="./output",
    seed=42,
    report_to="none",
    # Schedule
    num_train_epochs=3,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    # Batching
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    # Precision and optimizer
    fp16=True,
    optim="paged_adamw_8bit",
    # Logging and checkpointing
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=2,
)

# Print a short summary; the effective batch size is the per-device batch
# size multiplied by the gradient accumulation steps.
batch_size = training_args.per_device_train_batch_size
accumulation_steps = training_args.gradient_accumulation_steps
summary_lines = [
    "训练配置:",
    f"  Epochs: {training_args.num_train_epochs}",
    f"  Batch size: {batch_size}",
    f"  Gradient accumulation: {accumulation_steps}",
    f"  Effective batch size: {batch_size * accumulation_steps}",
    f"  Learning rate: {training_args.learning_rate}",
]
print("\n".join(summary_lines))

In [None]:
# Create the trainer.
# `max_seq_length`, `dataset_text_field` and `packing` are intentionally not
# passed here: current TRL releases reject them as SFTTrainer keyword
# arguments (TypeError) and expect them on SFTConfig instead.
# The SFTConfig defaults ("text" field, no packing) match the values this
# notebook used. The sequence-length cap now follows the SFTConfig default;
# to set a custom cap, pass an SFTConfig as `args` with the length field of
# the installed TRL version.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    processing_class=tokenizer,
)

print("Trainer已创建，准备开始训练")

## 6. 开始训练

In [None]:
# Run the fine-tuning loop, printing a status message before and after it.
print("开始训练...")
trainer.train()
print("训练完成！")

## 7. 保存模型

In [None]:
# Save the LoRA adapter and the tokenizer files into the same directory.
LORA_OUTPUT_DIR = "./lora_adapter"
model.save_pretrained(LORA_OUTPUT_DIR)
tokenizer.save_pretrained(LORA_OUTPUT_DIR)

print(f"LoRA适配器已保存至: {LORA_OUTPUT_DIR}")

In [None]:
# (Optional) Merge the adapter into a standalone full model.
from peft import PeftModel

MERGE_OUTPUT_DIR = "./merged_model"

# Reload the base checkpoint without quantization, in float16.
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Attach the saved LoRA weights, then fold them into the base weights.
adapter_model = PeftModel.from_pretrained(base_model, LORA_OUTPUT_DIR)
merged_model = adapter_model.merge_and_unload()

# Write the merged model first, then the tokenizer, to the output directory.
for artifact in (merged_model, tokenizer):
    artifact.save_pretrained(MERGE_OUTPUT_DIR)

print(f"合并后的模型已保存至: {MERGE_OUTPUT_DIR}")

## 8. 测试模型

In [None]:
def chat(model, tokenizer, user_input, history=None):
    """Generate one sampled reply to ``user_input``.

    Args:
        model: Causal language model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer providing ``apply_chat_template``, ``decode``
            and ``pad_token_id``.
        user_input: Text of the new user message.
        history: Optional list of earlier ``{"role": ..., "content": ...}``
            messages placed before the new message. It is not modified.

    Returns:
        The newly generated text, decoded without special tokens and with
        surrounding whitespace stripped.
    """
    # A None default avoids sharing one list object between calls.
    earlier_turns = [] if history is None else list(history)
    messages = earlier_turns + [{"role": "user", "content": user_input}]

    # enable_thinking=False is the Qwen3 chat-template switch (see the Qwen3
    # model card) that turns off the reasoning block. The chat-style training
    # data contains no reasoning, so replies should start with the chat text.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Gradients are not needed for generation.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Drop the prompt tokens so only the newly generated part is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    generated = outputs[0][prompt_length:]
    response = tokenizer.decode(generated, skip_special_tokens=True)

    return response.strip()

In [None]:
# Sample prompts for a quick qualitative check.
test_inputs = [
    "在干嘛",
    "今天累不累",
    "晚上吃什么",
    "一起看电影吧"
]

# The merge step is optional, so fall back to the LoRA-wrapped training model
# when `merged_model` was never created.
if "merged_model" in globals():
    test_model = merged_model
else:
    test_model = model

# Switch to evaluation mode so dropout is disabled during generation.
test_model.eval()

print("=== 模型测试 ===")
for user_input in test_inputs:
    response = chat(test_model, tokenizer, user_input)
    print(f"用户: {user_input}")
    print(f"模型: {response}")
    print()

## 9. 下载模型

将训练好的模型下载到本地

In [None]:
# Zip the LoRA adapter directory into lora_adapter.zip
!zip -r lora_adapter.zip ./lora_adapter

# Download the archive via the Colab files helper
from google.colab import files
files.download('lora_adapter.zip')

print("LoRA适配器已下载，解压后放到本地项目目录使用")

In [None]:
# (Optional) Zip the merged full model.
# Note: the full model files are large, so the download may take a long time.
!zip -r merged_model.zip ./merged_model

from google.colab import files
files.download('merged_model.zip')

## 本地使用方法

```bash
# 方法1: 使用LoRA适配器
python inference/chat.py --model Qwen/Qwen3-0.6B --lora ./lora_adapter

# 方法2: 使用合并后的模型
python inference/chat.py --model ./merged_model
```