# Qwen2.5微调实战：24点游戏推理风格聊天

[![](https://raw.githubusercontent.com/SwanHubX/assets/main/badge1.svg)](https://swanlab.cn/@ZeyiLin/qwen2.5-sft-24game/overview)

- **基础模型**：[Qwen2.5-0.5B](https://modelscope.cn/models/Qwen/Qwen2.5-0.5B/summary)
- **微调后模型**：[Qwen2.5-0.5b-24game-sft](https://modelscope.cn/models/testUser/Qwen2.5-0.5b-24game-sft/summary)
- **数据集**：24点游戏推理数据集
- **SwanLab**：[qwen2.5-sft-24game](https://swanlab.cn/@ZeyiLin/qwen2.5-sft-24game/runs/agps0dkifth5l1xytcdyk/chart)
- **微调方式**：全参数微调、LoRA微调
- **推理风格**：详细分析推理风格
- **算力要求**：
  - **全参数微调**：16GB显存
  - **LoRA微调**：16GB显存

## 1. 安装环境

In [1]:
!pip install swanlab
!pip install modelscope==1.22.0
!pip install transformers
!pip install datasets==3.2.0
!pip install peft
!pip install accelerate
!pip install pandas
!pip install addict



## 2. 下载数据

In [None]:
# 数据已准备完毕，训练和测试数据分别为 train_data.json 和 test_data.json

## 3. 登录SwanLab
1. 前往[swanlab](https://swanlab.cn/space/~/settings)复制你的API Key，粘贴到下面的代码中
2. 如果你不希望将登录信息保存到该计算机中，可将`save=True`去掉（每次运行训练需要重新执行下面的代码块）
3. 训练项目名称已设置为 "qwen2.5-sft-24game"

In [2]:
import os

import swanlab

# Never commit a real API key inside a notebook: read it from the
# SWANLAB_API_KEY environment variable instead.
# NOTE(review): when the variable is unset, api_key is None and swanlab.login()
# is expected to ask for the key interactively -- confirm against the SwanLab docs.
# NOTE(review): the key that was previously hard-coded here has been exposed
# and should be revoked in the SwanLab settings page.
swanlab.login(api_key=os.environ.get("SWANLAB_API_KEY"), save=True)

## 4. 开启24点游戏模型微调

In [None]:
import json
import pandas as pd
import torch
from datasets import Dataset
from modelscope import snapshot_download, AutoTokenizer
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq
import os
import swanlab

# Project name handed to SwanLab through its environment variable.
os.environ["SWANLAB_PROJECT"]="qwen2.5-sft-24game"
# System prompt stored with every converted sample and placed in the system
# turn of the tokenized training prompt.
PROMPT = "你是一个24点游戏专家，擅长分析数字组合并找出运算方案。你需要根据用户给出的数字，详细分析解题思路和过程，最终给出能够得到24的运算方案。"
# Maximum number of tokens kept per training sample; predict() also uses it as
# the max_new_tokens budget.
MAX_LENGTH = 2048

# Record the main settings of this run in the SwanLab config.
swanlab.config.update({
    "model": "Qwen/Qwen2.5-0.5B",
    "prompt": PROMPT,
    "data_max_length": MAX_LENGTH,
    })

def dataset_jsonl_transfer(origin_path, new_path):
    """
    Convert the raw conversation dataset into the JSONL layout used for fine-tuning.

    The source file is a JSON list; for every item the first entry of its
    "messages" list is taken as the user question and the second entry as the
    assistant answer. Each item becomes one line of the form
    {"instruction": PROMPT, "input": <question>, "output": <answer>}.

    Args:
        origin_path: path of the raw JSON dataset.
        new_path: path of the JSONL file to write.
    """
    with open(origin_path, "r", encoding="utf-8") as src:
        records = json.load(src)

    # Build every row before opening the destination, so a malformed record
    # fails before any output file is created.
    rows = [
        {
            "instruction": PROMPT,
            "input": record["messages"][0]["content"],
            "output": record["messages"][1]["content"],
        }
        for record in records
    ]

    with open(new_path, "w", encoding="utf-8") as dst:
        dst.writelines(json.dumps(row, ensure_ascii=False) + "\n" for row in rows)


def process_func(example):
    """
    Tokenize one sample into input_ids / attention_mask / labels.

    The prompt part (system + user turns and the assistant header) is masked
    with -100 in the labels, so only the answer tokens and the closing
    pad token contribute to the loss. All three sequences are cut to at most
    MAX_LENGTH entries.
    """
    prompt_enc = tokenizer(
        f"<|im_start|>system\n{PROMPT}<|im_end|>\n<|im_start|>user\n{example['input']}<|im_end|>\n<|im_start|>assistant\n",
        add_special_tokens=False,
    )
    answer_enc = tokenizer(f"{example['output']}", add_special_tokens=False)

    prompt_ids = prompt_enc["input_ids"]
    answer_ids = answer_enc["input_ids"]
    closing_id = tokenizer.pad_token_id

    full_ids = prompt_ids + answer_ids + [closing_id]
    full_mask = prompt_enc["attention_mask"] + answer_enc["attention_mask"] + [1]
    full_labels = [-100] * len(prompt_ids) + answer_ids + [closing_id]

    # Slicing leaves sequences that already fit within MAX_LENGTH unchanged.
    return {
        "input_ids": full_ids[:MAX_LENGTH],
        "attention_mask": full_mask[:MAX_LENGTH],
        "labels": full_labels[:MAX_LENGTH],
    }


def predict(messages, model, tokenizer, max_new_tokens=None):
    """Generate the assistant reply for a chat-style message list.

    Args:
        messages: list of {"role": ..., "content": ...} dicts.
        model: object exposing generate(input_ids, max_new_tokens=...).
        tokenizer: tokenizer exposing apply_chat_template, __call__ and
            batch_decode.
        max_new_tokens: generation budget; defaults to the module-level
            MAX_LENGTH when omitted.

    Returns:
        The decoded text of the newly generated tokens (prompt removed).
    """
    if max_new_tokens is None:
        max_new_tokens = MAX_LENGTH

    # Fall back to the CPU instead of failing on machines without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    generated_ids = model.generate(
        model_inputs.input_ids,
        max_new_tokens=max_new_tokens,
    )
    # generate() returns prompt + continuation; drop the prompt prefix.
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

# Download the Qwen model from ModelScope into the local directory.
model_dir = snapshot_download("Qwen/Qwen2.5-0.5B", cache_dir="./", revision="master")

# Load the tokenizer and the model weights with Transformers.
tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=False, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", torch_dtype=torch.bfloat16)
model.enable_input_require_grads()  # must be called when gradient checkpointing is enabled

# Paths of the raw train/test data and of their converted JSONL counterparts.
train_dataset_path = "train_data.json"
test_dataset_path = "test_data.json"

train_jsonl_new_path = "train_format.jsonl"
test_jsonl_new_path = "val_format.jsonl"

# Convert only when the JSONL file does not exist yet; an existing file is
# reused as-is.
if not os.path.exists(train_jsonl_new_path):
    dataset_jsonl_transfer(train_dataset_path, train_jsonl_new_path)
if not os.path.exists(test_jsonl_new_path):
    dataset_jsonl_transfer(test_dataset_path, test_jsonl_new_path)

# Build the training set.
train_df = pd.read_json(train_jsonl_new_path, lines=True)
train_ds = Dataset.from_pandas(train_df)
train_dataset = train_ds.map(process_func, remove_columns=train_ds.column_names)

# Build the validation set (taken from the test data file).
eval_df = pd.read_json(test_jsonl_new_path, lines=True)
eval_ds = Dataset.from_pandas(eval_df)
eval_dataset = eval_ds.map(process_func, remove_columns=eval_ds.column_names)

# Training hyperparameters; metrics are reported to SwanLab.
args = TrainingArguments(
    output_dir="./output/Qwen2.5-0.5B-24game",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    eval_strategy="steps",
    eval_steps=100,
    logging_steps=10,
    num_train_epochs=2,
    save_steps=400,
    learning_rate=1e-4,
    save_on_each_node=True,
    gradient_checkpointing=True,
    report_to="swanlab",
    run_name="qwen2.5-0.5B-24game",
)

# The collator pads each batch (padding=True) at collation time.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)

trainer.train()

# Qualitative check: run the fine-tuned model on the first 3 test samples.

# Generation should run in eval mode. trainer.train() is expected to leave the
# model in training mode, where dropout stays active and (with gradient
# checkpointing enabled) the KV cache is typically switched off, which makes
# long generations very slow.
model.eval()

test_df = pd.read_json(test_jsonl_new_path, lines=True)[:3]

test_text_list = []

for index, row in test_df.iterrows():
    instruction = row['instruction']
    input_value = row['input']

    messages = [
        {"role": "system", "content": f"{instruction}"},
        {"role": "user", "content": f"{input_value}"}
    ]

    response = predict(messages, model, tokenizer)

    response_text = f"""
    Question: {input_value}

    LLM:{response}
    """

    # Collect the sample as a SwanLab text item and echo it to the cell output.
    test_text_list.append(swanlab.Text(response_text))
    print(response_text)

# Upload the collected predictions, then close the SwanLab run.
swanlab.log({"Prediction": test_text_list})

swanlab.finish()

In [None]:
# LoRA微调版本 - 支持更大模型在T4上训练
import json
import pandas as pd
import torch
from datasets import Dataset
from modelscope import snapshot_download, AutoTokenizer
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import os
import swanlab

# ============= Configuration =============
# Model choice: models of different sizes can be selected.
MODEL_OPTIONS = {
    "qwen2.5-0.5b": "Qwen/Qwen2.5-0.5B", 
    "qwen2.5-1.5b": "Qwen/Qwen2.5-1.5B",
    "qwen2.5-3b": "Qwen/Qwen2.5-3B", 
    "qwen2.5-7b": "Qwen/Qwen2.5-7B"
}

# Model used for this run.
SELECTED_MODEL = "qwen2.5-7b"  # switched to the 7B model for stronger reasoning
MODEL_NAME = MODEL_OPTIONS[SELECTED_MODEL]

# Training mode.
USE_LORA = True  # fine-tune with LoRA adapters
USE_QUANTIZATION = True  # load the base model in 4-bit to save more GPU memory

# LoRA settings (passed as keyword arguments to LoraConfig).
LORA_CONFIG = {
    "r": 16,  # LoRA rank; affects the capacity of the adapters
    "lora_alpha": 32,  # LoRA scaling parameter
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],  # attention and MLP projection layers of Qwen2.5
    "lora_dropout": 0.1,
    "bias": "none",
    "task_type": "CAUSAL_LM"
}

# Training settings (passed as keyword arguments to TrainingArguments).
TRAINING_CONFIG = {
    "output_dir": f"./output/{SELECTED_MODEL}-24game-lora",
    "per_device_train_batch_size": 1,  # small batch because of the T4 memory limit
    "per_device_eval_batch_size": 1,
    "gradient_accumulation_steps": 8,  # more accumulation to emulate a larger batch
    "eval_strategy": "steps",
    "eval_steps": 50,
    "logging_steps": 10,
    "num_train_epochs": 3,  # more epochs than the full fine-tuning run
    "save_steps": 200,
    "learning_rate": 2e-4,  # LoRA usually needs a higher learning rate
    "warmup_steps": 100,
    "lr_scheduler_type": "cosine",
    "weight_decay": 0.01,
    "max_grad_norm": 1.0,
    "save_on_each_node": True,
    "gradient_checkpointing": True,
    "dataloader_pin_memory": False,  # intended to reduce memory use
    "report_to": "swanlab",
    "run_name": f"{SELECTED_MODEL}-24game-lora",
}

# Other settings.
os.environ["SWANLAB_PROJECT"] = "qwen2.5-sft-24game-lora"
PROMPT = "你是一个24点游戏专家，擅长分析数字组合并找出运算方案。你需要根据用户给出的数字，详细分析解题思路和过程，最终给出能够得到24的运算方案。"
MAX_LENGTH = 1024  # shorter sequences to save GPU memory

# Record the configuration of this run in the SwanLab config.
swanlab.config.update({
    "model": MODEL_NAME,
    "use_lora": USE_LORA,
    "use_quantization": USE_QUANTIZATION,
    "lora_config": LORA_CONFIG,
    "training_config": TRAINING_CONFIG,
    "data_max_length": MAX_LENGTH,
})

# Print a summary of the selected setup.
# NOTE(review): the memory figure below is a hard-coded estimate, not a measurement.
print(f"选择的模型: {MODEL_NAME}")
print(f"使用LoRA: {USE_LORA}")
print(f"使用量化: {USE_QUANTIZATION}")
print(f"预计显存需求: ~12-14GB (相比全参数微调7B需要40GB+)")

In [None]:
# 模型加载和LoRA配置
def load_model_and_tokenizer():
    """Download the selected model and load it with its tokenizer for training.

    Behaviour is driven by the module-level settings MODEL_NAME,
    USE_QUANTIZATION, USE_LORA and LORA_CONFIG:

    * with USE_QUANTIZATION the base model is loaded in 4-bit (NF4, double
      quantization);
    * with USE_LORA the model is wrapped with LoRA adapters;
    * the compute dtype and the attention implementation are chosen from the
      capabilities of the current GPU, so that pre-Ampere cards such as the T4
      do not request bfloat16 or FlashAttention-2.

    Returns:
        A (model, tokenizer) tuple.
    """
    import importlib.util

    # Download the model.
    print(f"正在下载模型: {MODEL_NAME}")
    model_dir = snapshot_download(MODEL_NAME, cache_dir="./", revision="master")

    # Load the tokenizer and make sure a pad token is defined.
    tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=False, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    # Compute capability >= 8.0 (Ampere or newer) is what native bfloat16 and
    # FlashAttention-2 require.
    has_cuda = torch.cuda.is_available()
    ampere_or_newer = has_cuda and torch.cuda.get_device_capability()[0] >= 8

    # Keep bfloat16 where it is supported (and on CPU, as before); fall back to
    # float16 on older GPUs.
    compute_dtype = torch.bfloat16 if (ampere_or_newer or not has_cuda) else torch.float16

    # FlashAttention-2 also needs the optional flash_attn package; otherwise
    # let Transformers pick its default attention implementation.
    use_flash_attention = ampere_or_newer and importlib.util.find_spec("flash_attn") is not None
    attn_implementation = "flash_attention_2" if use_flash_attention else None

    # Configure quantization (if enabled).
    quantization_config = None
    if USE_QUANTIZATION:
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,  # 4-bit quantization
            bnb_4bit_compute_dtype=compute_dtype,  # dtype used for the matmuls
            bnb_4bit_quant_type="nf4",  # quantization data type
            bnb_4bit_use_double_quant=True,  # also quantize the quantization constants
        )
        print("启用4bit量化")

    # Load the model.
    model = AutoModelForCausalLM.from_pretrained(
        model_dir,
        quantization_config=quantization_config,
        device_map="auto",  # place the weights automatically
        torch_dtype=compute_dtype if not USE_QUANTIZATION else None,
        trust_remote_code=True,
        attn_implementation=attn_implementation,
    )

    if USE_LORA:
        print("配置LoRA...")

        if USE_QUANTIZATION:
            # Prepares a quantized model for training.
            model = prepare_model_for_kbit_training(model)
        else:
            # With a frozen base model and gradient checkpointing, the inputs
            # must require grads or no gradient reaches the adapters.
            model.enable_input_require_grads()

        # Wrap the model with the LoRA adapters and report their size.
        model = get_peft_model(model, LoraConfig(**LORA_CONFIG))
        model.print_trainable_parameters()
    else:
        # Full-parameter fine-tuning: enable input gradients.
        model.enable_input_require_grads()

    return model, tokenizer

# Load the model and tokenizer; the following cells use these two globals.
model, tokenizer = load_model_and_tokenizer()

In [None]:
# 数据处理函数（与原版相同，但做了一些优化）
def dataset_jsonl_transfer(origin_path, new_path, instruction=None):
    """Convert the raw conversation dataset into a JSONL file for fine-tuning.

    Args:
        origin_path: Path of a JSON file holding a list of items. Each item has
            a "messages" list whose first entry is read as the user turn and
            whose second entry is read as the assistant turn.
        new_path: Destination path; one JSON object is written per line with
            the keys "instruction", "input" and "output".
        instruction: System instruction stored with every sample. Defaults to
            the module-level PROMPT when omitted.
    """
    if instruction is None:
        instruction = PROMPT

    with open(origin_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    messages = [
        {
            "instruction": instruction,
            "input": item["messages"][0]["content"],
            "output": item["messages"][1]["content"],
        }
        for item in data
    ]

    with open(new_path, "w", encoding="utf-8") as file:
        for message in messages:
            # Each record must end with a real newline character. An escaped
            # backslash here would glue all records onto one line and make the
            # file unreadable as JSON Lines.
            file.write(json.dumps(message, ensure_ascii=False) + "\n")

def process_func(example):
    """Tokenize one sample into input_ids / attention_mask / labels.

    The prompt (system turn, user turn and assistant header) is written in the
    ChatML layout and masked with -100 in the labels, so only the answer tokens
    and the closing pad token contribute to the loss.

    Args:
        example: mapping with the keys "input" (user question) and "output"
            (assistant answer).

    Returns:
        A dict with "input_ids", "attention_mask" and "labels", each cut to at
        most MAX_LENGTH entries.
    """
    # The turn separators must be real newline characters so that the training
    # prompt matches the text apply_chat_template() is expected to produce at
    # inference time; an escaped backslash would train on a literal
    # backslash-n sequence instead.
    instruction = tokenizer(
        f"<|im_start|>system\n{PROMPT}<|im_end|>\n<|im_start|>user\n{example['input']}<|im_end|>\n<|im_start|>assistant\n",
        add_special_tokens=False,
    )
    response = tokenizer(f"{example['output']}", add_special_tokens=False)

    input_ids = instruction["input_ids"] + response["input_ids"] + [tokenizer.pad_token_id]
    attention_mask = instruction["attention_mask"] + response["attention_mask"] + [1]
    labels = [-100] * len(instruction["input_ids"]) + response["input_ids"] + [tokenizer.pad_token_id]

    # Truncate to the maximum length.
    if len(input_ids) > MAX_LENGTH:
        input_ids = input_ids[:MAX_LENGTH]
        attention_mask = attention_mask[:MAX_LENGTH]
        labels = labels[:MAX_LENGTH]

    return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

def predict(messages, model, tokenizer):
    """Sample the assistant reply for a chat-style message list.

    The messages are rendered with the tokenizer's chat template, up to 512 new
    tokens are sampled (temperature 0.7) without tracking gradients, and only
    the newly generated part is decoded and returned.
    """
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    encoded = tokenizer([prompt], return_tensors="pt").to(device)
    prompt_ids = encoded.input_ids

    generation_kwargs = {
        "max_new_tokens": 512,  # shorter generations to save GPU memory
        "temperature": 0.7,
        "do_sample": True,
        "pad_token_id": tokenizer.pad_token_id,
    }
    with torch.no_grad():
        outputs = model.generate(prompt_ids, **generation_kwargs)

    # generate() returns prompt + continuation; keep the continuation only.
    completions = [full[len(prefix):] for prefix, full in zip(prompt_ids, outputs)]

    return tokenizer.batch_decode(completions, skip_special_tokens=True)[0]

print("数据处理函数已定义")

In [None]:
# 数据加载和训练执行
# Paths of the raw data and of the converted JSONL files for the LoRA run.
train_dataset_path = "train_data.json"
test_dataset_path = "test_data.json"
train_jsonl_new_path = "train_format_lora.jsonl"
test_jsonl_new_path = "val_format_lora.jsonl"

# Convert the data format; an already existing JSONL file is reused as-is.
if not os.path.exists(train_jsonl_new_path):
    dataset_jsonl_transfer(train_dataset_path, train_jsonl_new_path)
if not os.path.exists(test_jsonl_new_path):
    dataset_jsonl_transfer(test_dataset_path, test_jsonl_new_path)

# Load and tokenize the datasets.
print("加载训练数据...")
train_df = pd.read_json(train_jsonl_new_path, lines=True)
train_ds = Dataset.from_pandas(train_df)
train_dataset = train_ds.map(process_func, remove_columns=train_ds.column_names)

print("加载验证数据...")
eval_df = pd.read_json(test_jsonl_new_path, lines=True)
eval_ds = Dataset.from_pandas(eval_df)
eval_dataset = eval_ds.map(process_func, remove_columns=eval_ds.column_names)

print(f"训练集大小: {len(train_dataset)}")
print(f"验证集大小: {len(eval_dataset)}")

# Build the training arguments from TRAINING_CONFIG.
args = TrainingArguments(**TRAINING_CONFIG)

# Data collator: pads each batch and returns PyTorch tensors.
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer, 
    padding=True,
    return_tensors="pt"
)

# Build the trainer.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
)

# Start training.
# NOTE(review): the duration printed below is a hard-coded estimate.
print("开始LoRA微调训练...")
print(f"使用模型: {MODEL_NAME}")
print(f"预计训练时间: ~2-4小时 (取决于数据量)")

trainer.train()

# Save the LoRA weights together with the tokenizer files.
if USE_LORA:
    print("保存LoRA权重...")
    model.save_pretrained(f"./output/{SELECTED_MODEL}-24game-lora-weights")
    tokenizer.save_pretrained(f"./output/{SELECTED_MODEL}-24game-lora-weights")

print("训练完成！")

In [None]:
# 模型测试和效果验证
print("开始测试微调后的模型...")

# Generation should run in eval mode. trainer.train() is expected to leave the
# model in training mode, where the LoRA dropout stays active and (with
# gradient checkpointing enabled) the KV cache is typically switched off.
model.eval()

# Qualitative check on the first 3 test samples.
test_df = pd.read_json(test_jsonl_new_path, lines=True)[:3]
test_text_list = []

for index, row in test_df.iterrows():
    instruction = row['instruction']
    input_value = row['input']
    expected_output = row['output']

    messages = [
        {"role": "system", "content": f"{instruction}"},
        {"role": "user", "content": f"{input_value}"}
    ]

    # "\n" is a real newline here, so a blank line separates the samples.
    print(f"\n=== 测试 {index + 1} ===")
    print(f"问题: {input_value}")

    # Generate the answer.
    response = predict(messages, model, tokenizer)

    print(f"模型回答: {response}")
    print("-" * 50)

    # Only the first 200 characters of the reference answer are logged.
    response_text = f"""
测试 {index + 1}:
问题: {input_value}

LoRA微调模型回答:
{response}

原始训练数据答案:
{expected_output[:200]}...
"""

    test_text_list.append(swanlab.Text(response_text))

# Log the predictions to SwanLab.
swanlab.log({"LoRA_Predictions": test_text_list})

# GPU memory usage.
if torch.cuda.is_available():
    print(f"\n当前GPU显存使用: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
    print(f"峰值GPU显存使用: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GB")

# Summary of the run configuration.
print("\n=== LoRA微调训练总结 ===")
print(f"基础模型: {MODEL_NAME}")
print(f"训练方式: {'LoRA微调' if USE_LORA else '全参数微调'}")
print(f"是否量化: {USE_QUANTIZATION}")
print(f"LoRA rank: {LORA_CONFIG['r']}")
print(f"目标模块: {LORA_CONFIG['target_modules']}")
print(f"预期效果: 7B模型应该具备更强的数学推理能力")

swanlab.finish()

## LoRA微调使用指南和优化建议

### 📊 显存对比
- **全参数微调7B**: 需要40GB+ 显存  
- **LoRA + 4bit量化**: 仅需12-14GB显存 ✅ 
- **T4 GPU**: 16GB显存 ✅ 完全可用

### 🎯 LoRA配置调优
1. **提高推理能力**:
   ```python
   LORA_CONFIG = {
       "r": 32,  # 增加rank提高表达能力
       "lora_alpha": 64,  # 对应调整alpha
       "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
   }
   ```

2. **节省显存**:
   ```python
   LORA_CONFIG = {
       "r": 8,   # 降低rank节省显存
       "target_modules": ["q_proj", "v_proj"],  # 只训练attention
   }
   ```

### 🚀 模型选择建议
- **qwen2.5-7b**: 最佳推理能力，需要~14GB显存
- **qwen2.5-3b**: 平衡选择，需要~10GB显存  
- **qwen2.5-1.5b**: 轻量级，需要~8GB显存

### 📈 训练优化技巧
1. **学习率调优**: LoRA通常需要2e-4到5e-4的较高学习率
2. **数据质量**: 确保训练数据中的数学计算过程正确
3. **评估指标**: 除了loss，还应该计算数学答案的准确率
4. **正则化**: 使用weight_decay防止过拟合

### ⚡ 进一步提升推理能力的方法
1. **思维链(CoT)训练**: 在数据中明确标注每个计算步骤
2. **工具调用**: 训练模型学会调用计算器验证结果  
3. **强化学习**: 使用RLHF进一步优化推理准确性
4. **集成学习**: 训练多个LoRA模型然后集成