# 中文角色扮演微调

**目标**：在有限计算资源（Colab 免费 GPU）下，对 Qwen2-1.5B-Chat 做小规模 SFT，使其能根据 `instruction`（角色设定）与 `input` 进行一轮符合设定的对话并输出 `output`。

## 1. 克隆仓库并安装依赖

In [None]:
# Clone the project repository and switch the working directory into it.
# NOTE(review): later cells import `utils` and open `samples.json` by relative
# path, so they presumably live in this repository — verify.
!git clone https://github.com/chenkx612/Qwen2-Roleplay-SFT.git
%cd Qwen2-Roleplay-SFT

In [1]:
# Install bitsandbytes quietly; it is needed for the 4-bit quantized model load
# configured through BitsAndBytesConfig further down.
!pip install -q bitsandbytes

## 2. 配置

In [2]:
# ---- Model and storage ------------------------------------------------------
# NOTE(review): confirm this repo id resolves on the Hugging Face Hub (Qwen2
# chat checkpoints may be published under an `-Instruct` suffix) — TODO verify.
MODEL_NAME = "Qwen/Qwen2-1.5B-Chat"
CACHE_DIR = "/content/hf_cache"   # download cache for tokenizer / model files
OUTPUT_DIR = "./checkpoints"      # Trainer checkpoints and the saved adapter
USE_4BIT = True                   # load the base model with 4-bit quantization

# ---- Data -------------------------------------------------------------------
TOP_N = 100        # maximum number of filtered samples kept for training
MAX_LENGTH = 512   # every training sequence is padded / cut to this many tokens
PROMPT_TMPL = "角色设定：{instruction}\n用户：{input}\n助手："

# ---- Optimisation -----------------------------------------------------------
SEED = 42
NUM_EPOCHS = 2
LEARNING_RATE = 2e-4
PER_DEVICE_BATCH_SIZE = 1
GRADIENT_ACCUMULATION_STEPS = 8

# ---- Decoding ---------------------------------------------------------------
DO_SAMPLE = True
DECODE_TEMPERATURE = 1.0     # controls diversity; larger means more random
DECODE_TOP_K = 30            # number of highest-probability tokens considered
DECODE_TOP_P = 0.9           # cumulative-probability threshold for nucleus sampling
DECODE_MAX_NEW_TOKENS = 128  # upper bound on newly generated tokens

In [None]:
import os
import torch

# Make sure the output directory exists before anything writes into it.
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Seed PyTorch's random number generator with SEED.
torch.manual_seed(SEED)

## 3. 加载并筛选数据集

In [None]:
from datasets import load_dataset, Dataset

# Request the "train" split explicitly. Without `split=`, load_dataset returns a
# DatasetDict, and iterating that yields split names (plain strings) rather
# than sample dicts, which breaks the per-sample filtering below.
raw_ds = load_dataset("LooksJuicy/Chinese-Roleplay-SingleTurn", split="train")

def is_valid_sample(sample, min_len=100, max_len=300):
    """Return True when a sample is usable for training.

    A sample qualifies when the combined character count of its
    'instruction', 'input' and 'output' fields lies within
    [min_len, max_len] (inclusive) and none of the three fields is empty
    or whitespace-only.
    """
    fields = (sample['instruction'], sample['input'], sample['output'])
    combined = sum(len(text) for text in fields)
    if not (min_len <= combined <= max_len):
        return False
    return all(text.strip() for text in fields)

# Keep the samples that pass is_valid_sample, order them longest-first by
# combined character count, and build the training set from at most TOP_N rows.
filtered_samples = [s for s in raw_ds if is_valid_sample(s)]
filtered_samples.sort(
    key=lambda s: sum(len(s[field]) for field in ('instruction', 'input', 'output')),
    reverse=True,
)
train_ds = Dataset.from_list(filtered_samples[:TOP_N])
print(f"筛选后样本数: {len(train_ds)}")
print(train_ds[0])

## 4. Tokenization

In [5]:
from transformers import AutoTokenizer

# Load the tokenizer for MODEL_NAME, caching downloaded files under CACHE_DIR.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR, trust_remote_code=True)
# Use the EOS token as the padding token; the training tokenization step pads
# input_ids with tokenizer.pad_token_id.
tokenizer.pad_token = tokenizer.eos_token

In [7]:
from utils import make_prompt

def tokenize_and_build_labels(batch):
    """Tokenize a batch of samples into fixed-length causal-LM training rows.

    Each row is ``prompt tokens + response tokens + EOS``, right-padded to
    MAX_LENGTH. Labels are -100 on prompt and padding positions so that only
    the response and the closing EOS contribute to the loss.

    Args:
        batch: mapping with parallel lists under 'instruction', 'input' and
            'output', as passed by ``Dataset.map(..., batched=True)``.

    Returns:
        dict with 'input_ids', 'attention_mask' and 'labels', each a list of
        MAX_LENGTH-long integer lists.
    """
    prompts = [
        make_prompt({"instruction": ins, "input": inp}, PROMPT_TMPL)
        for ins, inp in zip(batch['instruction'], batch['input'])
    ]
    tokenized_prompt = tokenizer(prompts, truncation=True, max_length=MAX_LENGTH, add_special_tokens=False)
    tokenized_response = tokenizer(batch['output'], truncation=True, max_length=MAX_LENGTH, add_special_tokens=False)

    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id
    input_ids_list, attention_mask_list, labels_list = [], [], []
    for p_ids, r_ids in zip(tokenized_prompt['input_ids'], tokenized_response['input_ids']):
        # Reserve the final slot for EOS. A plain cut at MAX_LENGTH would drop
        # EOS from over-long rows and, when the prompt alone fills the window,
        # leave every label at -100 (no supervised token at all).
        ids = (p_ids + r_ids)[:MAX_LENGTH - 1] + [eos_id]
        labels = ([-100] * len(p_ids) + r_ids)[:MAX_LENGTH - 1] + [eos_id]

        # Right-pad to MAX_LENGTH. The pad token is the same id as EOS, so an
        # explicit attention mask is the only thing that marks real tokens.
        pad_len = MAX_LENGTH - len(ids)
        attention_mask = [1] * len(ids) + [0] * pad_len
        ids = ids + [pad_id] * pad_len
        labels = labels + [-100] * pad_len

        input_ids_list.append(ids)
        attention_mask_list.append(attention_mask)
        labels_list.append(labels)
    return {
        "input_ids": input_ids_list,
        "attention_mask": attention_mask_list,
        "labels": labels_list,
    }

# Tokenize the whole training set in batches and drop the original text
# columns, leaving only the fields returned by tokenize_and_build_labels.
train_tokenized = train_ds.map(tokenize_and_build_labels, batched=True, remove_columns=train_ds.column_names)

## 5. 加载模型

In [8]:
from transformers import BitsAndBytesConfig, AutoModelForCausalLM 

# Arguments shared by the quantized and the non-quantized load.
_load_kwargs = {
    "cache_dir": CACHE_DIR,
    "trust_remote_code": True,
    "device_map": 'auto',
}
if USE_4BIT:
    # 4-bit NF4 quantization with double quantization; compute runs in float16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16,
    )
    _load_kwargs["quantization_config"] = bnb_config

base_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **_load_kwargs)

## 6. 微调前测试

In [None]:
# 批量生成微调前output
from utils import get_device_of_model

def generate_with_role_raw(m, instruction, user_input,
                          max_new_tokens=DECODE_MAX_NEW_TOKENS,
                          temperature=DECODE_TEMPERATURE,
                          top_k=DECODE_TOP_K,
                          top_p=DECODE_TOP_P,
                          do_sample=DO_SAMPLE):
    """Generate one reply for a role description and a user message.

    Args:
        m: model exposing ``generate``; its device is looked up with
            get_device_of_model.
        instruction: role description inserted into PROMPT_TMPL.
        user_input: user message inserted into PROMPT_TMPL.
        max_new_tokens: upper bound on the number of generated tokens.
        temperature, top_k, top_p: sampling settings, forwarded only when
            ``do_sample`` is true.
        do_sample: sample when true, otherwise use the model's non-sampling
            decoding.

    Returns:
        The decoded continuation only (prompt excluded), with special tokens
        removed.
    """
    prompt = PROMPT_TMPL.format(instruction=instruction, input=user_input)
    # Tokenize exactly like the training prompts (no automatically added
    # special tokens) so inference sees the same prefix the model was tuned on.
    inputs = tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
    device = get_device_of_model(m)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    gen_kwargs = {"max_new_tokens": max_new_tokens, "do_sample": do_sample}
    if do_sample:
        # Sampling knobs only matter when sampling; passing them otherwise
        # just triggers generation-config warnings.
        gen_kwargs.update(temperature=temperature, top_k=top_k, top_p=top_p)

    with torch.no_grad():
        out = m.generate(**inputs, **gen_kwargs)

    # Drop the prompt by token count instead of by string prefix: decoding the
    # full sequence is not guaranteed to reproduce the prompt text exactly, in
    # which case a prefix check would return the prompt together with the reply.
    prompt_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)

In [None]:
# Load the evaluation samples
import json
DATA_PATH = 'samples.json'
with open(DATA_PATH, 'r', encoding='utf-8') as f:
    # Parsed JSON; later cells iterate it and read the 'instruction', 'input'
    # and 'name' keys of each item.
    sft_samples = json.load(f)

In [None]:
# Collect the base model's reply to every evaluation sample before fine-tuning.
base_model.eval()
baseline_outputs = []
for sample in sft_samples:
    baseline_outputs.append(
        generate_with_role_raw(base_model, sample['instruction'], sample['input'])
    )

## 7. 微调

In [10]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import Trainer, TrainingArguments, default_data_collator

# LoRA adapters on the attention and MLP projection layers.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)

# Prepare the base model for k-bit training, then wrap it with the adapters.
model = get_peft_model(prepare_model_for_kbit_training(base_model), lora_config)

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=PER_DEVICE_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    # Mixed precision: fp16 whenever CUDA is available, never bf16.
    fp16=torch.cuda.is_available(),
    bf16=False,
    logging_steps=20,
    save_strategy='epoch',
    save_total_limit=2,
    # Rows are already model-ready, so keep every column produced by the
    # tokenization step.
    remove_unused_columns=False,
    report_to="none",  # disable wandb logging
)

# NOTE(review): newer transformers releases may expect `processing_class`
# instead of `tokenizer` here — confirm against the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    data_collator=default_data_collator,
    tokenizer=tokenizer,
)
trainer.train()

In [11]:
# Print the most recent training loss recorded in the Trainer's log history.
_log_history = getattr(getattr(trainer, 'state', None), 'log_history', None)
if _log_history:
    # Walk the log backwards and take the first entry that carries a 'loss' key.
    last_loss = next(
        (entry['loss'] for entry in reversed(_log_history) if 'loss' in entry),
        None,
    )
    if last_loss is None:
        print("未找到最终loss记录")
    else:
        print(f"最终训练集 loss: {last_loss:.4f}")
else:
    print("Trainer未记录loss信息")

## 8. 微调后测试

In [12]:
# Collect the fine-tuned model's reply to every evaluation sample.
finetuned_outputs = []
model.eval()
for sample in sft_samples:
    finetuned_outputs.append(
        generate_with_role_raw(model, sample['instruction'], sample['input'])
    )

In [13]:
import pandas as pd
from IPython.display import display
# One row per evaluation sample: role name, user input, and the replies before
# and after fine-tuning (empty string when a reply is missing for that index).
table_data = [
    {
        '角色': sample['name'],
        '输入': sample['input'],
        '微调前': baseline_outputs[idx] if idx < len(baseline_outputs) else '',
        '微调后': finetuned_outputs[idx] if idx < len(finetuned_outputs) else '',
    }
    for idx, sample in enumerate(sft_samples)
]
df = pd.DataFrame(table_data, columns=['角色', '输入', '微调前', '微调后'])
display(df)

## 9. 保存

In [14]:
import shutil

# Save the LoRA adapter
print('\n保存 LoRA adapter 至:', OUTPUT_DIR)
model.save_pretrained(OUTPUT_DIR)

# Save the tokenizer next to the adapter. The final message states that the
# archive holds both the adapter and the tokenizer, but nothing in this cell
# wrote tokenizer files into OUTPUT_DIR before.
tokenizer.save_pretrained(OUTPUT_DIR)

# Zip the output directory for easy download. The archive is created next to
# OUTPUT_DIR, named after it with a ".zip" suffix.
archive_path = shutil.make_archive(OUTPUT_DIR, 'zip', OUTPUT_DIR)
print('\n已将 adapter 和 tokenizer 打包为:', archive_path)