In [1]:
from datasets import Dataset
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, GenerationConfig

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from loguru import logger
from ruamel.yaml import YAML
from rich import print as rprint

yaml = YAML()
with open('config.yaml', 'r') as f:
    config = yaml.load(f)
config

{'file_load': {'model_path': '/home/liangshuqiao/models/qwen3', 'dataset_path': 'dataset/chat_expanded_new_resampled.csv', 'eval_dataset_path': 'dataset/meituan_data_clean_final.csv', 'test_size': 0.2, 'split_way': 'train', 'save_model_path': '/home/liangshuqiao/agent_source/model_outputs/Qwen3', 'gguf_path': 'model_output/qwen2_14b_all_lora_gguf', 'logging_path': 'trainlog', 'shuffle': True}, 'training_arg': {'dtype': 'torch.float16', 'load_in_4bit': True, 'batch_size': 16, 'gradient_accumulator_steps': 1, 'warmup_steps': 0, 'epoch': 100, 'eval_steps': 10, 'learning_rate': 1e-05, 'lr_scheduler_type': 'cosine', 'max_seq_length': 512, 'use_history': False, 'r': 8, 'target_modules': ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'], 'lora_alpha': 16, 'lora_dropout': 0, 'bias': 'none', 'use_gradient_checkpointing': 'unsloth', 'random_state': 3407, 'use_rslora': True, 'loftq_config': 'None'}, 'alpaca_prompt': '"""\n\n###instruction\n{}\n###conversation_history\n

In [3]:
with open(config['file_load']['dataset_path'],'r') as f:
    dataframe = pd.read_csv(f)
dataset = Dataset.from_pandas(dataframe)

with open(config['file_load']['eval_dataset_path'],'r') as f_e:
    eval_dataframe = pd.read_csv(f_e)
eval_dataset = Dataset.from_pandas(eval_dataframe)

dataset[:3]

{'id': [202, 698, 533],
 'user': ['我对热玛吉治疗很感兴趣，但它对所有类型的皮肤都安全吗？',
  '做果酸换肤后皮肤会瘙痒吗？',
  '我想做面部吸脂手术，医生技术可靠吗？'],
 'response': ['热玛吉治疗适用于大多数皮肤类型，但在开始前我们会进行全面评估，确保最适合您的皮肤状况。',
  '做果酸换肤后皮肤可能会有轻微瘙痒感，这是皮肤在修复的表现，不要搔抓，按医嘱护理即可。',
  '做面部吸脂手术的医生技术可靠，有丰富的吸脂经验和专业技能。']}

In [4]:
dataset_split = dataset.train_test_split(test_size=config['file_load']['test_size'],shuffle=config['file_load']['shuffle'])
print(dataset_split)
train_data = dataset_split['train']
eval_data = dataset_split['test']

DatasetDict({
    train: Dataset({
        features: ['id', 'user', 'response'],
        num_rows: 960
    })
    test: Dataset({
        features: ['id', 'user', 'response'],
        num_rows: 240
    })
})


In [5]:
tokenizer = AutoTokenizer.from_pretrained(config['file_load']['model_path'],use_fast=False)
tokenizer

Qwen2Tokenizer(name_or_path='/home/liangshuqiao/models/qwen3', vocab_size=151643, model_max_length=131072, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151645: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151646: AddedToken("<|object_ref_start|>", rstrip=False, lstrip=False, single_word=Fals

In [6]:
EOS_TOKEN = tokenizer.eos_token
BEGIN_TOKEN = tokenizer.bos_token

def process_func(example):
    MAX_LENGTH = 384    # Llama分词器会将一个中文字切分为多个token，因此需要放开一些最大长度，保证数据的完整性
    
    instruction = "你是一个医美客服，回答以下用户问题，仅输出答案"
    attention_mask, labels = [], []
    instruction = tokenizer(f"<|im_start|>system\n你是一个医美助手，请根据问题做出回答<|im_end|>\n<|im_start|>user\n{example['user']}<|im_end|>\n<|im_start|>assistant\n", add_special_tokens=False)  # add_special_tokens 不在开头加 special_tokens
    response = tokenizer(f"{example['response']}", add_special_tokens=False)
    input_ids = instruction["input_ids"] + response["input_ids"] + [tokenizer.pad_token_id]
    attention_mask = instruction["attention_mask"] + response["attention_mask"] + [1]  # 因为eos token咱们也是要关注的所以 补充为1
    labels = [-100] * len(instruction["input_ids"]) + response["input_ids"] + [tokenizer.pad_token_id]  
    if len(input_ids) > MAX_LENGTH:  # 做一个截断
        input_ids = input_ids[:MAX_LENGTH]
        attention_mask = attention_mask[:MAX_LENGTH]
        labels = labels[:MAX_LENGTH]
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }

In [7]:
print(train_data)

Dataset({
    features: ['id', 'user', 'response'],
    num_rows: 960
})


In [8]:
remove_columns = ['user', 'response']

train_data = train_data.map(process_func,remove_columns=remove_columns)
eval_data = eval_data.map(process_func,remove_columns=remove_columns)
""" print(f"before remove columns:{train_data}")

train_data.remove_columns(remove_columns)
eval_data.remove_columns(remove_columns)
print(f"after remove columns:{train_data}") """

Map: 100%|██████████| 960/960 [00:01<00:00, 490.44 examples/s]
Map: 100%|██████████| 240/240 [00:00<00:00, 367.47 examples/s]


' print(f"before remove columns:{train_data}")\n\ntrain_data.remove_columns(remove_columns)\neval_data.remove_columns(remove_columns)\nprint(f"after remove columns:{train_data}") '

In [9]:
print(f"train dataset:{train_data}\neval dataset:{eval_data}")
print(tokenizer.decode(train_data[0]['input_ids']))
print(
tokenizer.decode(list(filter(lambda x: x != -100, train_data[1]["labels"]))))

train dataset:Dataset({
    features: ['id', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 960
})
eval dataset:Dataset({
    features: ['id', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 240
})
<|im_start|>system
你是一个医美助手，请根据问题做出回答<|im_end|>
<|im_start|>user
我刚刚做了激光脱毛，但感觉效果不明显，是不是被骗了？<|im_end|>
<|im_start|>assistant
激光脱毛的效果因人而异，通常需要多次疗程才能看到显著改善。我们会跟进您的具体情况，调整方案以达到最佳效果。<|endoftext|>
我们周一早上9点开门，建议您提前到达以便有充足的时间进行护理。<|endoftext|>


In [10]:
%pip install --upgrade transformers


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [11]:
%time
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained(config['file_load']['model_path'],device_map="auto",torch_dtype=torch.float16)
model

CPU times: user 7 μs, sys: 0 ns, total: 7 μs
Wall time: 14.1 μs


Loading checkpoint shards: 100%|██████████| 17/17 [01:08<00:00,  4.03s/it]
Some parameters are on the meta device because they were offloaded to the cpu.


Qwen3ForCausalLM(
  (model): Qwen3Model(
    (embed_tokens): Embedding(151936, 5120)
    (layers): ModuleList(
      (0-63): 64 x Qwen3DecoderLayer(
        (self_attn): Qwen3Attention(
          (q_proj): Linear(in_features=5120, out_features=8192, bias=False)
          (k_proj): Linear(in_features=5120, out_features=1024, bias=False)
          (v_proj): Linear(in_features=5120, out_features=1024, bias=False)
          (o_proj): Linear(in_features=8192, out_features=5120, bias=False)
          (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
          (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
        )
        (mlp): Qwen3MLP(
          (gate_proj): Linear(in_features=5120, out_features=25600, bias=False)
          (up_proj): Linear(in_features=5120, out_features=25600, bias=False)
          (down_proj): Linear(in_features=25600, out_features=5120, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen3RMSNorm((5120,), eps=1e-06)
        (post_attention_layernorm): 

In [12]:
model.enable_input_require_grads()
model.dtype

torch.float16

In [13]:

from peft import LoraConfig, TaskType, get_peft_model

config = LoraConfig(
    task_type=TaskType.CAUSAL_LM, 
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=False, # 训练模式
    r=8, # Lora 秩
    lora_alpha=32, # Lora alaph，具体作用参见 Lora 原理
    lora_dropout=0.1# Dropout 比例
)
rprint(config)

In [14]:
model = get_peft_model(model, config)
rprint(config)
model.print_trainable_parameters()

trainable params: 67,108,864 || all params: 32,829,232,128 || trainable%: 0.2044


In [15]:

args = TrainingArguments(
    output_dir="./output/Qwen3-lora",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    logging_steps=10,
    num_train_epochs=3,
    save_steps=100, 
    learning_rate=1e-4,
    save_on_each_node=True,
    gradient_checkpointing=True
)

In [16]:
print(next(model.parameters()).device)  # 检查是否在cuda

cuda:0


In [17]:
%time
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_data,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)

CPU times: user 16 μs, sys: 9 μs, total: 25 μs
Wall time: 44.8 μs


NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.

In [None]:
trainer.train()

NameError: name 'trainer' is not defined

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel
import time
import tqdm

mode_path = "/home/liangshuqiao/agent/Qwen2.5-14B-Instruct"
lora_path = 'output/Qwen2.5_instruct_lora/checkpoint-180' # 这里改称你的 lora 输出对应 checkpoint 地址

# 加载tokenizer
tokenizer = AutoTokenizer.from_pretrained(mode_path)

# 加载模型
model = AutoModelForCausalLM.from_pretrained(mode_path, device_map="auto",torch_dtype=torch.bfloat16, trust_remote_code=True).eval()

# 加载lora权重
model = PeftModel.from_pretrained(model, model_id=lora_path)

#prompt = "你是谁？"
if False:##预设问题问答
    for prompt in config['test_list']:
        inputs = tokenizer.apply_chat_template([{"role": "user", "content": "你是一个医美助手，请根据问题做出回答"},{"role": "user", "content": prompt}],
                                            add_generation_prompt=True,
                                            tokenize=True,
                                            return_tensors="pt",
                                            return_dict=True
                                            ).to('cuda')


        gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
        with torch.no_grad():
            outputs = model.generate(**inputs, **gen_kwargs)
            outputs = outputs[:, inputs['input_ids'].shape[1]:]
            print(prompt)
            print(tokenizer.decode(outputs[0], skip_special_tokens=True))
            print("\n")

if False:##主观测试问答
    while True:
        user_input = input("user：")
        if user_input != 'q':
            print(user_input)
            inputs = tokenizer.apply_chat_template([{"role": "user", "content": "你是一个医美助手，请根据问题做出回答"},{"role": "user", "content": user_input}],
                                                add_generation_prompt=True,
                                                tokenize=True,
                                                return_tensors="pt",
                                                return_dict=True
                                                ).to('cuda')


            gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
            with torch.no_grad():
                outputs = model.generate(**inputs, **gen_kwargs)
                outputs = outputs[:, inputs['input_ids'].shape[1]:]
                print(tokenizer.decode(outputs[0], skip_special_tokens=True))
                print("\n")
        else:
            break

if True:##进行余弦相似度评估
    from nltk.translate.bleu_score import corpus_bleu
    from nltk.translate.bleu_score import SmoothingFunction
    import random
    smoothie = SmoothingFunction().method4

    predict_list = []
    real_list = []
    time_list = []
    counter = 0
    for i in range(500):
        counter += 1
        print(f"range:{counter}")
        test_sample = random.randint(0, len(dataset['user']) - 1)
        prompt = dataset['user'][test_sample]
        real_response = dataset['response'][test_sample]
        print(f"question: {prompt}")
        print(f"real response: {real_response}")
        
        begin_time = time.time()
        inputs = tokenizer.apply_chat_template([
            {"role": "user", "content": "你是一个医美助手，请根据问题做出回答"},
            {"role": "user", "content": prompt}
        ],
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True).to('cuda')

        gen_kwargs = {"max_new_tokens": 512, "do_sample": True, "top_k": 1}
        with torch.no_grad():
            outputs = model.generate(**inputs, **gen_kwargs)
            outputs = outputs[:, inputs['input_ids'].shape[1]:]
            model_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
            final_time = time.time()
            print(f"model output: {model_output}\nuse:{(final_time - begin_time):2f}s")
            print("\n")

        # 收集原始预测与参考答案
        use_time = final_time - begin_time
        predict_list.append(model_output)
        real_list.append(real_response)
        time_list.append(use_time)

    # BLEU：基于 tokenizer 的分词
    tokenized_predictions = [tokenizer(pred, add_special_tokens=False)["input_ids"] for pred in predict_list]
    tokenized_references = [[tokenizer(ref, add_special_tokens=False)["input_ids"]] for ref in real_list]

    # 使用 corpus_bleu 计算平均 BLEU
    average_bleu = corpus_bleu(tokenized_references, tokenized_predictions, smoothing_function=smoothie)
    print(f"Average BLEU score (token-level): {average_bleu:.4f}")
    print(f"average use time for {(sum(time_list)/len(time_list)):4f}")


Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Loading checkpoint shards: 100%|██████████| 8/8 [00:10<00:00,  1.36s/it]


range:1
question: 我想做唇部整形和颈部抗衰项目，这俩项目做完后能泡温泉吗，多久之后可以？
real response: 亲，做完这两个项目后半个月内不建议泡温泉哦。因为泡温泉可能会使皮肤血液循环加快，影响项目的恢复效果，半个月后等皮肤完全稳定了再去泡温泉就没问题啦。
model output: 亲，做完这两个项目后半个月内不建议泡温泉哦。因为泡温泉可能会使皮肤血液循环加快，影响项目的恢复效果，半个月后等皮肤完全稳定了再去泡温泉就没问题啦。
use:6.063363s


range:2
question: 为什么我必须提前这么长时间预约，难道不能随到随做吗？
real response: 为了确保每位顾客都能获得充分的关注和服务质量，我们需要合理安排时间表。虽然高峰时段需提前预约，但在非繁忙时段我们也尽量提供灵活性。
model output: 为了确保每位顾客都能获得充分的关注和服务质量，我们需要合理安排时间表。虽然高峰时段需要提前预约，但在非繁忙时段我们也尽量提供一定的灵活性。建议您提前查看我们的实时可用时间段，这样可以更容易地找到适合您的时间。
use:6.710456s


range:3
question: 我打算做胸部隆胸和瘦腰腹项目，什么时候能安排上，医生专业度咋样？
real response: 亲，您要是想做胸部隆胸和瘦腰腹项目，我们近期就可以为您安排哦。负责这两个项目的医生都是非常专业的呢，隆胸医生有丰富的胸部整形经验，瘦腰腹项目的医生对各种吸脂和塑形技术都很精通，会为您打造理想的身材。
model output: 亲，您要是想做胸部隆胸和瘦腰腹项目，我们近期就可以为您安排哦。负责这两个项目的医生都是非常专业的呢，隆胸医生有丰富的胸部整形经验，瘦腰腹项目的医生对各种吸脂和塑形技术都很精通，会为您打造理想的身材。
use:7.927669s


range:4
question: 你们平时做项目时使用的产品质量有保障吗？
real response: 我们非常重视产品质量，所有用于项目的产品都经过严格筛选和检测，均来自正规渠道，质量有充分保障，您可以放心。
model output: 我们非常重视产品质量，所有用于项目的产品都经过严格筛选和检测，均来自正规渠道，质量有充分保障，您可以放心。
use:3.916117s


rang