In [39]:
from datasets import Dataset
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, GenerationConfig

In [40]:
with open("dataset/副本智能电器控制分类数据20250328.csv",'r') as f:
    dataset = pd.read_csv(f)
dataset = Dataset.from_pandas(dataset)
dataset

Dataset({
    features: ['电器', '控制', '参数', '用户指令', '调优思维链', '调优模型输出'],
    num_rows: 467
})

In [41]:
from loguru import logger
from ruamel.yaml import YAML
from rich import print as rprint

yaml = YAML()
with open('config.yaml', 'r') as f:
    config = yaml.load(f)
config

{'file_load': {'model_path': '/home/liangshuqiao/models/DeepSeek-R1-Distill-Qwen-7B', 'dataset_path': 'dataset/chat_expanded_new_resampled.csv', 'eval_dataset_path': 'dataset/meituan_data_clean_final.csv', 'test_size': 0.2, 'split_way': 'train', 'save_model_path': '/home/liangshuqiao/agent_source/model_outputs/Qwen3', 'gguf_path': 'model_output/qwen2_14b_all_lora_gguf', 'logging_path': 'trainlog', 'shuffle': True}, 'training_arg': {'dtype': 'torch.float16', 'load_in_4bit': True, 'batch_size': 16, 'gradient_accumulator_steps': 1, 'warmup_steps': 0, 'epoch': 100, 'eval_steps': 10, 'learning_rate': 1e-05, 'lr_scheduler_type': 'cosine', 'max_seq_length': 512, 'use_history': False, 'r': 8, 'target_modules': ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'], 'lora_alpha': 16, 'lora_dropout': 0.1, 'bias': 'none', 'use_gradient_checkpointing': 'unsloth', 'random_state': 3407, 'use_rslora': True, 'loftq_config': 'None'}, 'alpaca_prompt': '"""\n\n###instruction\n{}\n#

In [42]:
tokenizer = AutoTokenizer.from_pretrained(config['file_load']['model_path'],use_fast=False,trust_remote_code=False)
tokenizer.padding_side = 'right'
tokenizer

LlamaTokenizerFast(name_or_path='/home/liangshuqiao/models/DeepSeek-R1-Distill-Qwen-7B', vocab_size=151643, model_max_length=16384, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<｜begin▁of▁sentence｜>', 'eos_token': '<｜end▁of▁sentence｜>', 'pad_token': '<｜end▁of▁sentence｜>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	151643: AddedToken("<｜end▁of▁sentence｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151644: AddedToken("<｜User｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
	151645: AddedToken("<｜Assistant｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
	151646: AddedToken("<｜begin▁of▁sentence｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151647: AddedToken("<|EOT|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
	151648: AddedToken("<think>",

In [None]:
EOS = tokenizer.eos_token
EOS

'<｜end▁of▁sentence｜>'

In [44]:
EOS_TOKEN = tokenizer.eos_token_id
BOS_TOKEN = tokenizer.bos_token_id

prompt = """
<|im_start|>你是一个智能家居助手，你根据用户的输入和当前控制的电器信息使用control()函数控制电器。<|im_end|>
<|im_start|>
用户指令：{}<|im_end|>

<|im_start|>助手思考（请根据用户输入分析需要执行的操作）：
<think>
{}
分析结果：
设备：{}
目标状态：{}
参数：{}
</think><|im_end|>

<|im_start|>助手操作（请直接输出control()函数调用，不要包含其他内容）：
{}<|im_end|>
"""

In [45]:
tokenizer.pad_token_id

151643

In [46]:
def process_func(example):
    device = example['电器']
    control = example['控制']
    argument = example['参数']
    instruction = example['用户指令']
    think_chain = example['调优思维链']
    output = example['调优模型输出']

    MAX_LENGTH = 512
    input_ids,attention_mask,labels = [],[],[]
    sample = prompt.format(instruction,think_chain,device,control,argument,output)
    sample_token = tokenizer(sample,add_special_tokens=False)
    input_ids = sample_token['input_ids'] + [tokenizer.pad_token_id]
    attention_mask = sample_token['attention_mask'] + [1]

    # 修复 labels 的生成逻辑，确保与 input_ids 长度一致
    labels = sample_token['input_ids'] + [tokenizer.pad_token_id]

    if len(input_ids) > MAX_LENGTH:
        input_ids = input_ids[:MAX_LENGTH]
        attention_mask = attention_mask[:MAX_LENGTH]
        labels = labels[:MAX_LENGTH]

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }

In [47]:
dataset

Dataset({
    features: ['电器', '控制', '参数', '用户指令', '调优思维链', '调优模型输出'],
    num_rows: 467
})

In [48]:
dataset = dataset.map(process_func,remove_columns=['电器', '控制', '参数', '用户指令', '调优思维链', '调优模型输出'])
tokenizer.decode(dataset[:1]['input_ids'][0])

Map: 100%|██████████| 467/467 [00:00<00:00, 1184.60 examples/s]


'\n<|im_start|>你是一个智能家居助手，你根据用户的输入和当前控制的电器信息使用control()函数控制电器。<|im_end|>\n<|im_start|>\n用户指令：打开灯<|im_end|>\n\n<|im_start|>助手思考（请根据用户输入分析需要执行的操作）：\n<think>\n用户明确指令开启特定区域灯光。依据任务，直接执行对应设备操作。设备是客厅灯，动作为"开启"，参数无\n分析结果：\n设备：灯\n目标状态：开启\n参数：无\n</think><|im_end|>\n\n<|im_start|>助手操作（请直接输出control()函数调用，不要包含其他内容）：\ncontrol(客厅灯,开启,无)<|im_end|>\n<｜end▁of▁sentence｜>'

In [49]:
import torch

print(config['file_load']['model_path'])
model = AutoModelForCausalLM.from_pretrained(
                                    config['file_load']['model_path'],
                                    trust_remote_code=False,
                                    torch_dtype=torch.bfloat16,
                                    device_map=None,
                                    low_cpu_mem_usage=True)
model.generation_config = GenerationConfig.from_pretrained(config['file_load']['model_path'])
model.generation_config.pad_token_id = model.generation_config.eos_token_id
model

/home/liangshuqiao/models/DeepSeek-R1-Distill-Qwen-7B


Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 24.76it/s]


Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(152064, 3584)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=3584, out_features=3584, bias=True)
          (k_proj): Linear(in_features=3584, out_features=512, bias=True)
          (v_proj): Linear(in_features=3584, out_features=512, bias=True)
          (o_proj): Linear(in_features=3584, out_features=3584, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (up_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (down_proj): Linear(in_features=18944, out_features=3584, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((3584,), eps=1e-06)
    (rotary_emb):

In [50]:
model.enable_input_require_grads()
model.dtype

torch.bfloat16

In [51]:
from peft import LoraConfig, TaskType, get_peft_model

config = LoraConfig(
    task_type=TaskType.CAUSAL_LM, 
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=False, # 训练模式
    r=8, # Lora 秩
    lora_alpha=32, # Lora alaph，具体作用参见 Lora 原理
    lora_dropout=0.1# Dropout 比例
)
config

LoraConfig(task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, inference_mode=False, r=8, target_modules={'k_proj', 'o_proj', 'gate_proj', 'down_proj', 'q_proj', 'v_proj', 'up_proj'}, exclude_modules=None, lora_alpha=32, lora_dropout=0.1, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, eva_config=None, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=False)

In [52]:
model = get_peft_model(model, config)
config

LoraConfig(task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path='/home/liangshuqiao/models/DeepSeek-R1-Distill-Qwen-7B', revision=None, inference_mode=False, r=8, target_modules={'k_proj', 'o_proj', 'gate_proj', 'down_proj', 'q_proj', 'v_proj', 'up_proj'}, exclude_modules=None, lora_alpha=32, lora_dropout=0.1, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, eva_config=None, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=False)

In [53]:
model.print_trainable_parameters()

trainable params: 20,185,088 || all params: 7,635,801,600 || trainable%: 0.2643


In [54]:
args = TrainingArguments(
    output_dir="./output/DeepSeek",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    logging_steps=5,
    num_train_epochs=3,
    save_steps=100,
    learning_rate=1e-4,
    save_on_each_node=True,
    gradient_checkpointing=True,
    # 添加这些参数以避免meta tensor问题
    remove_unused_columns=True,
    dataloader_pin_memory=False,  # 添加这个参数
)

In [55]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [56]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
5,3.2473
10,2.1031
15,1.3089
20,0.888
25,0.6804
30,0.5934
35,0.4568
40,0.4435
45,0.4102
50,0.4725


TrainOutput(global_step=174, training_loss=0.5162602484911337, metrics={'train_runtime': 1648.5557, 'train_samples_per_second': 0.85, 'train_steps_per_second': 0.106, 'total_flos': 1.3313254381120512e+16, 'train_loss': 0.5162602484911337, 'epoch': 2.9572649572649574})

In [57]:

torch.cuda.empty_cache()
torch.clear_autocast_cache()

In [58]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel
import time
import tqdm

mode_path = "/home/liangshuqiao/models/DeepSeek-R1-Distill-Qwen-7B"
lora_path = "output/DeepSeek/checkpoint-87" # lora 输出对应 checkpoint 地址

# 加载tokenizer
tokenizer = AutoTokenizer.from_pretrained(mode_path)

# 加载模型
model = AutoModelForCausalLM.from_pretrained(mode_path, device_map="auto",torch_dtype=torch.bfloat16, trust_remote_code=True).eval()

# 加载lora权重
model = PeftModel.from_pretrained(model, model_id=lora_path)

Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.56s/it]
Some parameters are on the meta device because they were offloaded to the cpu.
Some parameters are on the meta device because they were offloaded to the cpu.


In [59]:

# 定义输入模板
input_prompt = """
你是一个智能家居助手，你根据用户的输入和当前控制的电器信息分析设备，目标状态和参数并直接输出control()函数控制电器
如果用户没有具体指定电气可以根据语义推断
只需要输出control（）函数即可
"""
instruct = "用户指令:"
instruct += "设置灯光色温为5000w"

# 定义用户输入内容
messages = [
    {"role": "system", "content": input_prompt},  # 系统提示
    {"role": "user", "content": instruct}  # 用户输入
]

print(messages)

# 使用 apply_chat_template 构造输入
inputs = tokenizer.apply_chat_template(
    conversation=messages,
    add_generation_prompt=True,  # 添加生成提示
    tokenize=True,
    return_tensors="pt",
    return_dict=True
).to("cuda")

# 打印生成的输入
#print("构造的输入：", inputs)

# 定义生成参数
gen_kwargs = {
    "max_length": 256,
    "do_sample": True,
    "top_k": 50,
    "top_p": 0.9,
    "temperature": 0.2
}

# 生成模型输出
with torch.no_grad():
    outputs = model.generate(**inputs, **gen_kwargs)
    outputs = outputs[:, inputs['input_ids'].shape[1]:]  # 去掉输入部分的 token

    # 打印用户输入
    print("\n用户输入:")
    print(messages[-1]["content"])
    print("\n")

    # 打印模型输出
    print("模型输出:<think>")
    for token_id in outputs[0]:
        print(tokenizer.decode([token_id], skip_special_tokens=True), end='', flush=True)
    print("\n")

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


[{'role': 'system', 'content': '\n你是一个智能家居助手，你根据用户的输入和当前控制的电器信息分析设备，目标状态和参数并直接输出control()函数控制电器\n如果用户没有具体指定电气可以根据语义推断\n只需要输出control（）函数即可\n'}, {'role': 'user', 'content': '用户指令:设置灯光色温为5000w'}]

用户输入:
用户指令:设置灯光色温为5000w


模型输出:<think>
用户明确要求设置灯光色温为5000w，直接推断对lights设备执行设置目标状态。参数是“color_temp=5000”，设备是lights，参数无其他内容。
分析结果：
设备：lights
目标状态：color_temp=5000
参数：无
</think>control(lights, color_temp=5000)

