In [None]:
# Launch finetune.py with torchrun: a single node, 4 worker processes, LoRA enabled
# (--use_lora), bf16, and the DeepSpeed config passed via --deepspeed.
# Evaluation runs every 100 steps and the best checkpoint (lowest eval_loss) is
# reloaded at the end of training.
# NOTE: with --load_best_model_at_end True and step-based eval/save strategies,
# Hugging Face TrainingArguments requires save_steps to be a round multiple of
# eval_steps; the previous value (666) is not a multiple of 100 and is rejected
# at argument-validation time, so 600 is used here.
# NOTE(review): this assumes finetune.py parses Hugging Face TrainingArguments — confirm.
torchrun --nproc_per_node 4 --nnodes 1 --node_rank 0 --master_addr localhost --master_port 6601 /root/autodl-tmp/finetune.py \
    --model_name_or_path "/root/autodl-fs/DeepSeek-R1-Distill-Qwen-32B" \
    --data_path "/root/autodl-tmp/data/datasets/output.json" \
    --eval_data_path "/root/autodl-tmp/data/datasets/dev.json" \
    --bf16 True \
    --output_dir "/root/autodl-fs/trained_models/deepseek_ri_32b_sop" \
    --num_train_epochs 66 \
    --per_device_train_batch_size 12 \
    --per_device_eval_batch_size 12 \
    --gradient_accumulation_steps 1 \
    --eval_strategy "steps" \
    --eval_steps 100 \
    --metric_for_best_model "eval_loss" \
    --greater_is_better False \
    --save_strategy "steps" \
    --save_steps 600 \
    --load_best_model_at_end True \
    --save_total_limit 3 \
    --learning_rate 1e-5 \
    --weight_decay 0.1 \
    --adam_beta2 0.95 \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --report_to "none" \
    --model_max_length 512 \
    --gradient_checkpointing True \
    --lazy_preprocess True \
    --deepspeed "/root/autodl-tmp/deepspeed/ds_config_zero2.json" \
    --use_lora

## 权重融合

In [None]:
from transformers import AutoModelForCausalLM
from peft import PeftModel
import torch

# Fold the LoRA adapter into the base weights and write a standalone checkpoint.
# NOTE(review): the base model / adapter paths below ("Qwen/Qwen-1_8B-Chat/",
# "output_qwen/") and the float16 dtype differ from the training command in this
# notebook (DeepSeek-R1-Distill-Qwen-32B, bf16, a different output_dir) — confirm
# these are the intended inputs before merging.
base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-1_8B-Chat/",
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Attach the trained adapter on top of the base model.
model = PeftModel.from_pretrained(base_model, "output_qwen/")

# Merge the adapter into the underlying weights and drop the PEFT wrappers.
merged_model = model.merge_and_unload()

# Save as safetensors, sharded with the given maximum shard size.
merged_model.save_pretrained(
    "output_qwen_merged",
    max_shard_size="2048MB",
    safe_serialization=True,
)

## 分词器

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer from the base model directory so it can be shipped
# alongside the merged weights.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-1_8B-Chat/", trust_remote_code=True)

# Write the tokenizer files into the merged-model directory; as the last
# expression of the cell, the return value of save_pretrained is displayed.
tokenizer.save_pretrained("output_qwen_merged")

## 模型测试

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
import torchvision

# Turn off torchvision's beta-transforms warning before loading the model.
torchvision.disable_beta_transforms_warning()

# Checkpoint used for the smoke test.
# NOTE(review): this is a different model from the one fine-tuned/merged earlier
# in the notebook — confirm this is the checkpoint you intend to test.
MODEL_DIR = "/root/autodl-fs/DeepSeek-R1-Distill-Llama-70B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    # Load weights in the dtype stored in the checkpoint instead of up-casting
    # to float32, which would multiply the memory needed for a model this size.
    torch_dtype="auto",
    device_map="auto",
    trust_remote_code=True,
).eval()

prompt = "你好,你是谁？"
# The tokenizer returns CPU tensors; move them to the device that holds the
# model's first parameters so generate() does not hit a device mismatch.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate a continuation; the decoded text of the first (only) sequence
# contains the prompt followed by the newly generated tokens.
output_ids = model.generate(**inputs, max_new_tokens=128000)
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(response)

Loading checkpoint shards:   0%|          | 0/17 [00:00<?, ?it/s]

### 选一条样本对训练出来的模型进行人工比对

In [None]:
# One sample record kept for manual comparison against the trained model's output:
# a "conversations" list holding a user turn (the extraction prompt plus passage)
# and an assistant turn (the expected answer, a JSON-encoded list of SPO triples),
# together with the record "id".
{
    "conversations": 
        [
            {
                "from": "user", 
                "value": "你现在是一个很厉害的阅读理解器，严格按照人类指令进行回答。\nInput: 下面描述中包含了哪些SPO？\n返回json回答:\n\n花样男子花样男子《花样男子》，日文原名《花より男子》（Hana-yori Danshi），是日本漫画家神尾叶子的一部长篇爱情校园漫画，原作自1992年至2004年在集英社的漫画杂志《Margaret》上连载，并发行了37册的单行本，销量超过5900万册，霸占了日本第一畅销少女漫画的宝座。"}, 
            {
                "from": "assistant", 
                "value": "[{\"predicate\": \"编剧\", \"object_type\": \"人物\", \"subject_type\": \"影视作品\", \"object\": \"神尾叶子\", \"subject\": \"花样男子\"}, {\"predicate\": \"出版社\", \"object_type\": \"出版社\", \"subject_type\": \"书籍\", \"object\": \"集英社\", \"subject\": \"花样男子\"}]"
            }
        ], 
    "id": "identity_0"
}