In [None]:
import os

# Route Hugging Face Hub traffic through the hf-mirror.com mirror.
# This is set in the kernel's environment before any other cell runs so that it
# is already in place when the later `!python ...` cells launch their scripts.
os.environ.update({"HF_ENDPOINT": "https://hf-mirror.com"})

### 使用原始huatuo数据集进行微调

加载 FreedomIntelligence/Huatuo26M-Lite 数据集，并转换为 ShareGPT 格式。

In [None]:
# Build the SFT dataset: per the notebook text above, this loads
# FreedomIntelligence/Huatuo26M-Lite and converts it to ShareGPT format.
# NOTE(review): presumably this writes the ./dataset/SFT_huotuo/{train,val}
# files consumed by the SFT cell below — confirm against the script.
!python convert_huatuo2sharegpt.py

SFT（监督微调）：使用 LoRA 对 Llama-3.2-3B-Instruct 进行微调

In [None]:
# LoRA supervised fine-tuning of Llama-3.2-3B-Instruct on the unfiltered
# Huatuo ShareGPT data (1 epoch, LoRA rank 16 / alpha 32, bf16).
# Per-device effective batch: 4 (batch size) x 8 (gradient accumulation) = 32
# sequences per optimizer step. Evaluation runs every 100 steps on at most 500
# validation samples; checkpoints are written every 500 steps, keeping 2.
#
# --template_name llama3 is passed explicitly so that SFT formats prompts with
# the same template the DPO stage of this notebook uses (--template_name llama3).
# NOTE(review): without the flag the script uses its own default template
# (`vicuna` in upstream MedicalGPT — confirm for this copy of the script).
# NOTE(review): "SFT_huotuo" in the dataset paths looks like a misspelling of
# "huatuo"; it is left as-is because it must match the directory on disk.
# NOTE(review): every SFT section in this notebook writes to the same
# --output_dir (outputs-llama3-3b-sft-v1), so a later run replaces this run's
# adapter. Run the matching merge cell right after this one.
!python supervised_finetuning.py \
    --model_name_or_path ./models/LLM-Research/Llama-3___2-3B-Instruct \
    --train_file_dir ./dataset/SFT_huotuo/train \
    --validation_file_dir ./dataset/SFT_huotuo/val \
    --template_name llama3 \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 8 \
    --max_eval_samples 500 \
    --do_train \
    --do_eval \
    --use_peft True \
    --bf16 \
    --num_train_epochs 1 \
    --learning_rate 5e-5 \
    --warmup_ratio 0.03 \
    --weight_decay 0.01 \
    --logging_strategy steps \
    --logging_steps 10 \
    --eval_steps 100 \
    --eval_strategy steps \
    --save_steps 500 \
    --save_strategy steps \
    --save_total_limit 2 \
    --preprocessing_num_workers 8 \
    --output_dir outputs-llama3-3b-sft-v1 \
    --ddp_timeout 30000 \
    --logging_first_step True \
    --target_modules all \
    --lora_rank 16 \
    --lora_alpha 32 \
    --lora_dropout 0.05 \
    --torch_dtype bfloat16 \
    --device_map auto \
    --report_to tensorboard \
    --ddp_find_unused_parameters False \
    --gradient_checkpointing False

查看日志

In [None]:
# Open TensorBoard on the SFT training logs.
# The notebook magic starts the server in the background, so this cell returns
# instead of holding the kernel the way a foreground `!tensorboard ...` process
# does (which stalls "Run All" until it is interrupted).
# Log directory, bind address and port are unchanged.
# NOTE(review): --host 0.0.0.0 makes the dashboard reachable on every network
# interface; keep it only on a trusted network.
%load_ext tensorboard
%tensorboard --logdir outputs-llama3-3b-sft-v1/runs --host 0.0.0.0 --port 8009

合并模型

In [None]:
# Merge the LoRA adapter saved in outputs-llama3-3b-sft-v1 into the base
# Llama-3.2-3B-Instruct weights and save the result to ./merged-llama3-3b-sft-v1.
# NOTE(review): other SFT sections in this notebook write to the same
# outputs-llama3-3b-sft-v1 directory, so this merges whichever adapter was
# trained last — run it right after the matching SFT cell.
!python merge_peft_adapter.py \
    --base_model ./models/LLM-Research/Llama-3___2-3B-Instruct \
    --lora_model outputs-llama3-3b-sft-v1 \
    --output_dir ./merged-llama3-3b-sft-v1

### 使用经过向量相似度过滤后的huatuo数据集进行微调

In [None]:
# Prepare the Huatuo subset filtered by vector similarity (see section heading).
# NOTE(review): presumably this writes ./dataset/SFT_huatuo_filter_test_q/{train,val},
# which the next SFT cell reads — confirm against the script.
# NOTE(review): the "filter_test_q" directory name suggests samples are selected
# by similarity to test-set questions; if so, check that evaluation data cannot
# leak into training — confirm.
!python prepare_huatuo_filter.py

In [None]:
# LoRA supervised fine-tuning of Llama-3.2-3B-Instruct on the
# similarity-filtered Huatuo data (2 epochs, LoRA rank 16 / alpha 32, bf16).
# Per-device effective batch: 4 (batch size) x 8 (gradient accumulation) = 32
# sequences per optimizer step. Evaluation runs every 50 steps on at most 500
# validation samples; checkpoints are written every 500 steps, keeping 2.
#
# --template_name llama3 is passed explicitly so that SFT formats prompts with
# the same template the DPO stage of this notebook uses (--template_name llama3).
# NOTE(review): without the flag the script uses its own default template
# (`vicuna` in upstream MedicalGPT — confirm for this copy of the script).
# NOTE(review): --output_dir is the same outputs-llama3-3b-sft-v1 directory the
# other SFT sections use, so this run replaces any adapter already stored there.
# Run the matching merge cell right after this one.
!python supervised_finetuning.py \
    --model_name_or_path ./models/LLM-Research/Llama-3___2-3B-Instruct \
    --train_file_dir ./dataset/SFT_huatuo_filter_test_q/train \
    --validation_file_dir ./dataset/SFT_huatuo_filter_test_q/val \
    --template_name llama3 \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 8 \
    --max_eval_samples 500 \
    --do_train \
    --do_eval \
    --use_peft True \
    --bf16 \
    --num_train_epochs 2 \
    --learning_rate 5e-5 \
    --warmup_ratio 0.03 \
    --weight_decay 0.01 \
    --logging_strategy steps \
    --logging_steps 10 \
    --eval_steps 50 \
    --eval_strategy steps \
    --save_steps 500 \
    --save_strategy steps \
    --save_total_limit 2 \
    --preprocessing_num_workers 8 \
    --output_dir outputs-llama3-3b-sft-v1 \
    --ddp_timeout 30000 \
    --logging_first_step True \
    --target_modules all \
    --lora_rank 16 \
    --lora_alpha 32 \
    --lora_dropout 0.05 \
    --torch_dtype bfloat16 \
    --device_map auto \
    --report_to tensorboard \
    --ddp_find_unused_parameters False \
    --gradient_checkpointing False

查看日志

In [None]:
# Open TensorBoard on the SFT training logs, on port 6006.
# The notebook magic starts the server in the background, so this cell returns
# instead of holding the kernel the way a foreground `!tensorboard ...` process
# does (which stalls "Run All" until it is interrupted).
# For access from another machine, add `--host 0.0.0.0` (and pick a free port,
# e.g. 8009 as in the first SFT section's log cell).
%load_ext tensorboard
%tensorboard --logdir=outputs-llama3-3b-sft-v1/runs --port 6006

合并模型

In [None]:
# Merge the LoRA adapter saved in outputs-llama3-3b-sft-v1 into the base
# Llama-3.2-3B-Instruct weights and save the result to ./merged-llama3-3b-sft-v3.
# NOTE(review): outputs-llama3-3b-sft-v1 is shared by every SFT section in this
# notebook, so this merges whichever adapter was trained last — run it right
# after the filtered-Huatuo SFT cell.
!python merge_peft_adapter.py \
    --base_model ./models/LLM-Research/Llama-3___2-3B-Instruct \
    --lora_model outputs-llama3-3b-sft-v1 \
    --output_dir ./merged-llama3-3b-sft-v3

## 使用CMB数据集进行召回后SFT

In [None]:
# Prepare the CMB data selected by retrieval (see section heading).
# NOTE(review): presumably this writes ./dataset/SFT_CMB_filter/{train,val},
# which the next SFT cell reads — confirm against the script.
!python prepare_CMB_filter.py

In [None]:
# LoRA supervised fine-tuning of Llama-3.2-3B-Instruct on the retrieved CMB
# data (2 epochs, LoRA rank 16 / alpha 32, bf16).
# Per-device effective batch: 4 (batch size) x 8 (gradient accumulation) = 32
# sequences per optimizer step. Evaluation runs every 50 steps on at most 500
# validation samples; checkpoints are written every 500 steps, keeping 2.
#
# --template_name llama3 is passed explicitly so that SFT formats prompts with
# the same template the DPO stage of this notebook uses (--template_name llama3).
# NOTE(review): without the flag the script uses its own default template
# (`vicuna` in upstream MedicalGPT — confirm for this copy of the script).
# NOTE(review): --output_dir is the same outputs-llama3-3b-sft-v1 directory the
# other SFT sections use, so this run replaces any adapter already stored there.
# Run the matching merge cell right after this one.
!python supervised_finetuning.py \
    --model_name_or_path ./models/LLM-Research/Llama-3___2-3B-Instruct \
    --train_file_dir ./dataset/SFT_CMB_filter/train \
    --validation_file_dir ./dataset/SFT_CMB_filter/val \
    --template_name llama3 \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 8 \
    --max_eval_samples 500 \
    --do_train \
    --do_eval \
    --use_peft True \
    --bf16 \
    --num_train_epochs 2 \
    --learning_rate 5e-5 \
    --warmup_ratio 0.03 \
    --weight_decay 0.01 \
    --logging_strategy steps \
    --logging_steps 10 \
    --eval_steps 50 \
    --eval_strategy steps \
    --save_steps 500 \
    --save_strategy steps \
    --save_total_limit 2 \
    --preprocessing_num_workers 8 \
    --output_dir outputs-llama3-3b-sft-v1 \
    --ddp_timeout 30000 \
    --logging_first_step True \
    --target_modules all \
    --lora_rank 16 \
    --lora_alpha 32 \
    --lora_dropout 0.05 \
    --torch_dtype bfloat16 \
    --device_map auto \
    --report_to tensorboard \
    --ddp_find_unused_parameters False \
    --gradient_checkpointing False

In [None]:
# Merge the LoRA adapter saved in outputs-llama3-3b-sft-v1 into the base
# Llama-3.2-3B-Instruct weights and save the result to ./merged-llama3-3b-sft-CMB-v1.
# NOTE(review): outputs-llama3-3b-sft-v1 is shared by every SFT section in this
# notebook, so this merges whichever adapter was trained last — run it right
# after the CMB SFT cell.
!python merge_peft_adapter.py \
    --base_model ./models/LLM-Research/Llama-3___2-3B-Instruct \
    --lora_model outputs-llama3-3b-sft-v1 \
    --output_dir ./merged-llama3-3b-sft-CMB-v1

## 使用CMB和huatuo数据集混合召回后进行SFT

In [None]:
# LoRA supervised fine-tuning of Llama-3.2-3B-Instruct on the mixed
# CMB + Huatuo retrieved data (1 epoch, bf16). This run uses a smaller adapter
# than the other SFT sections: LoRA rank 8 / alpha 16.
# Per-device effective batch: 4 (batch size) x 8 (gradient accumulation) = 32
# sequences per optimizer step. Evaluation runs every 50 steps on at most 500
# validation samples; checkpoints are written every 500 steps, keeping 2.
#
# --template_name llama3 is passed explicitly so that SFT formats prompts with
# the same template the DPO stage of this notebook uses (--template_name llama3).
# NOTE(review): without the flag the script uses its own default template
# (`vicuna` in upstream MedicalGPT — confirm for this copy of the script).
# NOTE(review): no cell shown in this section creates ./dataset/SFT_mix_filter;
# confirm which script builds it so the notebook can be re-run from scratch.
# NOTE(review): --output_dir is the same outputs-llama3-3b-sft-v1 directory the
# other SFT sections use, so this run replaces any adapter already stored there.
# Run the matching merge cell right after this one.
!python supervised_finetuning.py \
    --model_name_or_path ./models/LLM-Research/Llama-3___2-3B-Instruct \
    --train_file_dir ./dataset/SFT_mix_filter/train \
    --validation_file_dir ./dataset/SFT_mix_filter/val \
    --template_name llama3 \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 8 \
    --max_eval_samples 500 \
    --do_train \
    --do_eval \
    --use_peft True \
    --bf16 \
    --num_train_epochs 1 \
    --learning_rate 5e-5 \
    --warmup_ratio 0.03 \
    --weight_decay 0.01 \
    --logging_strategy steps \
    --logging_steps 10 \
    --eval_steps 50 \
    --eval_strategy steps \
    --save_steps 500 \
    --save_strategy steps \
    --save_total_limit 2 \
    --preprocessing_num_workers 8 \
    --output_dir outputs-llama3-3b-sft-v1 \
    --ddp_timeout 30000 \
    --logging_first_step True \
    --target_modules all \
    --lora_rank 8 \
    --lora_alpha 16 \
    --lora_dropout 0.05 \
    --torch_dtype bfloat16 \
    --device_map auto \
    --report_to tensorboard \
    --ddp_find_unused_parameters False \
    --gradient_checkpointing False

In [None]:
# Merge the LoRA adapter saved in outputs-llama3-3b-sft-v1 into the base
# Llama-3.2-3B-Instruct weights and save the result to ./merged-llama3-3b-sft-mix-v2.
# NOTE(review): the DPO stage later in this notebook starts from
# ./merged-llama3-3b-sft-mix-v1, not the -mix-v2 directory written here —
# confirm which merged checkpoint DPO is meant to use.
# NOTE(review): outputs-llama3-3b-sft-v1 is shared by every SFT section in this
# notebook, so this merges whichever adapter was trained last.
!python merge_peft_adapter.py \
    --base_model ./models/LLM-Research/Llama-3___2-3B-Instruct \
    --lora_model outputs-llama3-3b-sft-v1 \
    --output_dir ./merged-llama3-3b-sft-mix-v2

## DPO 阶段（在混合数据 SFT 合并模型的基础上进行偏好对齐）

In [1]:
# DPO training with LoRA (rank 32 / alpha 64) on top of the merged mix-SFT model,
# using the llama3 prompt template.
# Flags are grouped by concern: model and template, data and sequence lengths,
# batching, optimisation, run mode and output, LoRA, precision and memory,
# then logging and checkpointing.
# Effective batch: 1 (per-device batch) x 16 (gradient accumulation) = 16
# preference pairs per optimizer step per device.
# NOTE(review): this reads ./merged-llama3-3b-sft-mix-v1, while the mix-SFT merge
# cell in this notebook writes ./merged-llama3-3b-sft-mix-v2 — confirm which
# checkpoint is intended; on a fresh checkout -mix-v1 is not produced by any
# cell shown here.
!python dpo_training.py \
    --model_name_or_path ./merged-llama3-3b-sft-mix-v1 \
    --template_name llama3 \
    --train_file_dir ./dataset/DPO/train \
    --validation_file_dir ./dataset/DPO/val \
    --max_eval_samples 200 \
    --max_source_length 1024 \
    --max_target_length 1024 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 16 \
    --learning_rate 1e-5 \
    --num_train_epochs 3 \
    --optim adamw_torch \
    --beta 0.05 \
    --do_train \
    --do_eval \
    --output_dir outputs-llama-3b-dpo-v1 \
    --use_peft True \
    --target_modules all \
    --lora_rank 32 \
    --lora_alpha 64 \
    --lora_dropout 0.05 \
    --bf16 True \
    --fp16 False \
    --gradient_checkpointing True \
    --device_map auto \
    --remove_unused_columns False \
    --report_to tensorboard \
    --logging_steps 10 \
    --save_steps 100
    

[32m2026-02-13 20:19:16.919[0m | [1mINFO    [0m | [36m__main__[0m:[36mmain[0m:[36m198[0m - [1mParse args: ScriptArguments(model_name_or_path='./merged-llama3-3b-sft-mix-v1', tokenizer_name_or_path=None, load_in_8bit=False, load_in_4bit=False, cache_dir=None, use_fast_tokenizer=False, torch_dtype=None, device_map='auto', trust_remote_code=True, dataset_name=None, dataset_config_name=None, train_file_dir='./dataset/DPO/train', validation_file_dir='./dataset/DPO/val', template_name='llama3', per_device_train_batch_size=1, per_device_eval_batch_size=1, max_source_length=1024, max_target_length=1024, min_target_length=4, max_train_samples=None, max_eval_samples=200, overwrite_cache=False, validation_split_percentage=1, preprocessing_num_workers=4, use_peft=True, qlora=False, target_modules='all', lora_rank=32, lora_dropout=0.05, lora_alpha=64.0, peft_path=None, do_train=True, do_eval=True, learning_rate=1e-05, lr_scheduler_type='cosine', warmup_steps=100, weight_decay=0.05, optim

In [2]:
# Merge the DPO LoRA adapter (outputs-llama-3b-dpo-v1) into the merged mix-SFT
# model it was trained on and save the final model to ./merged-llama3-3b-dpo-v2.
# --base_model must be the same checkpoint that was passed to dpo_training.py
# as --model_name_or_path (./merged-llama3-3b-sft-mix-v1 in this notebook).
!python merge_peft_adapter.py \
    --base_model ./merged-llama3-3b-sft-mix-v1 \
    --lora_model outputs-llama-3b-dpo-v1 \
    --output_dir ./merged-llama3-3b-dpo-v2

Namespace(base_model='./merged-llama3-3b-sft-mix-v1', tokenizer_path=None, lora_model='outputs-llama-3b-dpo-v1', resize_emb=False, output_dir='./merged-llama3-3b-dpo-v2', hf_hub_model_id='', hf_hub_token=None)
Base model: ./merged-llama3-3b-sft-mix-v1
LoRA model: outputs-llama-3b-dpo-v1
Loading LoRA for causal language model
Loading weights: 100%|█| 254/254 [00:01<00:00, 150.00it/s, Materializing param=m
Merging with merge_and_unload...
Saving to Hugging Face format...
Writing model shards: 100%|███████████████████████| 1/1 [00:09<00:00,  9.81s/it]
Done! model saved to ./merged-llama3-3b-dpo-v2
