# LLaMA-Factory (CMD)
https://github.com/hiyouga/LLaMA-Factory

## 初始環境設定

In [None]:
# Initial environment setup: expose user-level executables (~/.local/bin) to
# shell commands launched from this notebook and record the working directory.
import os
from pathlib import Path

HOME = str(Path.home())
Add_Binarry_Path = HOME + '/.local/bin'

# Append only when the entry is missing, so re-running this cell does not keep
# growing PATH with duplicate entries.
_existing_path = os.environ.get('PATH', '')
if Add_Binarry_Path not in _existing_path.split(os.pathsep):
    if _existing_path:
        os.environ['PATH'] = _existing_path + os.pathsep + Add_Binarry_Path
    else:
        os.environ['PATH'] = Add_Binarry_Path

# Working directory of the kernel (replaces the `!pwd` shell call).
current_foldr = os.getcwd()
current_foldr

## 套件

In [None]:
!pip install cohere gdown kaleido langchain openai pyngrok pypdf python-dotenv sentence-transformers tiktoken -q
!pip install accelerate bitsandbytes hf_transfer huggingface_hub optimum transformers==4.36.2 -q 
!pip install appdirs black black[jupyter] datasets fire loralib sentencepiece gradio==3.48.0 -q
!pip install fastapi jieba matplotlib nltk peft==0.7.0 protobuf pydantic rouge-chinese scipy sse-starlette trl==0.7.6 uvicorn -q 
!pip install deepspeed -q

In [None]:
# HF_TOKEN
# Never commit an access token to the notebook. Reuse HF_TOKEN when it is
# already set in the environment, otherwise prompt for it without echoing.
# NOTE(review): a literal token was previously stored in this cell; it should be
# treated as leaked and revoked in the Hugging Face account settings.
import os
from getpass import getpass

if not os.environ.get("HF_TOKEN"):
    os.environ["HF_TOKEN"] = getpass("Hugging Face access token (HF_TOKEN): ")

In [None]:
# Download dataset
from huggingface_hub import hf_hub_download
import pandas as pd

REPO_ID = "dikw/hh_rlhf_cn"
FILENAME = "harmless_base_cn_train.jsonl"

# local_dir="." places the file at ./harmless_base_cn_train.jsonl, which is the
# path the following cell reads. Without it the file only lives in the Hugging
# Face cache directory.
# Despite its name, `downloaded_model_path` holds the path of the dataset file.
downloaded_model_path = hf_hub_download(
    repo_id=REPO_ID,
    filename=FILENAME,
    repo_type="dataset",
    local_dir=".",
)
print(downloaded_model_path)

In [None]:
# Full contents of the downloaded dataset
import pandas as pd

file_path = './harmless_base_cn_train.jsonl'
# The file is JSON Lines (one JSON object per line), hence lines=True.
# (The keyword was previously misspelled as `lineas`, which raises TypeError.)
df = pd.read_json(path_or_buf=file_path, lines=True)
df

In [None]:
# Run the `wandb offline` CLI command before the training cells below.
!wandb offline

## Continue Pretraining (CP)

In [None]:
%%bash
#METHOD 01 Python
# Continue pre-training (--stage pt): train a LoRA adapter on the wiki_demo dataset.
# Remove any previous output directory before training.
rm -rf  path_to_pt_checkpoint
MODEL_ID="/work/g00cjz00/github/LLaMA-Factory/Llama-2-7b-hf"
# --plot_loss used to be passed twice (bare and as "True"); it is now given once.
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --stage pt \
    --do_train \
    --model_name_or_path "${MODEL_ID}" \
    --dataset wiki_demo \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir path_to_pt_checkpoint \
    --overwrite_cache \
    --per_device_train_batch_size 4 \
    --gradient_accumulation_steps 4 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --save_steps 1000 \
    --learning_rate 5e-5 \
    --num_train_epochs 1.0 \
    --fp16 \
    --max_samples 500 \
    --plot_loss True

## Supervised Fine-Tuning (SFT)

In [None]:
%%bash
#METHOD 01 Python
# Supervised fine-tuning (--stage sft): train a LoRA adapter on alpaca_gpt4_en
# and write it to path_to_sft_checkpoint.
MODEL_ID="/work/u00cjz00/slurm_jobs/github/models/CKIP-Llama-2-7b-chat"

# Remove any previous output directory before training.
rm -rf path_to_sft_checkpoint

# Arguments for src/train_bash.py, collected in an array to avoid line continuations.
sft_args=(
    --stage sft
    --do_train
    --model_name_or_path "${MODEL_ID}"
    --dataset alpaca_gpt4_en
    --template default
    --finetuning_type lora
    --lora_target q_proj,v_proj
    --output_dir path_to_sft_checkpoint
    --overwrite_cache
    --per_device_train_batch_size 4
    --gradient_accumulation_steps 4
    --lr_scheduler_type cosine
    --logging_steps 10
    --save_steps 1000
    --learning_rate 5e-5
    --num_train_epochs 3.0
    --fp16
    --max_samples 500
    --plot_loss True
)

CUDA_VISIBLE_DEVICES=0 python src/train_bash.py "${sft_args[@]}"

## Direct Preference Optimization (DPO)
LLM的直接偏好優化

In [None]:
%%bash
#METHOD 01
# Direct Preference Optimization (--stage dpo) on comparison_gpt4_en, starting
# from the adapter in path_to_sft_checkpoint and creating a new adapter.
# NOTE(review): the SFT cell trains path_to_sft_checkpoint against a different
# MODEL_ID (CKIP-Llama-2-7b-chat); confirm that adapter is meant for this base.
MODEL_ID="/work/g00cjz00/github/LLaMA-Factory/Llama-2-7b-hf"

# Remove any previous output directory before training.
rm -rf path_to_dpo_checkpoint

# Arguments for src/train_bash.py, collected in an array to avoid line continuations.
dpo_args=(
    --stage dpo
    --do_train
    --model_name_or_path "${MODEL_ID}"
    --adapter_name_or_path path_to_sft_checkpoint
    --create_new_adapter
    --dataset comparison_gpt4_en
    --template default
    --finetuning_type lora
    --lora_target q_proj,v_proj
    --output_dir path_to_dpo_checkpoint
    --per_device_train_batch_size 2
    --gradient_accumulation_steps 4
    --lr_scheduler_type cosine
    --logging_steps 10
    --save_steps 1000
    --learning_rate 1e-5
    --num_train_epochs 1.0
    --fp16
    --max_samples 500
    --plot_loss True
)

CUDA_VISIBLE_DEVICES=0 python src/train_bash.py "${dpo_args[@]}"

## Reward Modeling (RM), RLHF
- 反映人类对生成回答偏好

In [None]:
%%bash
#METHOD 01
# Reward modeling (--stage rm) on comparison_gpt4_en, starting from the adapter
# in path_to_sft_checkpoint; the result is written to path_to_rm_checkpoint.
MODEL_ID="/work/g00cjz00/github/LLaMA-Factory/Llama-2-7b-hf"

# Remove any previous output directory before training.
rm -rf path_to_rm_checkpoint

# Arguments for src/train_bash.py, collected in an array to avoid line continuations.
rm_args=(
    --stage rm
    --do_train
    --model_name_or_path "${MODEL_ID}"
    --adapter_name_or_path path_to_sft_checkpoint
    --create_new_adapter
    --dataset comparison_gpt4_en
    --template default
    --finetuning_type lora
    --lora_target q_proj,v_proj
    --output_dir path_to_rm_checkpoint
    --per_device_train_batch_size 2
    --gradient_accumulation_steps 4
    --lr_scheduler_type cosine
    --logging_steps 10
    --save_steps 1000
    --learning_rate 1e-6
    --num_train_epochs 1.0
    --plot_loss
    --fp16
)

CUDA_VISIBLE_DEVICES=0 python src/train_bash.py "${rm_args[@]}"

## PPO Training (PPO), RLHF
- 強化學習（PPO）來最大化預測的報酬（reward）

In [None]:
%%bash
#METHOD 01
# PPO training (--stage ppo) on alpaca_gpt4_en, starting from the adapter in
# path_to_sft_checkpoint and using path_to_rm_checkpoint as the reward model.
MODEL_ID="/work/g00cjz00/github/LLaMA-Factory/Llama-2-7b-hf"

# Remove any previous output directory before training.
rm -rf path_to_ppo_checkpoint

# Arguments for src/train_bash.py, collected in an array to avoid line continuations.
ppo_args=(
    --stage ppo
    --do_train
    --model_name_or_path "${MODEL_ID}"
    --adapter_name_or_path path_to_sft_checkpoint
    --create_new_adapter
    --dataset alpaca_gpt4_en
    --template default
    --finetuning_type lora
    --lora_target q_proj,v_proj
    --reward_model path_to_rm_checkpoint
    --output_dir path_to_ppo_checkpoint
    --per_device_train_batch_size 2
    --gradient_accumulation_steps 4
    --lr_scheduler_type cosine
    --top_k 0
    --top_p 0.9
    --logging_steps 10
    --save_steps 1000
    --learning_rate 1e-5
    --num_train_epochs 1.0
    --plot_loss
    --fp16
)

CUDA_VISIBLE_DEVICES=0 python src/train_bash.py "${ppo_args[@]}"

## Merge LoRA weights and export model
- Merging LoRA weights into a quantized model is not supported.
- Use --export_quantization_bit 4 and --export_quantization_dataset data/c4_demo.json to quantize the model after merging the LoRA weights.

In [None]:
%%bash
#METHOD 02
# Merge the LoRA adapter into the base model and export the result to path_to_export.
# NOTE(review): no cell in this notebook writes a directory named
# path_to_checkpoint (the training cells write path_to_*_checkpoint); point
# --adapter_name_or_path at the adapter that should be merged.
MODEL_ID="/work/g00cjz00/github/LLaMA-Factory/Llama-2-7b-hf"

# Remove any previous export directory first.
rm -rf path_to_export

# Arguments for src/export_model.py, collected in an array to avoid line continuations.
export_args=(
    --model_name_or_path "${MODEL_ID}"
    --adapter_name_or_path path_to_checkpoint
    --template default
    --finetuning_type lora
    --export_dir path_to_export
    --export_size 2
    --export_legacy_format False
)

python src/export_model.py "${export_args[@]}"

## API
- Visit http://localhost:8000/docs for API documentation.

In [None]:
%%bash
# Launch src/api_demo.py with the base model plus the SFT LoRA adapter.
MODEL_ID="/work/g00cjz00/github/LLaMA-Factory/Llama-2-7b-hf"

# Arguments for src/api_demo.py, collected in an array to avoid line continuations.
api_args=(
    --model_name_or_path "${MODEL_ID}"
    --adapter_name_or_path path_to_sft_checkpoint
    --template default
    --finetuning_type lora
)

python src/api_demo.py "${api_args[@]}"

## CLI Demo

In [None]:
%%bash
# CLI demo: run the command-line chat script with the base model plus the SFT
# LoRA adapter. This cell previously launched src/web_demo.py, duplicating the
# "Web Demo" cell below instead of starting the CLI demo.
# NOTE(review): the CLI demo presumably reads prompts from stdin, which a
# %%bash cell does not provide interactively — confirm, or run it from a terminal.
MODEL_ID="/work/g00cjz00/github/LLaMA-Factory/Llama-2-7b-hf"

python src/cli_demo.py \
    --model_name_or_path "${MODEL_ID}" \
    --adapter_name_or_path path_to_sft_checkpoint \
    --template default \
    --finetuning_type lora

## Web Demo

In [None]:
%%bash
# Launch src/web_demo.py with the base model plus a LoRA adapter.
# NOTE(review): no cell in this notebook writes a directory named
# path_to_checkpoint; point --adapter_name_or_path at an existing adapter.
MODEL_ID="/work/g00cjz00/github/LLaMA-Factory/Llama-2-7b-hf"

# Arguments for src/web_demo.py, collected in an array to avoid line continuations.
web_args=(
    --model_name_or_path "${MODEL_ID}"
    --adapter_name_or_path path_to_checkpoint
    --template default
    --finetuning_type lora
)

python src/web_demo.py "${web_args[@]}"

## Evaluation

In [None]:
%%bash
# Evaluate the base model plus the SFT LoRA adapter on the MMLU test split.
MODEL_ID="/work/g00cjz00/github/LLaMA-Factory/Llama-2-7b-hf"

# Every line except the last must end with "\". The continuation after
# "--finetuning_type lora" was missing, which ended the command early and made
# the shell try to execute "--task ..." as a separate command.
CUDA_VISIBLE_DEVICES=0 python src/evaluate.py \
    --model_name_or_path "${MODEL_ID}" \
    --adapter_name_or_path path_to_sft_checkpoint \
    --template vanilla \
    --finetuning_type lora \
    --task mmlu \
    --split test \
    --lang en \
    --n_shot 5 \
    --batch_size 4

## Predict
- Use --per_device_eval_batch_size=1 for LLaMA-2 models in fp16 predict.
- We recommend using --per_device_eval_batch_size=1 and --max_target_length 128 at 4/8-bit predict.

In [None]:
%%bash
# Batch prediction (--stage sft --do_predict) with the SFT LoRA adapter;
# results are written to path_to_predict_result.
# Remove any previous result directory first.
rm -rf path_to_predict_result
MODEL_ID="/work/u00cjz00/slurm_jobs/github/models/CKIP-Llama-2-7b-chat"

# --max_samples used to be passed twice (100, then 500). Only one value can take
# effect, so the conflicting duplicate is removed and the later value (500) kept.
# NOTE(review): the DeepSpeed variant of this cell uses 100 — confirm which is intended.
CUDA_VISIBLE_DEVICES=1 python src/train_bash.py \
    --stage sft \
    --do_predict \
    --model_name_or_path "${MODEL_ID}" \
    --adapter_name_or_path path_to_sft_checkpoint \
    --dataset alpaca_gpt4_en \
    --template default \
    --finetuning_type lora \
    --output_dir path_to_predict_result \
    --per_device_eval_batch_size 8 \
    --predict_with_generate \
    --fp16 \
    --max_samples 500

In [None]:
%%bash
#METHOD 04 ds_config.json
# Write the DeepSpeed configuration file used by the launch below. The heredoc
# delimiter is escaped (\EOF), so the shell performs no expansion inside the JSON.
cat > ds_config.json << \EOF
{
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto",
  "zero_allow_untested_optimizer": true,
  "fp16": {
    "enabled": "auto",
    "loss_scale": 0,
    "initial_scale_power": 16,
    "loss_scale_window": 1000,
    "hysteresis": 2,
    "min_loss_scale": 1
  },  
  "zero_optimization": {
    "stage": 2,
    "allgather_partitions": true,
    "allgather_bucket_size": 5e8,
    "reduce_scatter": true,
    "reduce_bucket_size": 5e8,
    "overlap_comm": false,
    "contiguous_gradients": true
  }
}
EOF

#METHOD 04 Deepspeed
# Make user-level executables (~/.local/bin) visible to this shell.
export PATH="${PATH}:${HOME}/.local/bin"
MODEL_ID="/work/u00cjz00/slurm_jobs/github/models/CKIP-Llama-2-7b-chat"

# Remove any previous result directory first.
rm -rf path_to_predict_result

# Arguments for src/train_bash.py, collected in an array to avoid line continuations.
predict_args=(
    --deepspeed ds_config.json
    --stage sft
    --do_predict
    --model_name_or_path "${MODEL_ID}"
    --adapter_name_or_path path_to_sft_checkpoint
    --dataset alpaca_gpt4_en
    --template default
    --finetuning_type lora
    --output_dir path_to_predict_result
    --per_device_eval_batch_size 8
    --max_samples 100
    --predict_with_generate
    --fp16
)

deepspeed --num_gpus 2 --master_port=9901 src/train_bash.py "${predict_args[@]}"

In [None]:
%%bash
# Print SLURM_PROCID from inside every task started by srun.
# The variable is expanded by the shell that sees it unquoted, so the previous
# form (`srun ... echo ${SLURM_PROCID}`) substituted the launching shell's value
# before srun even started. Deferring the expansion to a per-task `bash -c`
# lets each task report its own value.
srun --mpi=pmi2 bash -c 'echo "${SLURM_PROCID}"'


## JSON

In [None]:
# Full contents of the generated predictions
import pandas as pd

file_path = './path_to_predict_result/generated_predictions.jsonl'
# The file is JSON Lines (one JSON object per line), hence lines=True.
# (The keyword was previously misspelled as `lineas`, which raises TypeError.)
df = pd.read_json(path_or_buf=file_path, lines=True)
df

In [None]:
# Full contents of the aggregated prediction metrics
import pandas as pd

file_path = './path_to_predict_result/all_results.json'
# NOTE(review): all_results.json is presumably a flat {metric: value} object.
# pd.read_json's default DataFrame mode rejects a dict of plain scalars
# ("If using all scalar values, you must pass an index"), so the file is read
# as a Series and then turned into a one-column frame indexed by metric name.
df = pd.read_json(path_or_buf=file_path, typ="series").to_frame(name="value")
df