# LLaMA_Factory WEB (HPC)

## 環境安裝
- 切換到原生 Python 3 (ipykernel)

In [None]:
%%bash
### 0. Top-level storage folder (under /) for the per-image pip packages, and a label for this setup
saveFolder=work
label=LlamaFactory

### 1. Singularity image the new Jupyter kernel will run in
IMAGE=/work/u00cjz00/nvidia/cuda118/c00cjz00_cuda11.8_pytorch_2.1.2-cuda11.8-cudnn8-devel-llama_factory.sif
IMAGE_basename=S-${saveFolder}-${label}_$(basename "$IMAGE" .sif)

### 2. Host folders that are bind-mounted over ~/.local/lib and ~/.local/bin inside the container
imageRoot=/${saveFolder}/$(whoami)/libraryFolder/${IMAGE_basename}
libraryFolder=${imageRoot}/local/lib
libraryFolder_binding=${libraryFolder}:${HOME}/.local/lib
binFolder=${imageRoot}/local/bin
binFolder_binding=${binFolder}:${HOME}/.local/bin
# Every run starts from empty package folders
rm -rf "${imageRoot}"
mkdir -p "${libraryFolder}" "${binFolder}"

### 3. pip-install ipykernel and widget support with the image's pip, with both folders bound
ml libs/singularity/3.10.2
singularity exec -B "${libraryFolder_binding}" -B "${binFolder_binding}" "${IMAGE}" pip install -q ipykernel IProgress ipywidgets

### 4. Register a kernelspec for the image, copied from the shared template
IPYKERNEL=/work/u00cjz00/slurm_jobs/ipykernel/t2/image_with_ipykernel_local
kernelRoot=${HOME}/.local/share/jupyter/kernels
kernelDir=${kernelRoot}/${IMAGE_basename}
mkdir -p "${kernelRoot}/"
rm -rf "${kernelDir}"
cp -rf "${IPYKERNEL}" "${kernelDir}"
chmod -R 755 "${kernelDir}"
# Replace the template placeholders in kernel.json. '@' is the sed delimiter because
# every substituted value is a path containing '/'.
sed -i \
    -e "s@templateSIF@${IMAGE}@g" \
    -e "s@templateImage@Image_${IMAGE_basename}@g" \
    -e "s@templateLibrayFolder@${libraryFolder_binding}@g" \
    -e "s@templateBinFolder@${binFolder_binding}@g" \
    "${kernelDir}/kernel.json"

### 5. Report the package folder size and show the generated kernelspec
# The kernelspec is read from the same ${HOME}-based path it was written to above.
du -sh "${libraryFolder}"
echo "${kernelDir}/kernel.json"
cat "${kernelDir}/kernel.json"

# Equivalent manual command for entering the image with the same binds. The bind targets
# follow the current user's ${HOME} instead of a fixed account's home directory.
cmd="/work/opt/ohpc/Taiwania3/libs/singularity/3.10.2/bin/singularity exec --nv --cleanenv \
-B /work -B /work/u00cjz00/os/ubuntu/bin:/usr/ubuntu_bin \
-B ${libraryFolder_binding} \
-B ${binFolder_binding} \
${IMAGE} \
bash -c 'export PATH=\$PATH:\$HOME/.local/bin; echo \$PATH;'
"

# Left unquoted on purpose so the command prints on a single line
echo ${cmd}

## 初始環境設定
- 重新載入 (reload) kernel，並切換到 kernel `Image_S-work-LlamaFactory_c00cjz00_cuda11.8_pytorch_2.1.2-cuda11.8-cudnn8-devel-llama_factory`
- https://zhuanlan.zhihu.com/p/645010851

In [None]:
# Initial environment setup: expose user-installed tools (~/.local/bin) and the
# extra binaries under /usr/ubuntu_bin to commands started from this kernel.
import os
from pathlib import Path

HOME = str(Path.home())
Add_Binarry_Path = HOME + '/.local/bin:/usr/ubuntu_bin'

# Append only the entries that are not on PATH yet, so running this cell again
# does not keep growing PATH, and an unset PATH does not raise KeyError.
_current_path = os.environ.get('PATH', '')
_path_entries = _current_path.split(':') if _current_path else []
for _extra_dir in Add_Binarry_Path.split(':'):
    if _extra_dir not in _path_entries:
        _path_entries.append(_extra_dir)
os.environ['PATH'] = ':'.join(_path_entries)

In [None]:
!nvidia-smi
import torch
torch.cuda.is_available()

## 套件安裝 Llama-factory 免安裝

# 套件安裝 Llama-factory
!pip install llmtuner==0.5.3 -q 
!pip install deepspeed==0.13.4 -q
!pip install bitsandbytes==0.42.0 -q

In [None]:
# Optional and disabled: install unsloth with its `cu118` extra straight from GitHub.
#pip install "unsloth[cu118] @ git+https://github.com/unslothai/unsloth.git"

## 下載 LLaMA-Factory

In [None]:
%%bash
# Start from a pristine checkout: remove any earlier clone, then fetch upstream again.
repo_dir=LLaMA-Factory
rm -rf "${repo_dir}"
git clone https://github.com/hiyouga/LLaMA-Factory.git "${repo_dir}"

## 啟動 Llama-Factory

In [None]:
%%bash
# Credentials are taken from the environment when set (WEBUI_USER, WEBUI_PASSWORD,
# HF_TOKEN, e.g. via `%env HF_TOKEN=hf_xxx` in an earlier cell). The notebook's
# original placeholder values remain the defaults.
# Values are pasted into a Python string literal below, so they must not contain '"'.
webui_user=${WEBUI_USER:-nchc}
webui_password=${WEBUI_PASSWORD:-nchcorgtw}
hf_token=${HF_TOKEN:-hf_}

# IP: first column of the /etc/hosts line(s) matching this host's alias
node_ip=$(cat /etc/hosts |grep "$(hostname -a)" | awk '{print $1}')
# PORT: bind a socket to port 0 so the OS returns a free port, then release it
noed_port_genai=$(python -c "import socket; s = socket.socket(socket.AF_INET, socket.SOCK_STREAM); s.bind(('', 0)); addr = s.getsockname(); s.close(); print(addr[1])")
# PROXY: path prefix handed to Gradio as root_path
proxy_url=/rstudio/${node_ip}/${noed_port_genai}
# URL: address printed for the user to open
https_url=https://node01.biobank.org.tw${proxy_url}/

# SCRIPT FILE: the heredoc is unquoted, so the shell fills in port, host, proxy path
# and credentials before the Python file is written.
cat << EOF >  LLaMA-Factory/src/train_web_demo.py
from llmtuner import create_ui
import os
os.environ["WANDB_DISABLED"] = "true"

#print("${https_url}")

def main():
    demo = create_ui()
    demo.queue()
    demo.launch(server_port=${noed_port_genai}, server_name="$(hostname -s)", share=False, inbrowser=True, root_path="${proxy_url}", auth=("${webui_user}", "${webui_password}"))

if __name__ == "__main__":
    main()
EOF

# Stop any web demo that is still running from a previous launch
ps -ef |grep train_web_demo | awk '{print $2}' | xargs kill -9
sleep 1

# Print the access information, then start the UI in the background on GPU 1.
# Output goes to LLaMA-Factory/llamafactory.log.
echo $https_url
echo "Account: ${webui_user}"
echo "Password: ${webui_password}"
cd LLaMA-Factory
CUDA_VISIBLE_DEVICES=1 HF_TOKEN="${hf_token}" nohup python src/train_web_demo.py > ./llamafactory.log 2>&1 &

## CMD LINE LORA

In [None]:
%%bash
# Single-GPU LoRA supervised fine-tuning of Llama-2-7b-chat on the "medical" dataset.
cd LLaMA-Factory

# Trainer arguments are collected in an array so each option sits on its own line
# without backslash continuations.
train_args=(
    --stage sft
    --do_train True
    --model_name_or_path /work/u00cjz00/slurm_jobs/github/models/Llama-2-7b-chat-hf
    --finetuning_type lora
    --template default
    --dataset_dir data
    --dataset medical
    --cutoff_len 1024
    --learning_rate 5e-05
    --num_train_epochs 3.0
    --max_samples 100000
    --per_device_train_batch_size 2
    --gradient_accumulation_steps 8
    --lr_scheduler_type cosine
    --max_grad_norm 1.0
    --logging_steps 5
    --save_steps 100
    --warmup_steps 0
    --output_dir saves/LLaMA2-7B/lora/train_2024_v1
    --fp16 True
    --lora_rank 8
    --lora_alpha 16
    --lora_dropout 0.1
    --lora_target q_proj,v_proj
    --plot_loss True
)

# Only GPU index 3 is made visible to the training process
CUDA_VISIBLE_DEVICES=3 python src/train_bash.py "${train_args[@]}"

## CMDLINE FULL 
### Deepspeed STAGE 2

## ACCELERATE

In [None]:
%%bash
# Multi-GPU run through `accelerate launch`.
# One variable names the output directory so the folder that is wiped beforehand is the
# same folder the trainer writes to. Previously a different directory was removed,
# leaving stale contents in the real output directory.
output_dir=/scratch/g00cjz00/save/LLaMA2-7B/lora/train_2024_v1

# Stop any earlier training run
ps -ef |grep 'train_bash.py' | awk '{print $2}' | xargs kill -9
sleep 10
# NOTE: this empties the whole user cache directory (~/.cache)
rm -rf ~/.cache/*
cd LLaMA-Factory
rm -rf "${output_dir}"
sleep 3

# NOTE(review): --finetuning_type is "full" while --lora_* options are also passed;
# confirm which fine-tuning mode is intended.
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch --config_file examples/lora_multi_gpu/config.yaml src/train_bash.py \
    --stage sft \
    --do_train \
    --model_name_or_path /work/u00cjz00/slurm_jobs/github/models/Llama-2-7b-chat-hf \
    --finetuning_type full \
    --template default \
    --dataset_dir data \
    --dataset medical \
    --cutoff_len 4096 \
    --learning_rate 5e-05 \
    --num_train_epochs 3.0 \
    --max_samples 100000 \
    --per_device_train_batch_size 2 \
    --gradient_accumulation_steps 8 \
    --lr_scheduler_type cosine \
    --max_grad_norm 1.0 \
    --logging_steps 10 \
    --save_steps 100 \
    --warmup_steps 0 \
    --output_dir "${output_dir}" \
    --fp16 True \
    --lora_rank 8 \
    --lora_dropout 0.1 \
    --lora_target q_proj,v_proj \
    --plot_loss True

#> ./medical.log 2>&1 &

## DEEPSPEED 7B

In [None]:
%%bash
# DeepSpeed run on 4 GPUs for Llama-2-7b-chat using ../deep_speed_stage2_cpu.json.

# Stop any earlier training run, empty ~/.cache and clear the previous output folder
ps -ef | grep 'train_bash.py' | awk '{print $2}' | xargs kill -9
sleep 3
rm -rf ~/.cache/*
cd LLaMA-Factory
rm -rf /scratch/g00cjz00/save/LLaMA2-7B/lora/train_2024_v1
sleep 3

# Trainer arguments, one option per line
train_args=(
    --deepspeed ../deep_speed_stage2_cpu.json
    --stage sft
    --do_train True
    --model_name_or_path /work/u00cjz00/slurm_jobs/github/models/Llama-2-7b-chat-hf
    --finetuning_type full
    --template default
    --dataset_dir data
    --dataset medical
    --cutoff_len 4096
    --learning_rate 5e-05
    --num_train_epochs 3.0
    --max_samples 100000
    --per_device_train_batch_size 2
    --gradient_accumulation_steps 8
    --lr_scheduler_type cosine
    --max_grad_norm 1.0
    --logging_steps 10
    --save_steps 100
    --warmup_steps 0
    --output_dir /scratch/g00cjz00/save/LLaMA2-7B/lora/train_2024_v1
    --fp16 True
    --lora_rank 8
    --lora_dropout 0.1
    --lora_target q_proj,v_proj
    --plot_loss True
)

deepspeed --num_gpus 4 src/train_bash.py "${train_args[@]}"

#> ./medical.log 2>&1 &


## 13B

In [None]:
%%bash
# DeepSpeed run on 4 GPUs for Llama-2-13b-chat using ../deep_speed_stage3.json.

# Stop any earlier training run, empty ~/.cache and clear the previous output folder
ps -ef | grep 'train_bash.py' | awk '{print $2}' | xargs kill -9
sleep 3
rm -rf ~/.cache/*
cd LLaMA-Factory
rm -rf /scratch/g00cjz00/save/LLaMA2-7B/lora/train_2024_v1
sleep 3

# Trainer arguments, one option per line
train_args=(
    --deepspeed ../deep_speed_stage3.json
    --stage sft
    --do_train True
    --model_name_or_path /work/g00cjz00/models/meta-llama/Llama-2-13b-chat-hf
    --finetuning_type full
    --template default
    --dataset_dir data
    --dataset medical
    --cutoff_len 1024
    --learning_rate 5e-05
    --num_train_epochs 3.0
    --max_samples 100000
    --per_device_train_batch_size 1
    --gradient_accumulation_steps 1
    --lr_scheduler_type cosine
    --max_grad_norm 1.0
    --logging_steps 10
    --save_steps 100
    --warmup_steps 0
    --output_dir /scratch/g00cjz00/save/LLaMA2-7B/lora/train_2024_v1
    --fp16 True
    --lora_rank 8
    --lora_dropout 0.1
    --lora_target q_proj,v_proj
    --plot_loss True
)

deepspeed --num_gpus 4 src/train_bash.py "${train_args[@]}"

#> ./medical.log 2>&1 &


## MIXTRAL 120G LORA

In [None]:
%%bash
# Background LoRA run for Mixtral-8x7B-Instruct on 4 GPUs; output is logged to
# LLaMA-Factory/medical.log.

# Stop any earlier training run, empty ~/.cache and clear the previous output folder
ps -ef | grep 'train_bash.py' | awk '{print $2}' | xargs kill -9
sleep 3
rm -rf ~/.cache/*
cd LLaMA-Factory
rm -rf /scratch/g00cjz00/save/LLaMA2-7B/lora/train_2024_v1
sleep 3

# Trainer arguments, one option per line
train_args=(
    --deepspeed ../deep_speed_stage3.json
    --stage sft
    --do_train True
    --model_name_or_path /work/g00cjz00/models/model/mistralai/Mixtral-8x7B-Instruct-v0.1
    --finetuning_type lora
    --template mistral
    --dataset_dir data
    --dataset medical
    --cutoff_len 4096
    --learning_rate 5e-05
    --num_train_epochs 3.0
    --max_samples 100000
    --per_device_train_batch_size 4
    --gradient_accumulation_steps 4
    --lr_scheduler_type cosine
    --max_grad_norm 1.0
    --logging_steps 10
    --save_steps 100
    --warmup_steps 0
    --output_dir /scratch/g00cjz00/save/LLaMA2-7B/lora/train_2024_v1
    --fp16 True
    --lora_rank 8
    --lora_dropout 0.1
    --lora_target q_proj,v_proj
    --plot_loss True
)

nohup deepspeed --num_gpus 4 src/train_bash.py "${train_args[@]}" > ./medical.log 2>&1 &
    

In [None]:
%%bash
# Background LoRA run for Mixtral-8x7B-Instruct on local GPUs 0-3 using the repository's
# ds_z3_config.json; output is logged to LLaMA-Factory/medical4.log.

# Stop any earlier training run, empty ~/.cache and clear the previous output folder
ps -ef | grep 'train_bash.py' | awk '{print $2}' | xargs kill -9
sleep 3
rm -rf ~/.cache/*
cd LLaMA-Factory
rm -rf /scratch/g00cjz00/save/LLaMA2-7B/lora/train_2024_v2
sleep 3

# Trainer arguments, one option per line
train_args=(
    --deepspeed examples/full_multi_gpu/ds_z3_config.json
    --stage sft
    --do_train True
    --model_name_or_path /work/g00cjz00/models/model/mistralai/Mixtral-8x7B-Instruct-v0.1
    --finetuning_type lora
    --template mistral
    --dataset_dir data
    --dataset medical
    --cutoff_len 4096
    --learning_rate 5e-05
    --num_train_epochs 3.0
    --max_samples 100000
    --per_device_train_batch_size 4
    --gradient_accumulation_steps 4
    --lr_scheduler_type cosine
    --max_grad_norm 1.0
    --logging_steps 10
    --save_steps 100
    --warmup_steps 0
    --output_dir /scratch/g00cjz00/save/LLaMA2-7B/lora/train_2024_v2
    --fp16 True
    --lora_rank 8
    --lora_dropout 0.1
    --lora_target q_proj,v_proj
    --plot_loss True
)

# Alternative launcher selecting GPUs by count instead of by index:
#nohup deepspeed  --num_gpus 2 src/train_bash.py ...
nohup deepspeed --include localhost:0,1,2,3 src/train_bash.py "${train_args[@]}" > ./medical4.log 2>&1 &
    

## 套件安裝 OPENCC
```
s2t.json Simplified Chinese to Traditional Chinese 簡體到繁體
t2s.json Traditional Chinese to Simplified Chinese 繁體到簡體
s2tw.json Simplified Chinese to Traditional Chinese (Taiwan Standard) 簡體到臺灣正體
tw2s.json Traditional Chinese (Taiwan Standard) to Simplified Chinese 臺灣正體到簡體
s2hk.json Simplified Chinese to Traditional Chinese (Hong Kong variant) 簡體到香港繁體
hk2s.json Traditional Chinese (Hong Kong variant) to Simplified Chinese 香港繁體到簡體
s2twp.json Simplified Chinese to Traditional Chinese (Taiwan Standard) with Taiwanese idiom 簡體到繁體（臺灣正體標準）並轉換爲臺灣常用詞彙
tw2sp.json Traditional Chinese (Taiwan Standard) to Simplified Chinese with Mainland Chinese idiom 繁體（臺灣正體標準）到簡體並轉換爲中國大陸常用詞彙
t2tw.json Traditional Chinese (OpenCC Standard) to Taiwan Standard 繁體（OpenCC 標準）到臺灣正體
hk2t.json Traditional Chinese (Hong Kong variant) to Traditional Chinese 香港繁體到繁體（OpenCC 標準）
t2hk.json Traditional Chinese (OpenCC Standard) to Hong Kong variant 繁體（OpenCC 標準）到香港繁體
t2jp.json Traditional Chinese Characters (Kyūjitai) to New Japanese Kanji (Shinjitai) 繁體（OpenCC 標準，舊字體）到日文新字體
jp2t.json New Japanese Kanji (Shinjitai) to Traditional Chinese Characters (Kyūjitai) 日文新字體到繁體（OpenCC 標準，舊字體）
tw2t.json Traditional Chinese (Taiwan standard) to Traditional Chinese 臺灣正體到繁體（OpenCC 標準）
```

In [None]:
!pip install opencc
!pip install opencc-python-reimplemented

## DATASET

In [None]:
import json
from itertools import islice

import opencc  # Simplified/Traditional Chinese conversion
from datasets import load_dataset

# Converter profile: 's2twp' = Simplified -> Traditional (Taiwan standard) with Taiwanese idioms.
# The plain Simplified -> Traditional profile is kept below for reference.
#op_cc=opencc.OpenCC('s2t')
op_cc = opencc.OpenCC('s2twp')

# Load the dataset in streaming mode, so records are iterated instead of indexed.
dataset = load_dataset("michaelwzhu/ChatMed_Consult_Dataset", split="train", streaming=True, encoding='utf-8')

# Maximum number of records to extract; 0 (or any non-positive value) means no limit.
limit = 0

# The instruction text is the same for every record, so it is converted once.
instruction_text = op_cc.convert("现在你是一名专业的中医医生，请用你的专业知识提供详尽而清晰的关于中医问题的回答。")

# Keep only the fields needed for fine-tuning and build a list of dicts.
extracted_data = [
    {
        "instruction": instruction_text,
        "input": op_cc.convert(example["query"]),
        "output": op_cc.convert(example["response"]),
    }
    for example in islice(dataset, limit if limit > 0 else None)
]

# Name of the JSON file to write
json_filename = "data.json"

# Write the JSON file as UTF-8 with the Chinese text stored as-is rather than as
# \uXXXX escapes, so the file is readable and does not depend on the platform encoding.
with open(json_filename, "w", encoding="utf-8") as json_file:
    json.dump(extracted_data, json_file, indent=4, ensure_ascii=False)

print(f"數據已提取並保存為 {json_filename}")

In [None]:
# Summary statistics over the complete extracted dataset
import pandas as pd

df = pd.read_json('data.json')
df.describe()

In [None]:
# Keep the first 10,000 records and save them as a LLaMA-Factory data file
import pandas as pd

df = pd.read_json('data.json')
dataset_df_10k = df.head(10000)
dataset_df_10k.to_json('LLaMA-Factory/data/medicalQA.json', orient='records')
dataset_df_10k

In [None]:
# Show the first 100 rows untruncated. print() renders the line breaks in the string,
# whereas display() of a str shows its repr with escaped "\n".
print(dataset_df_10k[0:100].to_string())

In [None]:
# Compute the SHA-1 of the exported file; then open LLaMA-Factory/data/dataset_info.json
# and use this value for the dataset entry's "file_sha1" field.
!sha1sum LLaMA-Factory/data/medicalQA.json

```
"medical": {
    "file_name": "medicalQA.json",
    "file_sha1": "07ffc21c67eeee91501332123be8c640259d0607",
    "columns": {
      "prompt": "instruction",
      "query": "input",
      "response": "output"
    }
  },
```

## MEMO
```
/work/u00cjz00/slurm_jobs/github/models/Llama-2-7b-chat-hf
```
```
你是人工智慧助理，以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。USER: {user} ASSISTANT:
```
```
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {user} ASSISTANT:
```
```
learning_rate: 5e-05
distributed_type: multi-GPU
optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
lr_scheduler_type: cosine
lr_scheduler_warmup_ratio: 0.03
num_epochs: 5.0
```
```
https://zhuanlan.zhihu.com/p/641960340
```

## SINGULARITY


In [None]:
%%bash
# Run a full fine-tuning demo inside the Singularity image, using scratch space as the
# container's home and /tmp.

me=$(whoami)
container_home=/scratch/${me}/home/${me}
container_tmp=/scratch/${me}/tmp
save_root=/work/${me}/llama-factory/save
model_root=/work/u00cjz00/slurm_jobs/github/models
image=/work/u00cjz00/nvidia/cuda118/c00cjz00_cuda11.8_pytorch_2.1.2-cuda11.8-cudnn8-devel-llama_factory.sif

# Stop any earlier training run, then prepare and clean the working folders
ps -ef | grep 'train_bash.py' | awk '{print $2}' | xargs kill -9
mkdir -p "${container_home}" "${container_tmp}"
mkdir -p "${save_root}"
rm -rf "${container_home}"/.cache/*
rm -rf "${save_root}/demo"
sleep 3

# Generate the script that runs inside the container. The heredoc is unquoted, so the
# user name is filled in now and the backslash-newline pairs are joined into one line.
cat << EOF > "${container_tmp}/demo.sh"
#!/bin/bash
cd /app
deepspeed --num_gpus 4 src/train_bash.py \
    --deepspeed /app/examples/full_multi_gpu/ds_z3_config.json \
    --stage sft \
    --do_train \
    --model_name_or_path /work/u00cjz00/slurm_jobs/github/models/Llama-2-7b-chat-hf \
    --dataset alpaca_gpt4_en \
    --dataset_dir data \
    --template default \
    --finetuning_type full \
    --output_dir /work/${me}/llama-factory/save/demo \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 2 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --learning_rate 5e-5 \
    --num_train_epochs 3.0 \
    --max_samples 3000 \
    --val_size 0.1 \
    --plot_loss \
    --fp16
EOF

chmod 755 "${container_tmp}/demo.sh"

# Launch the container with GPU support (--nv) and a contained environment (-C);
# the generated script is visible inside as /tmp/demo.sh through the /tmp bind.
ml libs/singularity/3.10.2
singularity exec -C --nv \
    -B "${container_home}:/home/${me}" \
    -B "${container_tmp}:/tmp" \
    -B "${save_root}" \
    -B "${model_root}" \
    "${image}" \
    /tmp/demo.sh

## SLURM

In [None]:
%%bash
# Generate two files in the home directory:
#   ~/demo.sh    - prepares folders and runs the training demo inside the Singularity image
#   ~/demo.slurm - SLURM batch script that simply calls ~/demo.sh

# The delimiter is quoted, so the body is written verbatim (nothing is expanded now);
# it is distinct from the inner "EOF" so the nested heredoc is kept as part of the file.
cat << 'DEMO_SH' > ~/demo.sh
#!/bin/bash

ps -ef |grep 'train_bash.py' | awk '{print $2}' | xargs kill -9
mkdir -p /scratch/$(whoami)/home/$(whoami) /scratch/$(whoami)/tmp
mkdir -p /work/$(whoami)/llama-factory/save
rm -rf /scratch/$(whoami)/home/$(whoami)/.cache/*
rm -rf /work/$(whoami)/llama-factory/save/demo
sleep 3

cat << EOF > /scratch/$(whoami)/tmp/demo.sh
#!/bin/bash
cd /app
deepspeed --num_gpus 4 src/train_bash.py \
    --deepspeed /app/examples/full_multi_gpu/ds_z3_config.json \
    --stage sft \
    --do_train \
    --model_name_or_path /work/u00cjz00/slurm_jobs/github/models/Llama-2-7b-chat-hf \
    --dataset alpaca_gpt4_en \
    --dataset_dir data \
    --template default \
    --finetuning_type full \
    --output_dir /work/$(whoami)/llama-factory/save/demo \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 2 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --learning_rate 5e-5 \
    --num_train_epochs 3.0 \
    --max_samples 3000 \
    --val_size 0.1 \
    --plot_loss \
    --fp16
EOF

chmod 755 /scratch/$(whoami)/tmp/demo.sh

ml libs/singularity/3.10.2
singularity \
exec -C --nv \
-B /scratch/$(whoami)/home/$(whoami):/home/$(whoami) \
-B /scratch/$(whoami)/tmp:/tmp \
-B /work/$(whoami)/llama-factory/save \
-B /work/u00cjz00/slurm_jobs/github/models \
/work/u00cjz00/nvidia/cuda118/c00cjz00_cuda11.8_pytorch_2.1.2-cuda11.8-cudnn8-devel-llama_factory.sif \
/tmp/demo.sh

DEMO_SH

chmod 755 ~/demo.sh

# SLURM batch script: 1 node, 1 task, 16 cores, 4 GPUs, 1 hour; logs go to genai_<jobid>.out/.err
cat << 'SLURM_SCRIPT' >  ~/demo.slurm
#!/work/u00cjz00/binary/bash5.0/bin/bash
#SBATCH -A GOV109189                                                    ### project number, Example MST109178
#SBATCH -J _t2demo_                                                     ### Job name, Exmaple jupyterlab
#SBATCH -p gp4d                                                         ### Partition Name, Example ngs1gpu
#SBATCH --nodes=1                                                       ### Nodes, Default 1, node number
#SBATCH --ntasks-per-node=1                                             ### Tasks, Default 1, per node tasks
#SBATCH -c 16                                                            ### Cores assigned to each task, Example 4
#SBATCH --gres=gpu:4                                                    ### GPU number, Example gpu:1
#SBATCH --time=0-1:00:00                                                ### Runnung time, days-hours:minutes:seconds or hours:minutes:seconds
#SBATCH -o genai_%j.out                                                 ### Log folder, Here %j is job ID
#SBATCH -e genai_%j.err                                                 ### Log folder, Here %j is job ID


~/demo.sh

SLURM_SCRIPT

In [None]:
# Print the generated SLURM batch script for review
!cat ~/demo.slurm

In [None]:
# Print the generated job script that the SLURM batch script calls
!cat ~/demo.sh