In [None]:
"""Download the fine-tuning toolkit and the training data.

- LLaMA-Factory: the framework used for fine-tuning.
- DISC-Law-SFT: the legal-domain dataset.
"""
%cd /root/autodl-tmp
# Each clone first sources /etc/network_turbo; presumably this enables the
# host's network acceleration -- TODO confirm for your environment.
!source /etc/network_turbo && git clone --depth=1 https://github.com/hiyouga/LLaMA-Factory
!source /etc/network_turbo &&  git clone https://huggingface.co/datasets/ShengbinYue/DISC-Law-SFT

In [None]:
"""Download the base models.

Clones chatglm3-6b, Baichuan2-7B-Chat and phi-1_5 from the Hugging Face hub
into /root/autodl-tmp (shallow clones, --depth=1).
"""
%cd /root/autodl-tmp
!source /etc/network_turbo && git clone --depth=1 https://huggingface.co/THUDM/chatglm3-6b
!source /etc/network_turbo && git clone --depth=1 https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat
!source /etc/network_turbo && git clone --depth=1 https://huggingface.co/microsoft/phi-1_5

In [None]:
"""在LLaMA-Factory中添加DISC-Law-SFT 法律数据
"""
%cd /root/autodl-tmp
!apt-get install -y jq
!cp DISC-Law-SFT/DISC-Law-SFT-Pair.jsonl LLaMA-Factory/data/
!cp DISC-Law-SFT/DISC-Law-SFT-Triplet-released.jsonl LLaMA-Factory/data/
!jq '.law_sft_pair={"file_name": "DISC-Law-SFT-Pair.jsonl", "columns": {"prompt": "input", "response": "output"}}' LLaMA-Factory/data/dataset_info.json > new_dataset_info.json
!cp  new_dataset_info.json LLaMA-Factory/data/dataset_info.json
!jq '.law_sft_triplet={"file_name": "DISC-Law-SFT-Triplet-released.jsonl", "columns": {"prompt": "input", "response": "output"}}' LLaMA-Factory/data/dataset_info.json > new_dataset_info.json
!cp  new_dataset_info.json LLaMA-Factory/data/dataset_info.json

In [None]:
"""替换自我认知self_cognition数据集中的名称
"""
%cd /root/autodl-tmp/LLaMA-Factory
!sed -i 's/<NAME>/法律AI/g' data/self_cognition.json
!sed -i 's/<AUTHOR>/billvsme/g' data/self_cognition.json

In [None]:
"""Install dependencies.

Installs LLaMA-Factory's requirements.txt, then the extra packages used by
this notebook (einops, deepspeed, xformers) and a pinned transformers.
"""
%cd /root/autodl-tmp/LLaMA-Factory
!pip install -r requirements.txt
!pip install einops
# Runs after requirements.txt, so this pin replaces whatever transformers
# version that step installed.
!pip install transformers==4.34.0
!pip install deepspeed
!pip install xformers

In [None]:
"""Generate the DeepSpeed config file: ZeRO stage 2.

Writes ds_config.json in the LLaMA-Factory directory. The optimizer state is
offloaded to CPU (``offload_optimizer.device = "cpu"``).
"""
%cd /root/autodl-tmp/LLaMA-Factory
# Every line of the JSON ends with a backslash so that the whole `echo` is one
# continued shell command; the file is overwritten on each run.
!echo '''{\
  "train_batch_size": "auto",\
  "train_micro_batch_size_per_gpu": "auto",\
  "gradient_accumulation_steps": "auto",\
  "gradient_clipping": "auto",\
  "zero_allow_untested_optimizer": true,\
  "fp16": {\
    "enabled": "auto",\
    "loss_scale": 0,\
    "initial_scale_power": 16,\
    "loss_scale_window": 1000,\
    "hysteresis": 2,\
    "min_loss_scale": 1\
  },\
  "zero_optimization": {\
    "stage": 2,\
    "offload_optimizer": {\
      "device": "cpu",\
      "pin_memory": true\
    },\
    "allgather_partitions": true,\
    "allgather_bucket_size": 2e8,\
    "reduce_scatter": true,\
    "reduce_bucket_size":2e8,\
    "overlap_comm": true,\
    "contiguous_gradients": true\
  }\
}''' > ds_config.json

In [None]:
"""Generate the DeepSpeed config file: ZeRO stage 3.

Both the optimizer state and the parameters are offloaded to CPU
(``offload_optimizer`` / ``offload_param`` with ``device = "cpu"``).

NOTE: this writes the same ds_config.json as the stage-2 cell, so whichever
of the two cells ran last determines the config the training cells use.
"""
%cd /root/autodl-tmp/LLaMA-Factory
# Every line of the JSON ends with a backslash so that the whole `echo` is one
# continued shell command; the file is overwritten on each run.
!echo '''{\
  "train_batch_size": "auto",\
  "train_micro_batch_size_per_gpu": "auto",\
  "gradient_accumulation_steps": "auto",\
  "gradient_clipping": "auto",\
  "zero_allow_untested_optimizer": true,\
  "fp16": {\
    "enabled": "auto",\
    "loss_scale": 0,\
    "initial_scale_power": 16,\
    "loss_scale_window": 1000,\
    "hysteresis": 2,\
    "min_loss_scale": 1\
  },\
  "zero_optimization": {\
    "stage": 3,\
    "offload_optimizer": {\
      "device": "cpu",\
      "pin_memory": true\
    },\
    "offload_param": {\
      "device": "cpu",\
      "pin_memory": true\
    },\
    "overlap_comm": true,\
    "contiguous_gradients": true,\
    "sub_group_size": 5e7,\
    "reduce_bucket_size": "auto",\
    "stage3_prefetch_bucket_size": "auto",\
    "stage3_param_persistence_threshold": "auto",\
    "stage3_max_live_parameters": 5e7,\
    "stage3_max_reuse_distance": 5e7,\
    "stage3_gather_16bit_weights_on_model_save": true\
  }\
}''' > ds_config.json

In [None]:
"""phi-1_5 training: supervised fine-tuning (SFT), full-parameter.

Trains ../phi-1_5/ on the self_cognition and law_sft_triplet datasets with
the ``vanilla`` template, launched through DeepSpeed on 2 GPUs.

DeepSpeed reads ds_config.json, i.e. whichever of the stage-2 / stage-3
config cells was run last.
"""
%cd /root/autodl-tmp/LLaMA-Factory
# Remove the output of any previous run of this cell before training.
!rm -rf saves/Phi1.5-1.3B/full/law_full
!deepspeed --num_gpus 2 --master_port=9901 src/train_bash.py \
    --deepspeed ds_config.json \
    --stage sft \
    --model_name_or_path ../phi-1_5/ \
    --do_train True \
    --finetuning_type full \
    --template vanilla \
    --flash_attn False \
    --shift_attn False \
    --dataset_dir data \
    --dataset self_cognition,law_sft_triplet \
    --cutoff_len 2048 \
    --learning_rate 2e-04 \
    --num_train_epochs 5.0 \
    --max_samples 1000 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --max_grad_norm 1.0 \
    --logging_steps 5 \
    --save_steps 1000 \
    --warmup_steps 0 \
    --neft_alpha 0 \
    --train_on_prompt False \
    --upcast_layernorm False \
    --output_dir saves/Phi1.5-1.3B/full/law_full \
    --fp16 True \
    --plot_loss True

In [None]:
"""chatglm3-6b training: supervised fine-tuning (SFT) with LoRA.

Trains ../chatglm3-6b on the self_cognition and law_sft_triplet datasets with
the ``chatglm3`` template (the data carries no conversation history).
LoRA is applied to ``query_key_value`` with rank 8 and dropout 0.1.

Launched through DeepSpeed on 2 GPUs using ds_config.json, i.e. whichever of
the stage-2 / stage-3 config cells was run last.
"""
%cd /root/autodl-tmp/LLaMA-Factory
# Remove the output of any previous run of this cell before training.
!rm -rf saves/chatglm3/lora/law
!source /etc/network_turbo && deepspeed --num_gpus 2 --master_port=9901 src/train_bash.py \
    --deepspeed ds_config.json \
    --stage sft \
    --model_name_or_path ../chatglm3-6b \
    --do_train True \
    --finetuning_type lora \
    --template chatglm3 \
    --flash_attn False \
    --shift_attn False \
    --dataset_dir data \
    --dataset self_cognition,law_sft_triplet \
    --cutoff_len 2048 \
    --learning_rate 5e-05 \
    --num_train_epochs 10.0 \
    --max_samples 1000 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --max_grad_norm 1.0 \
    --logging_steps 5 \
    --save_steps 1000 \
    --warmup_steps 0 \
    --neft_alpha 0 \
    --train_on_prompt False \
    --upcast_layernorm False \
    --lora_rank 8 \
    --lora_dropout 0.1 \
    --lora_target query_key_value \
    --resume_lora_training True \
    --output_dir saves/chatglm3/lora/law \
    --fp16 True \
    --plot_loss True

In [None]:
"""chatglm3-6b training: supervised fine-tuning (SFT), full-parameter.

Trains ../chatglm3-6b on the self_cognition dataset with the ``chatglm3``
template (the data carries no conversation history).

Launched through DeepSpeed on 2 GPUs using ds_config.json, i.e. whichever of
the stage-2 / stage-3 config cells was run last.
"""
%cd /root/autodl-tmp/LLaMA-Factory
# Remove the output of any previous run of this cell before training.
!rm -rf saves/chatglm3/full/law
!source /etc/network_turbo && deepspeed --num_gpus 2 --master_port=9901 src/train_bash.py \
    --deepspeed ds_config.json \
    --stage sft \
    --model_name_or_path ../chatglm3-6b \
    --do_train True \
    --finetuning_type full \
    --template chatglm3 \
    --flash_attn False \
    --shift_attn False \
    --dataset_dir data \
    --dataset self_cognition \
    --cutoff_len 2048 \
    --learning_rate 5e-05 \
    --num_train_epochs 15.0 \
    --max_samples 1000 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --max_grad_norm 1.0 \
    --logging_steps 5 \
    --save_steps 1000 \
    --warmup_steps 0 \
    --neft_alpha 0 \
    --train_on_prompt False \
    --upcast_layernorm False \
    --output_dir saves/chatglm3/full/law \
    --fp16 True \
    --plot_loss True

In [None]:
"""Baichuan2 训练 lora
指令监督微调，lora方式，使用self_cognition

由于没有对话历史，template使用chatglm3

使用deepspeed stage2，offload_optimizer -> cpu节省显存
"""
%cd /root/autodl-tmp/LLaMA-Factory
!pip install transformers==4.33.1
!pip install bitsandbytes
!rm -rf saves/chatglm3/lora/law
!source /etc/network_turbo && deepspeed --num_gpus 2 --master_port=9901 src/train_bash.py \
    --deepspeed ds_config.json \
    --stage sft \
    --model_name_or_path baichuan-inc/Baichuan2-7B-Chat \
    --do_train True \
    --finetuning_type lora \
    --template chatglm3 \
    --flash_attn False \
    --shift_attn False \
    --dataset_dir data \
    --dataset self_cognition \
    --cutoff_len 2048 \
    --learning_rate 5e-05 \
    --num_train_epochs 10.0 \
    --max_samples 1000 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --max_grad_norm 1.0 \
    --logging_steps 5 \
    --save_steps 1000 \
    --warmup_steps 0 \
    --neft_alpha 0 \
    --train_on_prompt False \
    --upcast_layernorm False \
    --lora_rank 8 \
    --lora_dropout 0.1 \
    --lora_target query_key_value \
    --resume_lora_training True \
    --output_dir saves/chatglm3/lora/law \
    --fp16 True \
    --plot_loss True

In [None]:
"""导出模型
"""
%cd /root/autodl-tmp/LLaMA-Factory
!mkdir out_model
!python src/export_model.py \
    --model_name_or_path  ../chatglm3-6b \
    --template chatglm3 \
    --finetuning_type lora \
    --checkpoint_dir saves/chatglm3/lora/law \
    --export_dir out_model/law_chatglm3

In [None]:
"""Try out the exported model in an interactive console chat.
"""

%cd /root/autodl-tmp/LLaMA-Factory

import os
from threading import Thread

import torch
from transformers.generation.streamers import TextIteratorStreamer
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, AutoModel

# Make CUDA the default device for tensors torch creates from here on.
torch.set_default_device("cuda")

# Directory the export cell writes to (out_model/law_chatglm3 under LLaMA-Factory).
model_name = "/root/autodl-tmp/LLaMA-Factory/out_model/law_chatglm3/"

# trust_remote_code=True lets transformers run the Python code shipped inside
# the model directory when building the tokenizer and the model.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Load the weights, move them to the GPU and switch to inference mode.
model = (
    AutoModel.from_pretrained(model_name, trust_remote_code=True)
    .cuda()
    .eval()
)

def stream(prompt):
    """Generate a reply to ``prompt`` and yield it piece by piece.

    Generation runs on a background thread and pushes decoded text into a
    ``TextIteratorStreamer``; this generator relays the chunks as they arrive.
    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        prompt: Raw text fed to the tokenizer as the model input.

    Yields:
        Non-empty chunks of generated text (the prompt itself is not echoed),
        followed by one final empty string once generation has ended.
    """
    # Put the input tensors on the model's device explicitly instead of
    # relying on the process-wide default device.
    inputs = tokenizer(
        prompt, return_tensors='pt', return_attention_mask=True
    ).to(model.device)

    # skip_prompt=True makes the streamer drop the prompt tokens itself.
    # Matching the prompt against the first streamed chunk is unreliable: the
    # streamer holds back the last partial word, so that chunk may never
    # contain the whole prompt.
    # Decode options are plain keyword arguments of the streamer; wrapping
    # them in a `decode_kwargs=` dict would hand `decode()` an unknown
    # `decode_kwargs` argument and never apply `skip_special_tokens`.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True)

    Thread(
        target=model.generate,
        kwargs=dict(inputs, streamer=streamer, max_new_tokens=1024),
    ).start()

    # Special tokens are already stripped while decoding; the EOS check is a
    # fallback and is skipped when the tokenizer defines no EOS token.
    eos_token = tokenizer.eos_token
    for text in streamer:
        if not text:
            continue
        if eos_token and eos_token in text:
            break

        yield text

    yield ""


def main():
    """Run an interactive console chat loop.

    Prints a welcome banner, then repeatedly reads a line from the user:

    - ``stop``  ends the loop;
    - ``clear`` clears the terminal and prints the banner again;
    - anything else is sent to ``stream`` and the reply is printed as it
      is generated.

    Commands are matched after stripping surrounding whitespace; the text
    passed to ``stream`` is the unstripped input.
    """
    welcome_prompt = "欢迎使用 法律AI 模型，输入内容即可进行对话，clear 清空对话历史，stop 终止程序"
    print(welcome_prompt)
    while True:
        query = input("\n用户：")
        command = query.strip()
        if command == "stop":
            break
        if command == "clear":
            # The terminal-clearing command is `clear` on POSIX and `cls` on
            # Windows; `clr` is neither.
            os.system("cls" if os.name == "nt" else "clear")
            print(welcome_prompt)
            continue
        print("\n法律AI：", end="")
        for text in stream(query):
            print(text, end="", flush=True)
        print("")



# Start the interactive chat loop; it returns once the user enters "stop".
main()