
Commit

[LLM] add chatglm and codellama extension test (#837)
changwangss committed Dec 1, 2023
1 parent 7344717 commit 130b594
Showing 10 changed files with 165 additions and 20 deletions.
108 changes: 108 additions & 0 deletions examples/.config/pytorch_optimize.json
@@ -26,6 +26,33 @@
}
}
},
"codellama_7b": {
"working_dir": "huggingface/pytorch/code-generation/quantization",
"tune":{
"cmd": "bash run_tuning.sh",
"params": {
"topology": "codellama_7b",
"task": "",
"approach": "static",
"backend": "ipex",
"output_model": "saved_results"
}
},
"benchmark": {
"cmd": "bash run_benchmark.sh",
"params": {
"topology": "codellama_7b",
"task": "",
"approach": "static",
"backend": "ipex",
"mode": "accuracy",
"batch_size": "8",
"iters": "100",
"int8": "false",
"config": "saved_results"
}
}
},
"gpt_neo_clm_static": {
"working_dir": "huggingface/pytorch/language-modeling/quantization",
"tune":{
@@ -1711,6 +1738,87 @@
}
}
},
"chatglm3_6b_gen_ipex_static": {
"working_dir": "huggingface/pytorch/text-generation/quantization",
"tune": {
"cmd": "bash run_tuning.sh",
"params": {
"topology": "chatglm3_6b",
"task": "generation",
"approach": "static",
"backend": "ipex",
"output_model": "saved_results"
}
},
"benchmark": {
"cmd": "bash run_benchmark.sh",
"params": {
"topology": "chatglm3_6b",
"task": "generation",
"approach": "static",
"backend": "ipex",
"mode": "benchmark",
"batch_size": "112",
"iters": "100",
"int8": "false",
"config": "saved_results"
}
}
},
"chatglm2_6b_gen_ipex_static": {
"working_dir": "huggingface/pytorch/text-generation/quantization",
"tune": {
"cmd": "bash run_tuning.sh",
"params": {
"topology": "chatglm2_6b",
"task": "generation",
"approach": "static",
"backend": "ipex",
"output_model": "saved_results"
}
},
"benchmark": {
"cmd": "bash run_benchmark.sh",
"params": {
"topology": "chatglm2_6b",
"task": "generation",
"approach": "static",
"backend": "ipex",
"mode": "benchmark",
"batch_size": "112",
"iters": "100",
"int8": "false",
"config": "saved_results"
}
}
},
"chatglm_6b_gen_ipex_static": {
"working_dir": "huggingface/pytorch/text-generation/quantization",
"tune": {
"cmd": "bash run_tuning.sh",
"params": {
"topology": "chatglm_6b",
"task": "generation",
"approach": "static",
"backend": "ipex",
"output_model": "saved_results"
}
},
"benchmark": {
"cmd": "bash run_benchmark.sh",
"params": {
"topology": "chatglm_6b",
"task": "generation",
"approach": "static",
"backend": "ipex",
"mode": "benchmark",
"batch_size": "112",
"iters": "100",
"int8": "false",
"config": "saved_results"
}
}
},
"flan-t5-large_gen_ipex_static": {
"working_dir": "huggingface/pytorch/text2text-generation",
"tune": {
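For context, the new `codellama_7b` entry above is consumed by the driver scripts in its `working_dir`. An invocation that mirrors the entry's `params` might look like the sketch below; the `--key=value` flag syntax is an assumption about how `run_tuning.sh` and `run_benchmark.sh` parse their arguments, not something shown in this diff.

```shell
# Sketch only: flag names mirror the JSON "params" keys; verify the scripts' argument parsing.
cd examples/huggingface/pytorch/code-generation/quantization
bash run_tuning.sh --topology=codellama_7b --approach=static --backend=ipex \
    --output_model=saved_results
bash run_benchmark.sh --topology=codellama_7b --approach=static --backend=ipex \
    --mode=accuracy --batch_size=8 --iters=100 --int8=false --config=saved_results
```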
@@ -4,7 +4,7 @@ We provide the inference benchmarking script `run_generation.py` for Starcoder a

# Prerequisite​
## 1. Environment​
Recommend python 3.7 or higher version is recommended. The dependent packages are listed in requirements, please install them as follows,
Python 3.10 is recommended due to a limitation of the [code evaluation library](https://github.com/bigcode-project/bigcode-evaluation-harness). The dependent packages are listed in requirements; please install them as follows:

```shell
git clone https://github.com/intel/intel-extension-for-transformers.git
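# A minimal sketch of the rest of the setup implied by the Python 3.10 note above;
# conda and the environment name "code-gen" are assumptions, not part of this diff.
conda create -n code-gen python=3.10 -y
conda activate code-gen
cd intel-extension-for-transformers/examples/huggingface/pytorch/code-generation/quantization
pip install -r requirements.txt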
@@ -13,7 +13,7 @@ function init_params {
iters=100
batch_size=1
tuned_checkpoint=saved_results
lm_eval_tasks="openai_humaneval"
lm_eval_tasks="humaneval"
script="run_generation.py"
for var in "$@"
do
@@ -67,10 +67,10 @@ function run_benchmark {

if [[ ${mode} == "accuracy" ]]; then
mode_cmd=" --accuracy "
extra_cmd=$extra_cmd" --tasks ${lm_eval_tasks} --allow_code_execution --n_samples 20 --batch_size 20 --do_sample"
batch_size=112
extra_cmd=$extra_cmd" --tasks ${lm_eval_tasks} --allow_code_execution --n_samples 20 --batch_size 20 --do_sample"
elif [[ ${mode} == "benchmark" ]]; then
mode_cmd=" --benchmark "
extra_cmd=$extra_cmd" --batch_size ${batch_size}"
else
echo "Error: No such mode: ${mode}"
exit 1
@@ -82,6 +82,11 @@ function run_benchmark {
if [ "${backend}" = "ipex" ]; then
extra_cmd=$extra_cmd" --ipex"
fi
elif [ "${topology}" = "codellama_7b" ]; then
model_name_or_path="codellama/CodeLlama-7b-hf"
if [ "${backend}" = "ipex" ]; then
extra_cmd=$extra_cmd" --ipex"
fi
fi


@@ -95,7 +100,6 @@ function run_benchmark {
python -u ./${script} \
--model ${model_name_or_path} \
--output_dir ${tuned_checkpoint} \
--batch_size ${batch_size} \
${mode_cmd} \
${extra_cmd}
else
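Put together, the accuracy branch above composes roughly the following command for `topology=codellama_7b` with the `ipex` backend, with values expanded from the defaults visible in this hunk; treat it as a sketch rather than captured output.

```shell
# Approximate expansion of the accuracy path for codellama_7b + ipex.
python -u ./run_generation.py \
    --model codellama/CodeLlama-7b-hf \
    --output_dir saved_results \
    --accuracy \
    --tasks humaneval --allow_code_execution --n_samples 20 --batch_size 20 --do_sample \
    --ipex
```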
@@ -153,7 +153,6 @@
tokenizer.pad_token = tokenizer.eos_token


calib_dataset = args.dataset
op_type_dict = {
"add": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}},
}
@@ -174,7 +173,7 @@
recipes=recipes,
op_type_dict=op_type_dict, # default is {}
excluded_precisions=excluded_precisions, # default is []
calib_dataset=calib_dataset,
calib_dataset=args.dataset,
calib_iters=args.calib_iters,
)
elif args.woq:
@@ -12,7 +12,7 @@ function main {
function init_params {
topology="gpt"
tuned_checkpoint="saved_results"
DATASET_NAME="openai_humaneval"
DATASET_NAME="mbpp"
model_name_or_path="bigcode/starcoder"
extra_cmd=""
batch_size=8
@@ -61,15 +61,19 @@ function run_tuning {
extra_cmd=$extra_cmd" --ipex"
alpha=0.5
fi
elif [ "${topology}" = "codellama_7b" ]; then
model_name_or_path="codellama/CodeLlama-7b-hf"
if [ "${backend}" = "ipex" ]; then
extra_cmd=$extra_cmd" --ipex"
alpha=0.5
fi
fi

if [ ${script} = "run_generation.py" ];then
python ./${script} \
--model ${model_name_or_path} \
--output_dir ${tuned_checkpoint} \
--dataset ${DATASET_NAME} \
--calib_split "test" \
--quantize \
--sq \
--alpha ${alpha} \
${extra_cmd}
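With the defaults shown in this hunk, the tuning path composes roughly the following command for `topology=codellama_7b` with the `ipex` backend; again a sketch assembled from the visible defaults, not captured CI output.

```shell
# Approximate expansion of the tuning path for codellama_7b + ipex.
python ./run_generation.py \
    --model codellama/CodeLlama-7b-hf \
    --output_dir saved_results \
    --dataset mbpp \
    --quantize \
    --sq \
    --alpha 0.5 \
    --ipex
```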
@@ -5,7 +5,7 @@ We provide the inference benchmarking script `run_generation.py` for large langu

# Prerequisite​
## 1. Create Environment​
Pytorch and Intel-extension-for-pytorch version 2.1 are required, the dependent packages are listed in requirements, we recommend create environment as the following steps.
PyTorch and Intel Extension for PyTorch version 2.1 are required, and Python 3.9 or higher is needed due to a limitation of the [text evaluation library](https://github.com/EleutherAI/lm-evaluation-harness/tree/master). The dependent packages are listed in requirements; we recommend creating the environment with the following steps.

```bash
pip install -r requirements.txt
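# Sketch of the remaining setup implied by the note above; the exact IPEX install
# command and version pin are assumptions — check requirements.txt and the IPEX docs.
python -c "import sys; assert sys.version_info >= (3, 9), 'Python 3.9+ required'"
pip install intel-extension-for-pytorch==2.1.0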
@@ -109,9 +109,18 @@ function run_benchmark {
model_name_or_path="/tf_dataset2/models/pytorch/dolly_v2_3b"
elif [ "${topology}" = "mpt_7b_chat" ]; then
model_name_or_path="mosaicml/mpt-7b-chat"
elif [ "${topology}" = "chatglm3_6b" ]; then
model_name_or_path="THUDM/chatglm3-6b"
extra_cmd=$extra_cmd" --trust_remote_code True"
elif [ "${topology}" = "chatglm2_6b" ]; then
model_name_or_path="THUDM/chatglm2-6b"
extra_cmd=$extra_cmd" --trust_remote_code True"
elif [ "${topology}" = "chatglm_6b" ]; then
model_name_or_path="THUDM/chatglm-6b"
extra_cmd=$extra_cmd" --trust_remote_code True"
pip install transformers==4.33
fi


if [[ ${int8} == "true" ]]; then
if [ "${topology}" = "gpt_j_woq_rtn" ]; then
extra_cmd=$extra_cmd" --woq"
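For the new chatglm topologies, the corresponding benchmark entry in `pytorch_optimize.json` above maps to an invocation along the lines of the sketch below; the `--key=value` syntax is an assumption about the driver's argument parsing. Note that the script itself appends `--trust_remote_code True` (and, for `chatglm_6b`, pins `transformers==4.33`) before running.

```shell
# Hypothetical driver invocation for the chatglm3_6b benchmark entry; adjust flags
# to match run_benchmark.sh's actual parsing.
bash run_benchmark.sh --topology=chatglm3_6b --backend=ipex --mode=benchmark \
    --batch_size=112 --iters=100 --int8=false --config=saved_results
```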
@@ -155,10 +155,6 @@
op_type_dict = {".*": {"activation": {"algorithm": "minmax"}}}
else:
op_type_dict = {}
if re.search("dolly", args.model):
ipex_opt_llm = False
else:
ipex_opt_llm = None
excluded_precisions = [] if args.int8_bf16_mixed else ["bf16"]
recipes = {
"smooth_quant": True,
@@ -172,7 +168,6 @@
op_type_dict=op_type_dict, # default is {}
excluded_precisions=excluded_precisions, # default is []
num_beams=generate_kwargs["num_beams"],
ipex_opt_llm=ipex_opt_llm,
)
elif args.woq:
quantization_config = WeightOnlyQuantConfig(
@@ -121,6 +121,25 @@ function run_tuning {
model_name_or_path="mosaicml/mpt-7b-chat"
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
elif [ "${topology}" = "chatglm3_6b" ]; then
alpha=0.75
model_name_or_path="THUDM/chatglm3-6b"
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
extra_cmd=$extra_cmd" --trust_remote_code True"
elif [ "${topology}" = "chatglm2_6b" ]; then
alpha=0.75
model_name_or_path="THUDM/chatglm2-6b"
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
extra_cmd=$extra_cmd" --trust_remote_code True"
elif [ "${topology}" = "chatglm_6b" ]; then
alpha=0.75
model_name_or_path="THUDM/chatglm-6b"
extra_cmd=$extra_cmd" --sq --alpha ${alpha}"
extra_cmd=$extra_cmd" --output_dir ${tuned_checkpoint}"
extra_cmd=$extra_cmd" --trust_remote_code True"
pip install transformers==4.33
fi

if [ ${script} = "run_generation.py" ];then
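Combining the new `chatglm3_6b` branch above with the `run_generation.py` template used by these tuning scripts, the composed command would look roughly like the sketch below; the base command and the `--quantize` flag are assumed from the code-generation tuning script shown earlier, since this hunk only shows the per-topology `extra_cmd` pieces.

```shell
# Rough expansion for topology=chatglm3_6b; flags outside extra_cmd are assumptions.
python ./run_generation.py \
    --model THUDM/chatglm3-6b \
    --quantize \
    --sq --alpha 0.75 \
    --output_dir saved_results \
    --trust_remote_code True
```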
13 changes: 10 additions & 3 deletions intel_extension_for_transformers/llm/evaluation/models.py
@@ -54,7 +54,10 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwarg
past_key_values = past_key_values or kwargs.get("past", None)

if self.use_cache and past_key_values is not None:
if not re.search("THUDM/chatglm-6b", self.config.auto_map["AutoConfig"]):
if not (
self.config.model_type == "chatglm"
and re.search("THUDM/chatglm-6b", self.config.auto_map["AutoConfig"])
):
input_ids = input_ids[:, -1:]

# `past_key_values` may be in the stardard format (e.g. in contrastive search),
@@ -73,7 +76,9 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **kwarg
if past_key_values:
position_ids = position_ids[:, -1].unsqueeze(-1)

if re.search("THUDM/chatglm-6b", self.config.auto_map["AutoConfig"]):
if self.config.model_type == "chatglm" and re.search(
"THUDM/chatglm-6b", self.config.auto_map["AutoConfig"]
):
MASK, gMASK = self.config.mask_token_id, self.config.gmask_token_id
seqs = input_ids.tolist()
mask_positions, use_gmasks = [], []
@@ -160,7 +165,9 @@ def forward(
inputs["past_key_values"] = past_key_values
if attention_mask is None:
inputs["attention_mask"] = torch.ones_like(input_ids)
if re.search("THUDM/chatglm-6b", self.config.auto_map["AutoConfig"]):
if model_type == "chatglm" and re.search(
"THUDM/chatglm-6b", self.config.auto_map["AutoConfig"]
):
if position_ids is None:
position_ids = self.prepare_inputs_for_generation(input_ids)[
"position_ids"
