Enable OPT and LLAMA models (#877)
changwangss committed Apr 27, 2023
1 parent 3ade86f commit 6a96085
Showing 14 changed files with 366 additions and 60 deletions.
63 changes: 63 additions & 0 deletions examples/.config/pytorch_optimize.json
@@ -1282,4 +1282,67 @@
}
}
},
"opt_2.7b_clm_ipex": {
"working_dir": "huggingface/pytorch/language-modeling/quantization",
"tune": {
"cmd": "bash run_tuning.sh",
"params": {
"topology": "opt_2.7b_clm_ipex",
"output_model": "saved_results"
}
},
"benchmark": {
"cmd": "bash run_benchmark.sh",
"params": {
"topology": "opt_2.7b_clm_ipex",
"mode": "accuracy",
"batch_size": "112",
"iters": "100",
"int8": "false",
"config": "saved_results"
}
}
},
"opt_6.7b_clm_ipex": {
"working_dir": "huggingface/pytorch/language-modeling/quantization",
"tune": {
"cmd": "bash run_tuning.sh",
"params": {
"topology": "opt_6.7b_clm_ipex",
"output_model": "saved_results"
}
},
"benchmark": {
"cmd": "bash run_benchmark.sh",
"params": {
"topology": "opt_6.7b_clm_ipex",
"mode": "accuracy",
"batch_size": "112",
"iters": "100",
"int8": "false",
"config": "saved_results"
}
}
},
"llama_7b_clm_ipex": {
"working_dir": "huggingface/pytorch/language-modeling/quantization",
"tune": {
"cmd": "bash run_tuning.sh",
"params": {
"topology": "llama_7b_clm_ipex",
"output_model": "saved_results"
}
},
"benchmark": {
"cmd": "bash run_benchmark.sh",
"params": {
"topology": "llama_7b_clm_ipex",
"mode": "accuracy",
"batch_size": "112",
"iters": "100",
"int8": "false",
"config": "saved_results"
}
}
},
}
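Each new entry gives the CI driver a `tune` and a `benchmark` command plus its parameters. Assuming the driver forwards each `params` key as a `--key=value` flag (the argument parsing inside the shell scripts is collapsed further down in this diff, so the exact flag syntax is an assumption), an `opt_2.7b_clm_ipex` run would look roughly like:

```bash
# Hypothetical invocation reconstructed from the JSON params above.
# working_dir is taken from the JSON and assumed to be relative to examples/.
cd examples/huggingface/pytorch/language-modeling/quantization
bash run_tuning.sh --topology=opt_2.7b_clm_ipex --output_model=saved_results
bash run_benchmark.sh --topology=opt_2.7b_clm_ipex --mode=accuracy \
  --batch_size=112 --iters=100 --int8=false --config=saved_results
```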
@@ -17,7 +17,8 @@ conda install gperftools jemalloc==5.2.1 -c conda-forge -y
# Installation
pip install git+https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit.git
pip install git+https://github.com/intel/neural-compressor.git@6efe818497d5e424deac42580f9fde84f8b8723d
pip install intel_extension_for_pytorch transformers datasets accelerate
# Setup Environment Variables
export KMP_BLOCKTIME=1
@@ -30,23 +31,68 @@ export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
```

# Run
## 1. Quantization

Here is how to run the scripts:

**Causal Language Modeling (CLM)**

`evaluate_clm.py` quantizes large language models using the [NeelNanda/pile-10k](https://huggingface.co/datasets/NeelNanda/pile-10k) dataset for calibration and validates accuracy on `lambada_openai`, `piqa`, `winogrande`, `hellaswag`, and other datasets provided by lm_eval. Example commands are shown below.
### GPT-J-6b

#### Quantization
```bash
# "--sq" is used to enable smooth quant
# "--int8_bf16_mixed" is used to enable int8-bf16 mixed mode for platform that natively supports bf16
python evaluate_clm.py \
--model EleutherAI/gpt-j-6B \
--quantize \
--dataset NeelNanda/pile-10k \
--sq \
--alpha 0.5 \
--output_dir "saved_results" \
```
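For orientation, `--sq` and `--alpha` drive SmoothQuant: before INT8 calibration, activation outliers are migrated into the weights through a per-channel scale. The real pass lives inside neural-compressor; the snippet below is only a conceptual sketch of the scaling rule (the tensor values are made up):

```python
import torch

def smooth_scale(act_absmax: torch.Tensor, w_absmax: torch.Tensor, alpha: float = 0.5) -> torch.Tensor:
    # Per input channel j: s_j = max|X_j|**alpha / max|W_j|**(1 - alpha).
    # alpha balances how much dynamic range is shifted from activations to weights.
    return act_absmax.pow(alpha) / w_absmax.pow(1.0 - alpha)

act_max = torch.tensor([30.0, 0.5, 12.0])  # made-up per-channel activation maxima
w_max = torch.tensor([0.40, 0.60, 0.20])   # made-up per-channel weight maxima
s = smooth_scale(act_max, w_max, alpha=0.5)
# Activations are divided by s and the matching weight columns multiplied by s,
# so X @ W is unchanged while activation outliers shrink before quantization.
print(s)
```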

#### Accuracy with lm_eval
```bash
# INT8 Accuracy
python evaluate_clm.py \
--model EleutherAI/gpt-j-6B \
--accuracy_only \
--batch_size 112 \
--tasks "lambada_openai" "lambada_standard"\
--int8 \
--output_dir "saved_results" # load int8 model
# To validate the IPEX BF16 model, replace "--int8" with "--ipex_bf16" and remove "--output_dir".
# To validate the FP32 model, remove both "--int8" and "--output_dir".
```
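Under the hood, the `--tasks` list is handed to the toolkit's lm-eval wrapper. The sketch below paraphrases the call that `evaluate_clm.py` makes after this commit (module path and keyword names are taken from the script's diff further down; treat it as illustrative rather than a standalone recipe):

```python
from transformers import AutoModelForCausalLM
from intel_extension_for_transformers.evaluation import evaluate

user_model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", torchscript=True)
results = evaluate(
    model="hf-causal",
    model_args="pretrained=EleutherAI/gpt-j-6B,tokenizer=EleutherAI/gpt-j-6B,dtype=float32",
    user_model=user_model,                        # the model object actually being scored
    batch_size=112,
    tasks=["lambada_openai", "lambada_standard"],
)
print(results["results"]["lambada_openai"]["acc"])
```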
### OPT-2.7b

#### Quantization

```bash
# "--sq" is used to enable smooth quant
# "--int8_bf16_mixed" is used to enable int8-bf16 mixed mode for platform that natively supports bf16
python evaluate_clm.py \
--model facebook/opt-2.7b \
--quantize \
--dataset NeelNanda/pile-10k \
--sq \
--alpha 0.5 \
--output_dir "saved_results" \
--int8_bf16_mixed
```

#### Accuracy with lm_eval
```bash
python evaluate_clm.py \
--model facebook/opt-2.7b \
--accuracy_only \
--batch_size 112 \
--tasks "winogrande" "copa" "piqa" "rte" "hellaswag" "openbookqa" "lambada_openai" "lambada_standard" \
--int8 \
--output_dir "saved_results" # load int8 model
# To validate the IPEX BF16 model, replace "--int8" with "--ipex_bf16" and remove "--output_dir".
# To validate the FP32 model, remove both "--int8" and "--output_dir".
```
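If you want to reuse the quantized model outside `evaluate_clm.py`, the IPEX INT8 artifact in `--output_dir` is typically a TorchScript archive. A hedged sketch follows; the `best_model.pt` file name and the single-tensor forward signature are assumptions, so check what the quantization run actually wrote into `saved_results/`:

```python
import torch
import intel_extension_for_pytorch as ipex  # registers the IPEX INT8 ops used by the traced graph

# File name is an assumption; adjust to whatever the quantization step saved.
model = torch.jit.load("saved_results/best_model.pt")
model.eval()

dummy_ids = torch.randint(0, 50000, (1, 32))  # made-up token ids, just to exercise the graph
with torch.no_grad():
    out = model(dummy_ids)
```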
To run quantization with the Transformers language-modeling example [`run_clm.py`](https://github.com/huggingface/transformers/blob/main/examples/pytorch/language-modeling/run_clm.py), use the following command.
```
@@ -93,30 +139,8 @@ python run_mlm.py \
--overwrite_output_dir
```

## 2. Validated Model List

|Type|Pretrained model|PostTrainingDynamic | PostTrainingStatic | QuantizationAwareTraining
|---|------------------------------------|---|---|---
@@ -4,13 +4,14 @@
import json
import fnmatch
import re
import numpy as np
import torch
from datasets import load_dataset
from torch.nn.functional import pad
from torch.utils.data import DataLoader
from transformers import AutoModelForCausalLM, AutoTokenizer
import intel_extension_for_pytorch as ipex


parser = argparse.ArgumentParser()
parser.add_argument(
"--model", nargs="?", default="EleutherAI/gpt-j-6b"
@@ -37,13 +38,17 @@
help="For accuracy measurement only.")
parser.add_argument("--save_accuracy_path", default=None,
help="Save accuracy results path.")
parser.add_argument("--pad_max_length", default=196, type=int,
parser.add_argument("--pad_max_length", default=512, type=int,
help="Pad input ids to max length.")
parser.add_argument("--calib_iters", default=512, type=int,
help="calibration iters.")
parser.add_argument("--tasks", nargs='+', default=["winogrande", "copa", "piqa", "rte", "hellaswag", \
"openbookqa", "lambada_openai", "lambada_standard", "wikitext"], type=str, \
help="tasks list for accuracy validation")

args = parser.parse_args()
calib_size = 1


class Evaluator:
def __init__(self, dataset, tokenizer, batch_size=8, pad_val=1, pad_max=196, is_calib=False):
self.dataset = dataset
@@ -116,11 +121,19 @@ def evaluate(self, model):
print("Latency: ", latency)
return acc
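For context, the `Evaluator` pads every example to `pad_max` with `pad_val` before batching (its collate code is collapsed in this view). Roughly, and ignoring truncation, the padding amounts to:

```python
import torch
from torch.nn.functional import pad

def pad_input_ids(input_ids: torch.Tensor, pad_max: int = 512, pad_val: int = 1) -> torch.Tensor:
    # Right-pad a 1-D tensor of token ids so calibration batches share one shape.
    return pad(input_ids, (0, pad_max - input_ids.shape[0]), value=pad_val)

ids = torch.tensor([101, 2023, 2003, 102])
print(pad_input_ids(ids, pad_max=8).tolist())  # [101, 2023, 2003, 102, 1, 1, 1, 1]
```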

if re.search("llama", args.model):
from transformers import LlamaForCausalLM, LlamaTokenizer
user_model = LlamaForCausalLM.from_pretrained(
args.model,
torchscript=True, # torchscript will force `return_dict=False` to avoid jit errors
)
tokenizer = LlamaTokenizer.from_pretrained(args.model)
else:
user_model = AutoModelForCausalLM.from_pretrained(
args.model,
torchscript=True, # torchscript will force `return_dict=False` to avoid jit errors
)
tokenizer = AutoTokenizer.from_pretrained(args.model)


# to channels last
@@ -142,7 +155,7 @@ def evaluate(self, model):

def calib_func(prepared_model):
for i, calib_input in enumerate(calib_dataloader):
if i > args.calib_iters:
break
prepared_model(calib_input[0])
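`calib_func` is what the Neural Compressor tuner calls to push calibration batches through the prepared model. The actual `fit(...)` call site is collapsed in this diff; under neural-compressor 2.x it presumably looks something like the sketch below (the config fields are assumptions based on the `--sq`/`--alpha`/IPEX flags used in this example):

```python
# Hedged sketch of how calib_func is typically consumed (not shown in this hunk).
from neural_compressor import PostTrainingQuantConfig, quantization

conf = PostTrainingQuantConfig(
    backend="ipex",                                   # quantize through Intel Extension for PyTorch
    recipes={"smooth_quant": True,
             "smooth_quant_args": {"alpha": args.alpha}},
)
q_model = quantization.fit(user_model, conf, calib_func=calib_func)
q_model.save(args.output_dir)
```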

@@ -200,19 +213,23 @@ def calib_func(prepared_model):
if args.accuracy_only:
user_model.eval()
def eval_func(user_model):
from intel_extension_for_transformers.evaluation import evaluate
results = evaluate(
model="hf-causal",
model_args='pretrained='+args.model+',tokenizer='+args.model+',dtype=float32',
user_model=user_model,
batch_size=args.batch_size,
tasks=["lambada_openai"]
tasks=args.tasks,
)
dumped = json.dumps(results, indent=2)
if args.save_accuracy_path:
with open(args.save_accuracy_path, "w") as f:
f.write(dumped)
for task_name in args.tasks:
    if task_name == "wikitext":
        print("Accuracy for %s is: %s" % (task_name, results["results"][task_name]["word_perplexity"]))
    else:
        print("Accuracy for %s is: %s" % (task_name, results["results"][task_name]["acc"]))

with torch.no_grad(), torch.cpu.amp.autocast(enabled=amp_enabled, dtype=amp_dtype):
eval_func(user_model)
@@ -5,5 +5,5 @@ datasets >= 1.1.3
torch >= 1.10
transformers
wandb
git+https://github.com/intel/neural-compressor.git@6efe818497d5e424deac42580f9fde84f8b8723d
git+https://github.com/EleutherAI/lm-evaluation-harness.git@83dbfbf6070324f3e5872f63e49d49ff7ef4c9b3
@@ -13,6 +13,7 @@ function init_params {
iters=100
batch_size=16
tuned_checkpoint=saved_results
tasks="lambada_openai lambada_standard"
for var in "$@"
do
case $var in
@@ -92,6 +93,18 @@ function run_benchmark {
script="evaluate_clm.py"
model_name_or_path="/tf_dataset2/models/pytorch/gpt-j-6B"
approach="PostTrainingStatic"
elif [ "${topology}" = "opt_2.7b_clm_ipex" ]; then
script="evaluate_clm.py"
model_name_or_path="facebook/opt-2.7b"
approach="PostTrainingStatic"
elif [ "${topology}" = "opt_6.7b_clm_ipex" ]; then
script="evaluate_clm.py"
model_name_or_path="facebook/opt-6.7b"
approach="PostTrainingStatic"
elif [ "${topology}" = "llama_7b_clm_ipex" ]; then
script="evaluate_clm.py"
model_name_or_path="decapoda-research/llama-7b-hf"
approach="PostTrainingStatic"
elif [ "${topology}" = "bert_mlm_static" ]; then
script="run_mlm.py"
DATASET_NAME="wikitext"
@@ -154,24 +167,26 @@ function run_benchmark {

if [[ ${int8} == "true" ]]; then
extra_cmd=$extra_cmd" --int8"
if [ ${script} != "evaluate_clm.py" ];then
model_name_or_path=${tuned_checkpoint}
fi
fi

if [[ ${int8} == "true" ]] && [ "${topology}" = "gpt_j_6b_clm_ipex" ]; then
model_name_or_path="/tf_dataset2/models/pytorch/gpt-j-6B"
fi

if [[ ${bf16} == "true" ]]; then
extra_cmd=$extra_cmd" --bf16_ipex"
fi
if [ "${tasks}" != "" ]; then
extra_cmd=$extra_cmd" --tasks ${tasks}"
fi

echo $extra_cmd

if [ ${script} = "evaluate_clm.py" ];then
python -u ./${script} \
--model ${model_name_or_path} \
--output_dir ${tuned_checkpoint} \
--batch_size ${batch_size} \
${mode_cmd} \
${extra_cmd}
elif [ -z ${DATASET_CONFIG_NAME} ];then
@@ -18,6 +18,7 @@ function init_params {
batch_size=8
model_type="bert"
approach="PostTrainingStatic"
alpha=0.5
for var in "$@"
do
case $var in
@@ -85,9 +86,27 @@ function run_tuning {
approach="PostTrainingDynamic"
elif [ "${topology}" = "gpt_j_6b_clm_ipex" ]; then
script="evaluate_clm.py"
DATASET_NAME="lambada"
DATASET_NAME="NeelNanda/pile-10k"
model_name_or_path="/tf_dataset2/models/pytorch/gpt-j-6B"
approach="PostTrainingStatic"
elif [ "${topology}" = "opt_2.7b_clm_ipex" ]; then
script="evaluate_clm.py"
DATASET_NAME="NeelNanda/pile-10k"
model_name_or_path="facebook/opt-2.7b"
approach="PostTrainingStatic"
extra_cmd=$extra_cmd" --int8_bf16_mixed"
elif [ "${topology}" = "opt_6.7b_clm_ipex" ]; then
script="evaluate_clm.py"
DATASET_NAME="NeelNanda/pile-10k"
model_name_or_path="facebook/opt-6.7b"
approach="PostTrainingStatic"
extra_cmd=$extra_cmd" --int8_bf16_mixed"
elif [ "${topology}" = "llama_7b_clm_ipex" ]; then
script="evaluate_clm.py"
DATASET_NAME="NeelNanda/pile-10k"
model_name_or_path="decapoda-research/llama-7b-hf"
approach="PostTrainingStatic"
extra_cmd=$extra_cmd" --int8_bf16_mixed"
elif [ "${topology}" = "bert_mlm_static" ]; then
script="run_mlm.py"
DATASET_NAME="wikitext"
@@ -179,10 +198,10 @@ function run_tuning {
python -u ./${script} \
--model ${model_name_or_path} \
--output_dir ${tuned_checkpoint} \
--dataset ${DATASET_NAME} \
--quantize \
--sq \
--alpha ${alpha} \
${extra_cmd}
elif [ -z ${DATASET_CONFIG_NAME} ];then
python -u ./${script} \