From 0887d8c15c28c1683c74df5539c7902d170cf9ff Mon Sep 17 00:00:00 2001
From: leonardozcm <597714572@qq.com>
Date: Thu, 16 Sep 2021 15:55:06 +0800
Subject: [PATCH 1/2] glue finetune

---
 .../examples/text-classification/Benchmark.md | 64 ++++++++++++++++
 .../examples/text-classification/bert.yaml    | 74 +++++++++++++++++++
 .../text-classification/run_glue_tune.py      | 12 +++
 3 files changed, 150 insertions(+)
 create mode 100644 examples/pytorch/eager/huggingface_models/examples/text-classification/Benchmark.md
 create mode 100644 examples/pytorch/eager/huggingface_models/examples/text-classification/bert.yaml

diff --git a/examples/pytorch/eager/huggingface_models/examples/text-classification/Benchmark.md b/examples/pytorch/eager/huggingface_models/examples/text-classification/Benchmark.md
new file mode 100644
index 00000000000..ee87684f265
--- /dev/null
+++ b/examples/pytorch/eager/huggingface_models/examples/text-classification/Benchmark.md
@@ -0,0 +1,64 @@
+## Original check:
+eval_accuracy = 0.8382
+throughput = 40.09
+
+```
+python run_glue.py --model_name_or_path bert-base-cased --task_name $TASK_NAME --do_train --do_eval --max_seq_length 128 --per_device_train_batch_size 32 --learning_rate 2e-5 --output_dir /tmp/$TASK_NAME/ --overwrite_output_dir
+```
+
+## LPOT fine-tune:
+accuracy = 0.81341
+throughput = 51.19
+
+See https://github.com/leonardozcm/neural-compressor/tree/master/examples/pytorch/eager/huggingface_models
+
+```
+export TASK_NAME=MRPC
+
+python run_glue.py \
+  --model_name_or_path bert-base-cased \
+  --task_name $TASK_NAME \
+  --do_train \
+  --do_eval \
+  --max_seq_length 128 \
+  --per_device_train_batch_size 32 \
+  --learning_rate 2e-5 \
+  --num_train_epochs 3 \
+  --output_dir /tmp/$TASK_NAME/
+
+bash run_tuning.sh --topology=bert_base_MRPC --dataset_location=/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad --input_model=/tmp/$TASK_NAME/
+
+
+python run_glue_tune.py --tuned_checkpoint best_model --task_name MRPC --max_seq_length 128 --benchmark --int8 --output_dir /tmp/$TASK_NAME/ --model_name_or_path bert-base-cased
+```
+
+
+## LPOT fine-tune + prune:
+
+Pruning takes more than 15 hours.
+
+```
+python examples/text-classification/run_glue_no_trainer_prune.py --task_name mnli --max_length 128 \
+  --model_name_or_path Intel/bert-base-uncased-sparse-70-unstructured \
+  --per_device_train_batch_size 32 --learning_rate 5e-5 --num_train_epochs 3 --output_dir /tmp/$TASK_NAME/ \
+  --prune --config prune.yaml --output_model prune_model/model.pt --seed 5143
+```
+
+## ONNX:
+accuracy = 0.8603
+throughput = 53.237
+
+Refer to https://github.com/intel/neural-compressor/tree/1e295885782c05f8a980d74a88c17311e03cf7aa/examples/onnxrt/language_translation/bert
+```
+bash prepare_data.sh --data_dir=./MRPC --task_name=$TASK_NAME
+bash prepare_model.sh --input_dir=/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad \
+  --task_name=$TASK_NAME \
+  --output_model=./bert.onnx  # output path; must be a *.onnx file
+
+python run_glue_tune.py --task_name MRPC --max_seq_length 128 --output_dir /tmp/$TASK_NAME/ --model_name_or_path bert-base-cased
+```
+
+## bigdl-nano (jemalloc + omp):
+
+
+
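For reference, the `--int8 --benchmark` step above relies on LPOT's PyTorch checkpoint loader to restore the tuned model. A minimal sketch of that loading step, assuming the LPOT 1.x API and that `run_tuning.sh` saved its result under the `best_model` directory passed as `--tuned_checkpoint`:

```
# Minimal sketch (assumes LPOT 1.x): restore the int8 model tuned by run_tuning.sh.
import os
from transformers import AutoModelForSequenceClassification
from lpot.utils.pytorch import load

# FP32 model fine-tuned on MRPC (the output_dir of the run_glue.py command above).
fp32_model = AutoModelForSequenceClassification.from_pretrained("/tmp/MRPC")

# Re-applies the saved quantization config to the FP32 graph and loads int8 weights.
int8_model = load(os.path.abspath(os.path.expanduser("best_model")), fp32_model)
int8_model.eval()
```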
diff --git a/examples/pytorch/eager/huggingface_models/examples/text-classification/bert.yaml b/examples/pytorch/eager/huggingface_models/examples/text-classification/bert.yaml
new file mode 100644
index 00000000000..5af40a20d7f
--- /dev/null
+++ b/examples/pytorch/eager/huggingface_models/examples/text-classification/bert.yaml
@@ -0,0 +1,74 @@
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+version: 1.0
+
+model:        # mandatory. used to specify model-specific information.
+  name: bert
+  framework: onnxrt_integerops        # mandatory. possible values are tensorflow, mxnet, pytorch, pytorch_ipex, onnxrt_integerops and onnxrt_qlinearops.
+
+quantization:
+  approach: post_training_dynamic_quant        # optional. default value is post_training_static_quant.
+  calibration:
+    sampling_size: 8, 16, 32
+    dataloader:
+      batch_size: 8
+      dataset:
+        GLUE:
+          data_dir: /path/to/dataset
+          model_name_or_path: bert-base-uncased
+          max_seq_length: 128
+          task: mrpc
+          model_type: bert
+          dynamic_length: False
+
+evaluation:        # optional. required if user doesn't provide eval_func in lpot.Quantization.
+  accuracy:        # optional. required if user doesn't provide eval_func in lpot.Quantization.
+    metric:
+      GLUE:
+        task: mrpc        # built-in metrics include topk, map and f1; users may also register new metrics.
+    dataloader:
+      batch_size: 32
+      dataset:
+        GLUE:
+          data_dir: ./MRPC/MRPC
+          model_name_or_path: bert-base-uncased
+          max_seq_length: 128
+          task: mrpc
+          model_type: bert
+          dynamic_length: False
+  performance:        # optional. used to benchmark the performance of the passing model.
+    warmup: 10
+    iteration: 100
+    # configs:
+    #   cores_per_instance: 4
+    #   num_of_instance: 7
+    dataloader:
+      batch_size: 32
+      dataset:
+        GLUE:
+          data_dir: ./MRPC/MRPC
+          model_name_or_path: bert-base-uncased
+          max_seq_length: 128
+          task: mrpc
+          model_type: bert
+          dynamic_length: False
+
+tuning:
+  accuracy_criterion:
+    relative: 0.01        # optional. default criterion is relative; the other option is absolute. this example allows a relative accuracy loss of 1%.
+  exit_policy:
+    timeout: 0        # optional. tuning timeout (seconds). default value is 0, which means early stop. combine with the max_trials field to decide when to exit.
+  random_seed: 9527        # optional. random seed for deterministic tuning.
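The yaml above is consumed by LPOT's experimental API. A minimal sketch of the quantization driver it configures, assuming `bert.onnx` was exported by `prepare_model.sh` and the LPOT 1.x `Quantization` interface (the output filename is illustrative):

```
# Minimal sketch (assumes LPOT 1.x experimental API): quantize bert.onnx per bert.yaml.
import onnx
from lpot.experimental import Quantization, common

fp32_model = onnx.load("bert.onnx")    # FP32 model exported by prepare_model.sh
quantizer = Quantization("bert.yaml")  # post_training_dynamic_quant, per the config above
quantizer.model = common.Model(fp32_model)
q_model = quantizer()                  # tunes until the 1% relative accuracy criterion holds
q_model.save("bert_int8.onnx")         # illustrative output path
```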
diff --git a/examples/pytorch/eager/huggingface_models/examples/text-classification/run_glue_tune.py b/examples/pytorch/eager/huggingface_models/examples/text-classification/run_glue_tune.py
index 24afeff2c1c..2c52cef1892 100755
--- a/examples/pytorch/eager/huggingface_models/examples/text-classification/run_glue_tune.py
+++ b/examples/pytorch/eager/huggingface_models/examples/text-classification/run_glue_tune.py
@@ -560,6 +560,18 @@ def eval_func_for_lpot(model_tuned):
                 else:
                     item = label_list[item]
                     writer.write(f"{index}\t{item}\n")
+
+    if False:  # set to True to turn on ONNX evaluation
+        logger.info("***ONNX Evaluation***")
+        import onnx
+        from lpot.experimental import Benchmark, common
+
+        model = onnx.load("bert.onnx")  # model exported by prepare_model.sh
+        evaluator = Benchmark("bert.yaml")
+        evaluator.model = common.Model(model)
+        eval_mode = "accuracy"  # or "performance"
+        evaluator(eval_mode)
+
     return eval_results

From 43fb84e64cf5894c41ec45808bfa0d2c501ac652 Mon Sep 17 00:00:00 2001
From: leonardozcm <597714572@qq.com>
Date: Thu, 16 Sep 2021 16:24:52 +0800
Subject: [PATCH 2/2] bigdl-nano

---
 .../examples/text-classification/Benchmark.md | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/examples/pytorch/eager/huggingface_models/examples/text-classification/Benchmark.md b/examples/pytorch/eager/huggingface_models/examples/text-classification/Benchmark.md
index ee87684f265..a8afdb2af83 100644
--- a/examples/pytorch/eager/huggingface_models/examples/text-classification/Benchmark.md
+++ b/examples/pytorch/eager/huggingface_models/examples/text-classification/Benchmark.md
@@ -1,10 +1,15 @@
 ## Original check:
 eval_accuracy = 0.8382
-throughput = 40.09
+throughput = 32.20
 
+You need to fine-tune the pretrained model before evaluation:
 ```
 python run_glue.py --model_name_or_path bert-base-cased --task_name $TASK_NAME --do_train --do_eval --max_seq_length 128 --per_device_train_batch_size 32 --learning_rate 2e-5 --output_dir /tmp/$TASK_NAME/ --overwrite_output_dir
 ```
+Alternatively, you can point --model_name_or_path at the directory of a local .bin model to skip training:
+```
+python run_glue.py --model_name_or_path /tmp/MRPC --task_name $TASK_NAME --do_eval --max_seq_length 128 --per_device_train_batch_size 32 --learning_rate 2e-5 --output_dir /tmp/$TASK_NAME/ --overwrite_output_dir
+```
 
 ## LPOT fine-tune:
 accuracy = 0.81341
@@ -59,6 +64,9 @@ python run_glue_tune.py --task_name MRPC --max_seq_length 128 --output_dir /tm
 ```
 
 ## bigdl-nano (jemalloc + omp):
-
-
+accuracy = 0.8382
+throughput = 59.783
+```
+bigdl-nano-init python run_glue.py --model_name_or_path /tmp/MRPC --task_name $TASK_NAME --do_eval --max_seq_length 128 --per_device_train_batch_size 32 --learning_rate 2e-5 --output_dir /tmp/$TASK_NAME/ --overwrite_output_dir
+```
 
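Flipping the `if False:` toggle patched into run_glue_tune.py above requires editing the source; a standalone equivalent may be more convenient. A minimal sketch, assuming `bert.onnx` and `bert.yaml` sit in the working directory and LPOT 1.x is installed:

```
# Standalone version of the ONNX evaluation block patched into run_glue_tune.py.
import onnx
from lpot.experimental import Benchmark, common

model = onnx.load("bert.onnx")
evaluator = Benchmark("bert.yaml")
evaluator.model = common.Model(model)

# "accuracy" reports the MRPC metric; "performance" runs the warmup/iteration
# loop configured in bert.yaml and reports throughput.
for mode in ("accuracy", "performance"):
    evaluator(mode)
```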