@@ -0,0 +1,72 @@
## Original check:
eval_accuracy = 0.8382
throughput = 32.20

You need to fine-tune the pretrained model before evaluation:
```
export TASK_NAME=MRPC

python run_glue.py --model_name_or_path bert-base-cased --task_name $TASK_NAME --do_train --do_eval --max_seq_length 128 --per_device_train_batch_size 32 --learning_rate 2e-5 --output_dir /tmp/$TASK_NAME/ --overwrite_output_dir
```
Alternatively, you can skip training by pointing --model_name_or_path at a local directory that already contains a fine-tuned checkpoint (the saved .bin model):
```
python run_glue.py --model_name_or_path /tmp/MRPC --task_name $TASK_NAME --do_eval --max_seq_length 128 --per_device_train_batch_size 32 --learning_rate 2e-5 --output_dir /tmp/$TASK_NAME/ --overwrite_output_dir
```
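
If you point the script at a local checkpoint, it can be worth confirming first that the directory saved by training actually loads. Below is a minimal sanity-check sketch using the transformers API; it assumes /tmp/MRPC contains the config, tokenizer files, and model weights written by the training run, and the example sentences are made up:
```
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the locally fine-tuned MRPC checkpoint instead of the hub model.
model = AutoModelForSequenceClassification.from_pretrained("/tmp/MRPC")
tokenizer = AutoTokenizer.from_pretrained("/tmp/MRPC")

# MRPC is a sentence-pair (paraphrase) task, so encode the two sentences together.
inputs = tokenizer("He said the food was good.",
                   "He praised the food.",
                   return_tensors="pt")
print(model(**inputs).logits)
```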

## LPOT fine-tune:
accuracy = 0.81341
throughput = 51.19

Refer to https://github.com/leonardozcm/neural-compressor/tree/master/examples/pytorch/eager/huggingface_models

```
export TASK_NAME=MRPC

python run_glue.py \
--model_name_or_path bert-base-cased \
--task_name $TASK_NAME \
--do_train \
--do_eval \
--max_seq_length 128 \
--per_device_train_batch_size 32 \
--learning_rate 2e-5 \
--num_train_epochs 3 \
--output_dir /tmp/$TASK_NAME/

bash run_tuning.sh --topology=bert_base_MRPC \
  --dataset_location=/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad \
  --input_model=/tmp/$TASK_NAME/

python run_glue_tune.py \
  --tuned_checkpoint best_model \
  --task_name MRPC \
  --max_seq_length 128 \
  --benchmark \
  --int8 \
  --output_dir /tmp/$TASK_NAME/ \
  --model_name_or_path bert-base-cased
```
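
run_tuning.sh drives LPOT's post-training quantization on the fine-tuned model. A simplified sketch of what that amounts to with the lpot.experimental API (the same API family this PR already uses for benchmarking) is shown below; the conf.yaml path and output directory are placeholders, and the real wiring lives in the example's run scripts:
```
from transformers import AutoModelForSequenceClassification
from lpot.experimental import Quantization, common

# Fine-tuned FP32 model produced by run_glue.py above.
model = AutoModelForSequenceClassification.from_pretrained("/tmp/MRPC")

# The yaml defines the calibration/evaluation dataloaders and the tuning policy.
quantizer = Quantization("conf.yaml")   # placeholder config path
quantizer.model = common.Model(model)
q_model = quantizer()                   # runs accuracy-driven tuning, returns the INT8 model
q_model.save("best_model")              # later consumed via --tuned_checkpoint best_model
```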


## LPOT fine-tune + prune:

Pruning takes more than 15 hours.

```
python examples/text-classification/run_glue_no_trainer_prune.py --task_name mnli --max_length 128 \
--model_name_or_path Intel/bert-base-uncased-sparse-70-unstructured \
--per_device_train_batch_size 32 --learning_rate 5e-5 --num_train_epochs 3 --output_dir /tmp/$TASK_NAME/ \
--prune --config prune.yaml --output_model prune_model/model.pt --seed 5143
```
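
Under the --prune flag, run_glue_no_trainer_prune.py applies LPOT pruning with the settings in prune.yaml. As a rough illustration only (the exact prune.yaml schema and the way the training loop is attached to the Pruning object should be checked against the script and the repo's example configs), the flow looks something like:
```
from transformers import AutoModelForSequenceClassification
from lpot.experimental import Pruning, common

# Start from the 70%-sparse BERT used in the command above.
model = AutoModelForSequenceClassification.from_pretrained(
    "Intel/bert-base-uncased-sparse-70-unstructured")

# prune.yaml defines the target sparsity, the schedule, and which layers to prune.
prune = Pruning("prune.yaml")
prune.model = common.Model(model)
# Depending on the LPOT version, a training function/dataloader is attached to the
# Pruning object here; run_glue_no_trainer_prune.py does that wiring.
pruned_model = prune()  # runs pruning-aware training and returns the sparse model
```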

## ONNX:
accuracy = 0.8603
throughput = 53.237

Refer to https://github.com/intel/neural-compressor/tree/1e295885782c05f8a980d74a88c17311e03cf7aa/examples/onnxrt/language_translation/bert
```
bash prepare_data.sh --data_dir=./MRPC --task_name=$TASK_NAME
bash prepare_model.sh --input_dir=/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad \
--task_name=$TASK_NAME \
--output_model=./bert.onnx  # the output model path must end with .onnx

python run_glue_tune.py --task_name MRPC --max_seq_length 128 --output_dir /tmp/$TASK_NAME/ --model_name_or_path bert-base-cased
```
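
Before handing bert.onnx to LPOT, it is worth confirming that the export loads and checking its input signature, since the input names depend on how prepare_model.sh exported the graph. A minimal sketch, assuming onnxruntime is installed:
```
import onnxruntime as ort

# Load the exported model and print the expected inputs and outputs.
session = ort.InferenceSession("bert.onnx")
for inp in session.get_inputs():
    print("input :", inp.name, inp.shape, inp.type)
for out in session.get_outputs():
    print("output:", out.name, out.shape, out.type)
```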

## bigdl-nano (jemalloc + OpenMP):
accuracy = 0.8382
throughput = 59.783
```
bigdl-nano-init python run_glue.py --model_name_or_path /tmp/MRPC --task_name $TASK_NAME --do_eval --max_seq_length 128 --per_device_train_batch_size 32 --learning_rate 2e-5 --output_dir /tmp/$TASK_NAME/ --overwrite_output_dir
```
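
bigdl-nano-init speeds things up by exporting allocator and threading settings (jemalloc via LD_PRELOAD plus OpenMP thread/affinity variables) before launching the script. To see what it actually set on a given machine, you can dump the usual variables from inside the launched process; the list below covers the common knobs and is an assumption rather than an exhaustive description of the tool:
```
import os

# Typical jemalloc + OpenMP knobs such a launcher exports; unset ones print None.
for var in ("LD_PRELOAD", "MALLOC_CONF", "OMP_NUM_THREADS",
            "KMP_AFFINITY", "KMP_BLOCKTIME"):
    print(f"{var} = {os.environ.get(var)}")
```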

@@ -0,0 +1,74 @@
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

version: 1.0

model:                                  # mandatory. used to specify model specific information.
  name: bert
  framework: onnxrt_integerops          # mandatory. possible values are tensorflow, mxnet, pytorch, pytorch_ipex, onnxrt_integerops and onnxrt_qlinearops.

quantization:
  approach: post_training_dynamic_quant # optional. default value is post_training_static_quant.
  calibration:
    sampling_size: 8, 16, 32
    dataloader:
      batch_size: 8
      dataset:
        GLUE:
          data_dir: /path/to/dataset
          model_name_or_path: bert-base-uncased
          max_seq_length: 128
          task: mrpc
          model_type: bert
          dynamic_length: False

evaluation:                             # optional. required if user doesn't provide eval_func in lpot.Quantization.
  accuracy:                             # optional. required if user doesn't provide eval_func in lpot.Quantization.
    metric:
      GLUE:
        task: mrpc                      # built-in metrics are topk, map, f1, allow user to register new metric.
    dataloader:
      batch_size: 32
      dataset:
        GLUE:
          data_dir: ./MRPC/MRPC
          model_name_or_path: bert-base-uncased
          max_seq_length: 128
          task: mrpc
          model_type: bert
          dynamic_length: False
  performance:                          # optional. used to benchmark performance of passing model.
    warmup: 10
    iteration: 100
    # configs:
    #   cores_per_instance: 4
    #   num_of_instance: 7
    dataloader:
      batch_size: 32
      dataset:
        GLUE:
          data_dir: ./MRPC/MRPC
          model_name_or_path: bert-base-uncased
          max_seq_length: 128
          task: mrpc
          model_type: bert
          dynamic_length: False

tuning:
  accuracy_criterion:
    relative: 0.01                      # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%.
  exit_policy:
    timeout: 0                          # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit.
  random_seed: 9527                     # optional. random seed for deterministic tuning.
@@ -560,6 +560,18 @@ def eval_func_for_lpot(model_tuned):
                    else:
                        item = label_list[item]
                        writer.write(f"{index}\t{item}\n")

    if False:  # set to True to turn on ONNX evaluation with LPOT
        logger.info("*** ONNX Evaluation ***")
        import onnx
        from lpot.experimental import Benchmark, common

        model = onnx.load("bert.onnx")
        evaluator = Benchmark("bert.yaml")
        evaluator.model = common.Model(model)
        eval_mode = "accuracy"  # or "performance"
        evaluator(eval_mode)

    return eval_results

