Skip to content

Commit

Permalink
Add hf example for onnxrt backend (#1342)
Browse files Browse the repository at this point in the history
  • Loading branch information
mengniwang95 committed Nov 9, 2022
1 parent a2db276 commit f4aeb5d
Show file tree
Hide file tree
Showing 93 changed files with 2,760 additions and 24 deletions.
120 changes: 105 additions & 15 deletions examples/.config/model_params_onnxrt.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
"new_benchmark": true
},
"bert_base_MRPC_static": {
"model_src_dir": "language_translation/bert/quantization/ptq",
"model_src_dir": "nlp/bert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/bert_base_MRPC/bert.onnx",
"yaml": "bert_static.yaml",
Expand All @@ -55,7 +55,7 @@
"new_benchmark": true
},
"bert_base_MRPC_dynamic": {
"model_src_dir": "language_translation/bert/quantization/ptq",
"model_src_dir": "nlp/bert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/bert_base_MRPC/bert.onnx",
"yaml": "bert_dynamic.yaml",
Expand All @@ -64,7 +64,7 @@
"new_benchmark": true
},
"distilbert_base_MRPC": {
"model_src_dir": "language_translation/distilbert/quantization/ptq",
"model_src_dir": "nlp/distilbert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/distilbert_base_MRPC/distilbert-base-uncased.onnx",
"yaml": "distilbert.yaml",
Expand All @@ -73,7 +73,7 @@
"new_benchmark": true
},
"mobilebert_MRPC": {
"model_src_dir": "language_translation/mobilebert/quantization/ptq",
"model_src_dir": "nlp/mobilebert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/mobilebert_MRPC/mobilebert-uncased.onnx",
"yaml": "mobilebert.yaml",
Expand All @@ -82,7 +82,7 @@
"new_benchmark": true
},
"roberta_base_MRPC": {
"model_src_dir": "language_translation/roberta/quantization/ptq",
"model_src_dir": "nlp/roberta/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/roberta_base_MRPC/roberta-base.onnx",
"yaml": "roberta.yaml",
Expand Down Expand Up @@ -118,7 +118,7 @@
"new_benchmark": true
},
"bert_squad_model_zoo": {
"model_src_dir": "language_translation/onnx_model_zoo/bert-squad/quantization/ptq",
"model_src_dir": "nlp/onnx_model_zoo/bert-squad/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad",
"input_model": "/tf_dataset2/models/onnx/bert_squad/bert_squad_model_zoo.onnx",
"yaml": "bert.yaml",
Expand All @@ -127,7 +127,7 @@
"new_benchmark": true
},
"mobilebert_squad_mlperf": {
"model_src_dir": "language_translation/onnx_model_zoo/mobilebert/quantization/ptq",
"model_src_dir": "nlp/onnx_model_zoo/mobilebert/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad",
"input_model": "/tf_dataset2/models/onnx/mobilebert_squad/mobilebert_squad_mlperf.onnx",
"yaml": "mobilebert.yaml",
Expand All @@ -136,7 +136,7 @@
"new_benchmark": true
},
"gpt2_lm_head_wikitext_model_zoo": {
"model_src_dir": "language_translation/onnx_model_zoo/gpt2/quantization/ptq",
"model_src_dir": "nlp/onnx_model_zoo/gpt2/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/wikitext/wikitext-2-raw/",
"input_model": "/tf_dataset2/models/onnx/gpt2/gpt2_lm_head_wikitext_model_zoo.onnx",
"yaml": "gpt2.yaml",
Expand Down Expand Up @@ -352,7 +352,7 @@
"new_benchmark": true
},
"bert_base_MRPC_static_qdq": {
"model_src_dir": "language_translation/bert/quantization/ptq",
"model_src_dir": "nlp/bert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/bert_base_MRPC/bert.onnx",
"yaml": "bert_qdq.yaml",
Expand All @@ -361,7 +361,7 @@
"new_benchmark": true
},
"distilbert_base_MRPC_qdq": {
"model_src_dir": "language_translation/distilbert/quantization/ptq",
"model_src_dir": "nlp/distilbert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/distilbert_base_MRPC/distilbert-base-uncased.onnx",
"yaml": "distilbert_qdq.yaml",
Expand All @@ -370,7 +370,7 @@
"new_benchmark": true
},
"mobilebert_MRPC_qdq": {
"model_src_dir": "language_translation/mobilebert/quantization/ptq",
"model_src_dir": "nlp/mobilebert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/mobilebert_MRPC/mobilebert-uncased.onnx",
"yaml": "mobilebert_qdq.yaml",
Expand All @@ -379,7 +379,7 @@
"new_benchmark": true
},
"roberta_base_MRPC_qdq": {
"model_src_dir": "language_translation/roberta/quantization/ptq",
"model_src_dir": "nlp/roberta/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/roberta_base_MRPC/roberta-base.onnx",
"yaml": "roberta_qdq.yaml",
Expand Down Expand Up @@ -415,7 +415,7 @@
"new_benchmark": true
},
"bert_squad_model_zoo_qdq": {
"model_src_dir": "language_translation/onnx_model_zoo/bert-squad/quantization/ptq",
"model_src_dir": "nlp/onnx_model_zoo/bert-squad/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad",
"input_model": "/tf_dataset2/models/onnx/bert_squad/bert_squad_model_zoo.onnx",
"yaml": "bert_qdq.yaml",
Expand All @@ -424,7 +424,7 @@
"new_benchmark": true
},
"mobilebert_squad_mlperf_qdq": {
"model_src_dir": "language_translation/onnx_model_zoo/mobilebert/quantization/ptq",
"model_src_dir": "nlp/onnx_model_zoo/mobilebert/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad",
"input_model": "/tf_dataset2/models/onnx/mobilebert_squad/mobilebert_squad_mlperf-13.onnx",
"yaml": "mobilebert_qdq.yaml",
Expand Down Expand Up @@ -631,13 +631,103 @@
"new_benchmark": true
},
"BiDAF": {
"model_src_dir": "language_translation/onnx_model_zoo/BiDAF/quantization/ptq",
"model_src_dir": "nlp/onnx_model_zoo/BiDAF/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad/dev-v1.1.json",
"input_model": "/tf_dataset2/models/onnx/BiDAF/bidaf-11.onnx",
"yaml": "bidaf.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_bert-base-uncased_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/hf_bert-base-uncased_dynamic/bert-base-uncased-mrpc.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_roberta-base_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/hf_roberta-base_dynamic/roberta-base-mrpc.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_xlm-roberta-base_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/hf_xlm-roberta-base_dynamic/xlm-roberta-base-mrpc.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_camembert-base_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/hf_camembert-base_dynamic/camembert-base-mrpc.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_MiniLM-L12-H384-uncased_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/hf_MiniLM-L12-H384-uncased_dynamic/MiniLM-L12-H384-uncased-mrpc.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_distilbert-base-uncased_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/",
"input_model": "/tf_dataset2/models/onnx/hf_distilbert-base-uncased_dynamic/distilbert-base-uncased-finetuned-sst-2-english.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_albert-base-v2_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/",
"input_model": "/tf_dataset2/models/onnx/hf_albert-base-v2_dynamic/albert-base-v2-sst2.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_MiniLM-L6-H384-uncased_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/",
"input_model": "/tf_dataset2/models/onnx/hf_MiniLM-L6-H384-uncased_dynamic/MiniLM-L6-H384-uncased-sst2.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_spanbert_dynamic": {
"model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad",
"input_model": "/tf_dataset2/models/onnx/hf_spanbert_dynamic/spanbert-finetuned-squadv1.onnx",
"yaml": "qa_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_bert-base-multilingual-cased_dynamic": {
"model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad",
"input_model": "/tf_dataset2/models/onnx/hf_bert-base-multilingual-cased_dynamic/bert-base-multilingual-cased-finetuned-squad.onnx",
"yaml": "qa_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
}
}
}
78 changes: 69 additions & 9 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -855,55 +855,115 @@ Intel® Neural Compressor validated examples with multiple compression technique
<td>BERT base MRPC</td>
<td>Natural Language Processing</td>
<td>Post-Training Static Quantization</td>
<td><a href="./onnxrt/language_translation/bert/quantization/ptq">integerops</a> / <a href="./onnxrt/language_translation/bert/quantization/ptq">qdq</a></td>
<td><a href="./onnxrt/nlp/bert/quantization/ptq">integerops</a> / <a href="./onnxrt/nlp/bert/quantization/ptq">qdq</a></td>
</tr>
<tr>
<td>BERT base MRPC</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/language_translation/bert/quantization/ptq">integerops</a></td>
<td><a href="./onnxrt/nlp/bert/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>DistilBERT base MRPC</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic / Static Quantization</td>
<td><a href="./onnxrt/language_translation/distilbert/quantization/ptq">integerops</a> / <a href="./onnxrt/language_translation/distilbert/quantization/ptq">qdq</a></td>
<td><a href="./onnxrt/nlp/distilbert/quantization/ptq">integerops</a> / <a href="./onnxrt/nlp/distilbert/quantization/ptq">qdq</a></td>
</tr>
<tr>
<td>Mobile bert MRPC</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic / Static Quantization</td>
<td><a href="./onnxrt/language_translation/mobilebert/quantization/ptq">integerops</a> / <a href="./onnxrt/language_translation/mobilebert/quantization/ptq">qdq</a></td>
<td><a href="./onnxrt/nlp/mobilebert/quantization/ptq">integerops</a> / <a href="./onnxrt/nlp/mobilebert/quantization/ptq">qdq</a></td>
</tr>
<tr>
<td>Roberta base MRPC</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic / Static Quantization</td>
<td><a href="./onnxrt/language_translation/roberta/quantization/ptq">integerops</a> / <a href="./onnxrt/language_translation/roberta/quantization/ptq">qdq</a></td>
<td><a href="./onnxrt/nlp/roberta/quantization/ptq">integerops</a> / <a href="./onnxrt/nlp/roberta/quantization/ptq">qdq</a></td>
</tr>
<tr>
<td>BERT SQuAD</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic / Static Quantization</td>
<td><a href="./onnxrt/language_translation/onnx_model_zoo/bert-squad/quantization/ptq">integerops</a> / <a href="./onnxrt/language_translation/onnx_model_zoo/bert-squad/quantization/ptq">qdq</a></td>
<td><a href="./onnxrt/nlp/bert-squad/quantization/ptq">integerops</a> / <a href="./onnxrt/nlp/bert-squad/quantization/ptq">qdq</a></td>
</tr>
<tr>
<td>GPT2 lm head WikiText</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/language_translation/onnx_model_zoo/gpt2/quantization/ptq">integerops</a></td>
<td><a href="./onnxrt/nlp/gpb2/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>MobileBERT SQuAD MLPerf</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic / Static Quantization</td>
<td><a href="./onnxrt/language_translation/onnx_model_zoo/mobilebert/quantization/ptq">integerops</a> / <a href="./onnxrt/language_translation/onnx_model_zoo/mobilebert/quantization/ptq">qdq</a></td>
<td><a href="./onnxrt/nlp/mobilebert/quantization/ptq">integerops</a> / <a href="./onnxrt/nlp/mobilebert/quantization/ptq">qdq</a></td>
</tr>
<tr>
<td>BiDAF</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/language_translation/onnx_model_zoo/BiDAF/quantization/ptq">integerops</a></td>
<td><a href="./onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>BERT base uncased MRPC (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>Roberta base MRPC (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>XLM Roberta base MRPC (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>Camembert base MRPC (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>MiniLM L12 H384 uncased MRPC (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>Distilbert base uncased SST-2 (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>Albert base v2 SST-2 (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>MiniLM L6 H384 uncased SST-2 (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>Spanbert SQuAD (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>Bert base multilingual cased SQuAD (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>SSD MobileNet V1</td>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Evaluate performance of ONNX Runtime(Huggingface Question Answering)
>ONNX Runtime quantization is under active development. Please use 1.6.0+ to get more quantization support.
This example loads a HuggingFace question answering model and confirms its accuracy and speed on the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) task.

### Environment
Please use the latest onnx and onnxruntime versions.

### Prepare dataset
You should download SQuAD dataset from [SQuAD dataset link](https://rajpurkar.github.io/SQuAD-explorer/).

### Prepare model

Supported model identifier from [huggingface.co](https://huggingface.co/):

| Model Identifier |
|:-----------------------------------------------:|
| mrm8488/spanbert-finetuned-squadv1 |
| salti/bert-base-multilingual-cased-finetuned-squad |


```bash
python export.py --model_name_or_path=mrm8488/spanbert-finetuned-squadv1 # or other supported model identifier
```

### Quantization

Dynamic quantize:

```bash
bash run_tuning.sh --input_model=/path/to/model \ # model path as *.onnx
--output_model=/path/to/model_tune \
--config=qa_dynamic.yaml
```

### Benchmark

```bash
bash run_benchmark.sh --input_model=/path/to/model \ # model path as *.onnx
                      --config=qa_dynamic.yaml \
                      --mode=performance # or accuracy
```

Loading

0 comments on commit f4aeb5d

Please sign in to comment.