Skip to content

Commit

Permalink
Add hf example for onnxrt backend (#1342)
Browse files Browse the repository at this point in the history
  • Loading branch information
mengniwang95 committed Nov 9, 2022
1 parent a2db276 commit f4aeb5d
Show file tree
Hide file tree
Showing 93 changed files with 2,760 additions and 24 deletions.
120 changes: 105 additions & 15 deletions examples/.config/model_params_onnxrt.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
"new_benchmark": true
},
"bert_base_MRPC_static": {
"model_src_dir": "language_translation/bert/quantization/ptq",
"model_src_dir": "nlp/bert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/bert_base_MRPC/bert.onnx",
"yaml": "bert_static.yaml",
Expand All @@ -55,7 +55,7 @@
"new_benchmark": true
},
"bert_base_MRPC_dynamic": {
"model_src_dir": "language_translation/bert/quantization/ptq",
"model_src_dir": "nlp/bert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/bert_base_MRPC/bert.onnx",
"yaml": "bert_dynamic.yaml",
Expand All @@ -64,7 +64,7 @@
"new_benchmark": true
},
"distilbert_base_MRPC": {
"model_src_dir": "language_translation/distilbert/quantization/ptq",
"model_src_dir": "nlp/distilbert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/distilbert_base_MRPC/distilbert-base-uncased.onnx",
"yaml": "distilbert.yaml",
Expand All @@ -73,7 +73,7 @@
"new_benchmark": true
},
"mobilebert_MRPC": {
"model_src_dir": "language_translation/mobilebert/quantization/ptq",
"model_src_dir": "nlp/mobilebert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/mobilebert_MRPC/mobilebert-uncased.onnx",
"yaml": "mobilebert.yaml",
Expand All @@ -82,7 +82,7 @@
"new_benchmark": true
},
"roberta_base_MRPC": {
"model_src_dir": "language_translation/roberta/quantization/ptq",
"model_src_dir": "nlp/roberta/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/roberta_base_MRPC/roberta-base.onnx",
"yaml": "roberta.yaml",
Expand Down Expand Up @@ -118,7 +118,7 @@
"new_benchmark": true
},
"bert_squad_model_zoo": {
"model_src_dir": "language_translation/onnx_model_zoo/bert-squad/quantization/ptq",
"model_src_dir": "nlp/onnx_model_zoo/bert-squad/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad",
"input_model": "/tf_dataset2/models/onnx/bert_squad/bert_squad_model_zoo.onnx",
"yaml": "bert.yaml",
Expand All @@ -127,7 +127,7 @@
"new_benchmark": true
},
"mobilebert_squad_mlperf": {
"model_src_dir": "language_translation/onnx_model_zoo/mobilebert/quantization/ptq",
"model_src_dir": "nlp/onnx_model_zoo/mobilebert/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad",
"input_model": "/tf_dataset2/models/onnx/mobilebert_squad/mobilebert_squad_mlperf.onnx",
"yaml": "mobilebert.yaml",
Expand All @@ -136,7 +136,7 @@
"new_benchmark": true
},
"gpt2_lm_head_wikitext_model_zoo": {
"model_src_dir": "language_translation/onnx_model_zoo/gpt2/quantization/ptq",
"model_src_dir": "nlp/onnx_model_zoo/gpt2/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/wikitext/wikitext-2-raw/",
"input_model": "/tf_dataset2/models/onnx/gpt2/gpt2_lm_head_wikitext_model_zoo.onnx",
"yaml": "gpt2.yaml",
Expand Down Expand Up @@ -352,7 +352,7 @@
"new_benchmark": true
},
"bert_base_MRPC_static_qdq": {
"model_src_dir": "language_translation/bert/quantization/ptq",
"model_src_dir": "nlp/bert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/bert_base_MRPC/bert.onnx",
"yaml": "bert_qdq.yaml",
Expand All @@ -361,7 +361,7 @@
"new_benchmark": true
},
"distilbert_base_MRPC_qdq": {
"model_src_dir": "language_translation/distilbert/quantization/ptq",
"model_src_dir": "nlp/distilbert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/distilbert_base_MRPC/distilbert-base-uncased.onnx",
"yaml": "distilbert_qdq.yaml",
Expand All @@ -370,7 +370,7 @@
"new_benchmark": true
},
"mobilebert_MRPC_qdq": {
"model_src_dir": "language_translation/mobilebert/quantization/ptq",
"model_src_dir": "nlp/mobilebert/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/mobilebert_MRPC/mobilebert-uncased.onnx",
"yaml": "mobilebert_qdq.yaml",
Expand All @@ -379,7 +379,7 @@
"new_benchmark": true
},
"roberta_base_MRPC_qdq": {
"model_src_dir": "language_translation/roberta/quantization/ptq",
"model_src_dir": "nlp/roberta/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/roberta_base_MRPC/roberta-base.onnx",
"yaml": "roberta_qdq.yaml",
Expand Down Expand Up @@ -415,7 +415,7 @@
"new_benchmark": true
},
"bert_squad_model_zoo_qdq": {
"model_src_dir": "language_translation/onnx_model_zoo/bert-squad/quantization/ptq",
"model_src_dir": "nlp/onnx_model_zoo/bert-squad/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad",
"input_model": "/tf_dataset2/models/onnx/bert_squad/bert_squad_model_zoo.onnx",
"yaml": "bert_qdq.yaml",
Expand All @@ -424,7 +424,7 @@
"new_benchmark": true
},
"mobilebert_squad_mlperf_qdq": {
"model_src_dir": "language_translation/onnx_model_zoo/mobilebert/quantization/ptq",
"model_src_dir": "nlp/onnx_model_zoo/mobilebert/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad",
"input_model": "/tf_dataset2/models/onnx/mobilebert_squad/mobilebert_squad_mlperf-13.onnx",
"yaml": "mobilebert_qdq.yaml",
Expand Down Expand Up @@ -631,13 +631,103 @@
"new_benchmark": true
},
"BiDAF": {
"model_src_dir": "language_translation/onnx_model_zoo/BiDAF/quantization/ptq",
"model_src_dir": "nlp/onnx_model_zoo/BiDAF/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad/dev-v1.1.json",
"input_model": "/tf_dataset2/models/onnx/BiDAF/bidaf-11.onnx",
"yaml": "bidaf.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_bert-base-uncased_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/hf_bert-base-uncased_dynamic/bert-base-uncased-mrpc.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_roberta-base_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/hf_roberta-base_dynamic/roberta-base-mrpc.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_xlm-roberta-base_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/hf_xlm-roberta-base_dynamic/xlm-roberta-base-mrpc.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_camembert-base_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/hf_camembert-base_dynamic/camembert-base-mrpc.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_MiniLM-L12-H384-uncased_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC",
"input_model": "/tf_dataset2/models/onnx/hf_MiniLM-L12-H384-uncased_dynamic/MiniLM-L12-H384-uncased-mrpc.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_distilbert-base-uncased_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/",
"input_model": "/tf_dataset2/models/onnx/hf_distilbert-base-uncased_dynamic/distilbert-base-uncased-finetuned-sst-2-english.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_albert-base-v2_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/",
"input_model": "/tf_dataset2/models/onnx/hf_albert-base-v2_dynamic/albert-base-v2-sst2.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_MiniLM-L6-H384-uncased_dynamic": {
"model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq",
"dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/",
"input_model": "/tf_dataset2/models/onnx/hf_MiniLM-L6-H384-uncased_dynamic/MiniLM-L6-H384-uncased-sst2.onnx",
"yaml": "glue_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_spanbert_dynamic": {
"model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad",
"input_model": "/tf_dataset2/models/onnx/hf_spanbert_dynamic/spanbert-finetuned-squadv1.onnx",
"yaml": "qa_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
},
"hf_bert-base-multilingual-cased_dynamic": {
"model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq",
"dataset_location": "/tf_dataset2/datasets/squad",
"input_model": "/tf_dataset2/models/onnx/hf_bert-base-multilingual-cased_dynamic/bert-base-multilingual-cased-finetuned-squad.onnx",
"yaml": "qa_dynamic.yaml",
"strategy": "basic",
"batch_size": 1,
"new_benchmark": true
}
}
}
78 changes: 69 additions & 9 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -855,55 +855,115 @@ Intel® Neural Compressor validated examples with multiple compression technique
<td>BERT base MRPC</td>
<td>Natural Language Processing</td>
<td>Post-Training Static Quantization</td>
<td><a href="./onnxrt/language_translation/bert/quantization/ptq">integerops</a> / <a href="./onnxrt/language_translation/bert/quantization/ptq">qdq</a></td>
<td><a href="./onnxrt/nlp/bert/quantization/ptq">integerops</a> / <a href="./onnxrt/nlp/bert/quantization/ptq">qdq</a></td>
</tr>
<tr>
<td>BERT base MRPC</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/language_translation/bert/quantization/ptq">integerops</a></td>
<td><a href="./onnxrt/nlp/bert/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>DistilBERT base MRPC</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic / Static Quantization</td>
<td><a href="./onnxrt/language_translation/distilbert/quantization/ptq">integerops</a> / <a href="./onnxrt/language_translation/distilbert/quantization/ptq">qdq</a></td>
<td><a href="./onnxrt/nlp/distilbert/quantization/ptq">integerops</a> / <a href="./onnxrt/nlp/distilbert/quantization/ptq">qdq</a></td>
</tr>
<tr>
<td>Mobile bert MRPC</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic / Static Quantization</td>
<td><a href="./onnxrt/language_translation/mobilebert/quantization/ptq">integerops</a> / <a href="./onnxrt/language_translation/mobilebert/quantization/ptq">qdq</a></td>
<td><a href="./onnxrt/nlp/mobilebert/quantization/ptq">integerops</a> / <a href="./onnxrt/nlp/mobilebert/quantization/ptq">qdq</a></td>
</tr>
<tr>
<td>Roberta base MRPC</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic / Static Quantization</td>
<td><a href="./onnxrt/language_translation/roberta/quantization/ptq">integerops</a> / <a href="./onnxrt/language_translation/roberta/quantization/ptq">qdq</a></td>
<td><a href="./onnxrt/nlp/roberta/quantization/ptq">integerops</a> / <a href="./onnxrt/nlp/roberta/quantization/ptq">qdq</a></td>
</tr>
<tr>
<td>BERT SQuAD</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic / Static Quantization</td>
<td><a href="./onnxrt/language_translation/onnx_model_zoo/bert-squad/quantization/ptq">integerops</a> / <a href="./onnxrt/language_translation/onnx_model_zoo/bert-squad/quantization/ptq">qdq</a></td>
<td><a href="./onnxrt/nlp/bert-squad/quantization/ptq">integerops</a> / <a href="./onnxrt/nlp/bert-squad/quantization/ptq">qdq</a></td>
</tr>
<tr>
<td>GPT2 lm head WikiText</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/language_translation/onnx_model_zoo/gpt2/quantization/ptq">integerops</a></td>
<td><a href="./onnxrt/nlp/gpb2/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>MobileBERT SQuAD MLPerf</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic / Static Quantization</td>
<td><a href="./onnxrt/language_translation/onnx_model_zoo/mobilebert/quantization/ptq">integerops</a> / <a href="./onnxrt/language_translation/onnx_model_zoo/mobilebert/quantization/ptq">qdq</a></td>
<td><a href="./onnxrt/nlp/mobilebert/quantization/ptq">integerops</a> / <a href="./onnxrt/nlp/mobilebert/quantization/ptq">qdq</a></td>
</tr>
<tr>
<td>BiDAF</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/language_translation/onnx_model_zoo/BiDAF/quantization/ptq">integerops</a></td>
<td><a href="./onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>BERT base uncased MRPC (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>Roberta base MRPC (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>XLM Roberta base MRPC (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>Camembert base MRPC (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>MiniLM L12 H384 uncased MRPC (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>Distilbert base uncased SST-2 (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>Albert base v2 SST-2 (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>MiniLM L6 H384 uncased SST-2 (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/text_classification/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>Spanbert SQuAD (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>Bert base multilingual cased SQuAD (HuggingFace)</td>
<td>Natural Language Processing</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./onnxrt/nlp/huggingface_model/question_answering/quantization/ptq">integerops</a></td>
</tr>
<tr>
<td>SSD MobileNet V1</td>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Evaluate performance of ONNX Runtime(Huggingface Question Answering)
>ONNX Runtime quantization is under active development. Please use 1.6.0+ to get more quantization support.
This example loads a HuggingFace question answering model and confirms its accuracy and speed on the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) task.

### Environment
Please use the latest onnx and onnxruntime versions.

### Prepare dataset
You should download SQuAD dataset from [SQuAD dataset link](https://rajpurkar.github.io/SQuAD-explorer/).

### Prepare model

Supported model identifier from [huggingface.co](https://huggingface.co/):

| Model Identifier |
|:-----------------------------------------------:|
| mrm8488/spanbert-finetuned-squadv1 |
| salti/bert-base-multilingual-cased-finetuned-squad |


```bash
python export.py --model_name_or_path=mrm8488/spanbert-finetuned-squadv1 # or other supported model identifier
```

### Quantization

Dynamic quantize:

```bash
bash run_tuning.sh --input_model=/path/to/model \ # model path as *.onnx
--output_model=/path/to/model_tune \
--config=qa_dynamic.yaml
```

### Benchmark

```bash
bash run_benchmark.sh --input_model=/path/to/model \ # model path as *.onnx
                      --config=qa_dynamic.yaml \
                      --mode=performance # or accuracy
```

Loading

0 comments on commit f4aeb5d

Please sign in to comment.