Enable ONNXRT layoutlmft & layoutlmv3 examples (#837)

Signed-off-by: yuwenzho <yuwen.zhou@intel.com>
intel · May 8, 2023 · 3c1e894 · 3c1e894
1 parent 552459f
commit 3c1e894
Show file tree

Hide file tree

Showing 32 changed files with 2,943 additions and 3 deletions.
diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt
@@ -2600,3 +2600,10 @@ cdrdv
 NeuralCompressor
 zp
 TensorflowTopK
+FUNSD
+LayoutLMv
+layoutlmv
+funsd
+layoutlmft
+nielsr
+HYPJUDY
diff --git a/examples/.config/model_params_onnxrt.json b/examples/.config/model_params_onnxrt.json
@@ -783,6 +783,34 @@
       "input_model": "/tf_dataset2/models/onnx/hf_distilgpt2/distilgpt2.onnx",
       "main_script": "main.py",
       "batch_size": 1
+    },
+    "hf_layoutlmv3_dynamic": {
+      "model_src_dir": "nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic",
+      "dataset_location": "",
+      "input_model": "/tf_dataset2/models/onnx/hf_layoutlmv3/layoutlmv3-base-finetuned-funsd.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
+    "hf_layoutlmv3": {
+      "model_src_dir": "nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static",
+      "dataset_location": "",
+      "input_model": "/tf_dataset2/models/onnx/hf_layoutlmv3/layoutlmv3-base-finetuned-funsd.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
+    "hf_layoutlmft_dynamic": {
+      "model_src_dir": "nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_dynamic",
+      "dataset_location": "",
+      "input_model": "/tf_dataset2/models/onnx/hf_layoutlmft/layoutlmft-model.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
+    "hf_layoutlmft": {
+      "model_src_dir": "nlp/huggingface_model/token_classification/layoutlmft/quantization/ptq_static",
+      "dataset_location": "",
+      "input_model": "/tf_dataset2/models/onnx/hf_layoutlmft/layoutlmft-model.onnx",
+      "main_script": "main.py",
+      "batch_size": 1
     }
   }
 }

diff --git a/...ngface_model/token_classification/layoutlmft/quantization/ptq_dynamic/README.md b/...ngface_model/token_classification/layoutlmft/quantization/ptq_dynamic/README.md
@@ -0,0 +1,52 @@
+Step-by-Step
+============
+
+This example loads a LayoutLM model and confirms its accuracy and speed on the [FUNSD](https://huggingface.co/datasets/nielsr/funsd) dataset.
+
+# Prerequisite
+
+## 1. Environment
+```shell
+pip install neural-compressor
+pip install -r requirements.txt
+bash install_layoutlmft.sh
+```
+> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+## 2. Prepare Model
+Fine-tune the model on FUNSD:
+
+```bash
+python main.py \
+       --model_name_or_path microsoft/layoutlm-base-uncased \
+       --output_dir ./layoutlm-base-uncased-finetuned-funsd \
+       --do_train \
+       --max_steps 1000 \
+       --warmup_ratio 0.1 
+```
+
+Export a model to ONNX with `optimum.exporters.onnx`.
+
+```bash
+optimum-cli export onnx --model ./layoutlm-base-uncased-finetuned-funsd ./layoutlm-base-uncased-finetuned-funsd-onnx/ --task=token-classification
+```
+
+# Run
+
+## 1. Quantization
+
+Dynamic quantization:
+
+```bash
+bash run_tuning.sh --input_model=./layoutlm-base-uncased-finetuned-funsd-onnx/model.onnx \
+                   --output_model=/path/to/model_tune # --input_model: model path as *.onnx
+```
+
+
+## 2. Benchmark
+
+```bash
+bash run_benchmark.sh --input_model=/path/to/model \
+                      --batch_size=batch_size \
+                      --mode=performance # or accuracy; --input_model: model path as *.onnx
+```
diff --git a/...face_model/token_classification/layoutlmft/quantization/ptq_dynamic/install_layoutlmft.sh b/...face_model/token_classification/layoutlmft/quantization/ptq_dynamic/install_layoutlmft.sh
@@ -0,0 +1,3 @@
+git clone https://github.com/microsoft/unilm.git
+cd unilm/layoutlmft
+pip install -e .