From f711b4c92798b5205a8f8665929fb10a89da7e71 Mon Sep 17 00:00:00 2001
From: kaiyaointel
Date: Wed, 30 Nov 2022 15:39:22 +0800
Subject: [PATCH] Neural Coder adding support of ONNX RT dynamic quant and static quant QDQ (#184)

* Create onnx_inc_static_quant_qdqyaml

* Rename onnx_inc_static_quant_qdqyaml to onnx_inc_static_quant_qdq.yaml

* Create onnx_inc_dynamic_quant.yaml
---
 .../backends/onnx_inc_dynamic_quant.yaml      | 30 ++++++++++++++++++
 .../backends/onnx_inc_static_quant_qdq.yaml   | 31 +++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 neural_coder/backends/onnx_inc_dynamic_quant.yaml
 create mode 100644 neural_coder/backends/onnx_inc_static_quant_qdq.yaml

diff --git a/neural_coder/backends/onnx_inc_dynamic_quant.yaml b/neural_coder/backends/onnx_inc_dynamic_quant.yaml
new file mode 100644
index 00000000000..3c50de8da8e
--- /dev/null
+++ b/neural_coder/backends/onnx_inc_dynamic_quant.yaml
@@ -0,0 +1,30 @@
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+transformation:
+  location:
+    - insert_below_model_definition_line
+  content:
+    - |-
+      [+] from neural_compressor.experimental import Quantization, common
+      [+] from neural_compressor import options, conf
+      [+] conf.model.framework = 'onnxrt_integerops'
+      [+] conf.quantization.approach = 'post_training_dynamic_quant'
+      [+] quantizer = Quantization(conf)
+      [+] quantizer.model = common.Model(MODEL_NAME)
+      [+] quantizer.eval_func = EVAL_FUNCTION_NAME
+      [+] MODEL_NAME = quantizer()
+  order:
+    - below:
+      above:
diff --git a/neural_coder/backends/onnx_inc_static_quant_qdq.yaml b/neural_coder/backends/onnx_inc_static_quant_qdq.yaml
new file mode 100644
index 00000000000..730c3220f45
--- /dev/null
+++ b/neural_coder/backends/onnx_inc_static_quant_qdq.yaml
@@ -0,0 +1,31 @@
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+transformation:
+  location:
+    - insert_below_model_definition_line
+  content:
+    - |-
+      [+] from neural_compressor.experimental import Quantization, common
+      [+] from neural_compressor import options, conf
+      [+] conf.model.framework = 'onnxrt_qdqops'
+      [+] conf.quantization.approach = 'post_training_static_quant'
+      [+] quantizer = Quantization(conf)
+      [+] quantizer.model = common.Model(MODEL_NAME)
+      [+] quantizer.calib_dataloader = DATALOADER_NAME
+      [+] quantizer.eval_func = EVAL_FUNCTION_NAME
+      [+] MODEL_NAME = quantizer()
+  order:
+    - below:
+      above: