layer-wise quant (#994)

* layer-wise quant Signed-off-by: Guo, Heng <heng.guo@intel.com>
intel · Aug 4, 2023 · d9d1fcc · d9d1fcc
1 parent d6c2d40
commit d9d1fcc
Show file tree

Hide file tree

Showing 14 changed files with 2,624 additions and 1 deletion.
diff --git a/.azure-pipelines/scripts/codeScan/pylint/pylint.sh b/.azure-pipelines/scripts/codeScan/pylint/pylint.sh
@@ -31,6 +31,7 @@ pip install torch==1.12.0 \
             onnxruntime_extensions \
             tf_slim \
             transformers \
+            accelerate \
             flask==2.1.3 \
             xgboost
 

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
@@ -3346,6 +3346,21 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None):
         else:
             self.prepare_custom_config_dict, self.convert_custom_config_dict = None, None
         self.fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, self.approach)
+
+        # Layer-wise quant path; skipped for dynamic PTQ. Returns early with the quantized model.
+        # NOTE(review): recipe_cfgs is presumably bound earlier in quantize() -- confirm.
+        if recipe_cfgs and recipe_cfgs.get('layer_wise_quant', False) \
+                and self.approach != 'post_training_dynamic_quant':
+            from .torch_utils.layer_wise_quant import LayerWiseQuant
+            lw_args = recipe_cfgs.get('layer_wise_quant_args') or {}  # missing args -> assert below
+            model_path = lw_args.get('model_path', None)
+            assert model_path is not None,\
+                "the layer_wise_quant_args should have args model_path to load the weight of model."
+            device = lw_args.get('device', lw_args.get('decvice', 'cpu'))  # 'decvice': legacy typo key
+            lw_quant = LayerWiseQuant(q_model._model, model_path, self.fx_op_cfgs, device=device)
+            q_model._model = lw_quant.quantize(dataloader, clean_weight=False)
+            return q_model
+
         self.tune_cfg['fx_sub_module_list'] = self.sub_module_list
         if self.approach == 'quant_aware_training':
             q_model._model.train()

diff --git a/neural_compressor/adaptor/torch_utils/layer_wise_quant/__init__.py b/neural_compressor/adaptor/torch_utils/layer_wise_quant/__init__.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Torch layer-wise quantization module."""
+from .utils import load_shell
+from .quantize import LayerWiseQuant