Skip to content

Commit

Permalink
layer-wise quant (#994)
Browse files Browse the repository at this point in the history
* layer-wise quant

Signed-off-by: Guo, Heng <heng.guo@intel.com>
  • Loading branch information
n1ck-guo committed Aug 4, 2023
1 parent d6c2d40 commit d9d1fcc
Show file tree
Hide file tree
Showing 14 changed files with 2,624 additions and 1 deletion.
1 change: 1 addition & 0 deletions .azure-pipelines/scripts/codeScan/pylint/pylint.sh
Expand Up @@ -31,6 +31,7 @@ pip install torch==1.12.0 \
onnxruntime_extensions \
tf_slim \
transformers \
accelerate \
flask==2.1.3 \
xgboost

Expand Down
15 changes: 15 additions & 0 deletions neural_compressor/adaptor/pytorch.py
Expand Up @@ -3346,6 +3346,21 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None):
else:
self.prepare_custom_config_dict, self.convert_custom_config_dict = None, None
self.fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, self.approach)

# for layer-wise quant
# recipe_cfgs = tune_cfg.get('recipe_cfgs', None)
if recipe_cfgs and recipe_cfgs.get('layer_wise_quant', False) \
and self.approach != 'post_training_dynamic_quant':
from .torch_utils.layer_wise_quant import LayerWiseQuant

model_path = recipe_cfgs['layer_wise_quant_args'].get('model_path', None)
assert model_path is not None,\
"the layer_wise_quant_args should have args model_path to load the weight of model."
device = recipe_cfgs['layer_wise_quant_args'].get('decvice', 'cpu')
lw_quant = LayerWiseQuant(q_model._model, model_path, self.fx_op_cfgs, device=device)
q_model._model = lw_quant.quantize(dataloader, clean_weight=False)
return q_model

self.tune_cfg['fx_sub_module_list'] = self.sub_module_list
if self.approach == 'quant_aware_training':
q_model._model.train()
Expand Down
19 changes: 19 additions & 0 deletions neural_compressor/adaptor/torch_utils/layer_wise_quant/__init__.py
@@ -0,0 +1,19 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Torch layer-wise quantization module."""
from .utils import load_shell
from .quantize import LayerWiseQuant

0 comments on commit d9d1fcc

Please sign in to comment.