Extend the strategy capability for adding the new data type (#555)
* refined the tuning space

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* fixed the merge with user cfg

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* parse tuning space

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* refined the tuning space

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* clean code

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* refine the logic

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* fixed the pylint error

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* fixed the typo

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* fix typo

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* fixed the merge

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* fixed the auto quant

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* fixed quant_mode error

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* revert some change

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* clean code

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* add ut for int4

Signed-off-by: yiliu30 <yi4.liu@intel.com>

* fixed the parse order

Signed-off-by: yiliu30 <yi4.liu@intel.com>

---------

Signed-off-by: yiliu30 <yi4.liu@intel.com>
yiliu30 committed Feb 22, 2023
1 parent 750dff7 commit d0059c4
Showing 12 changed files with 996 additions and 368 deletions.
33 changes: 18 additions & 15 deletions neural_compressor/strategy/basic.py
@@ -25,7 +25,7 @@

from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler
from .utils.tuning_structs import OpTuningConfig
- from .utils.tuning_space import TUNING_ITEMS_LST
+ from .utils.constant import TUNING_ITEMS_LST

@strategy_registry
class BasicTuneStrategy(TuneStrategy):
@@ -45,13 +45,13 @@ def next_tune_cfg(self):
tuning_space = self.tuning_space
calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options
for calib_sampling_size in calib_sampling_size_lst:
- # Initialize the tuning config for each op according to the quantization approach
+ # Initialize the tuning config for each op according to the quantization approach.
op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg()
# Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight)
early_stop_tuning = False
stage1_cnt = 0
- quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else []
- quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else []
+ quant_ops = quant_mode_wise_items.get('static', [])
+ quant_ops += quant_mode_wise_items.get('dynamic', [])
stage1_max = 1e9 # TODO set a more appropriate value
op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [],
op_item_dtype_dict, initial_op_tuning_cfg)
@@ -120,22 +120,25 @@ def _initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig)
op_state = op_static_cfg.get_state()
op_name = op_static_cfg.op_name
op_type = op_static_cfg.op_type
+ op_name_type = (op_name, op_type)
op_quant_mode = 'dynamic'
tuning_space = self.tuning_space
dynamic_state = {}
for att in ['weight', 'activation']:
- if att not in op_state:
- continue
- for item_name, item_val in op_state[att].items():
- att_item = (att, item_name)
- if att_item not in TUNING_ITEMS_LST:
- continue
- if tuning_space.query_item_option((op_name, op_type), op_quant_mode, att_item, item_val):
- dynamic_state[att_item] = item_val
+ if att not in op_state: continue
+ # Add dtype
+ full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, op_quant_mode)
+ dynamic_state[att + '_dtype'] = self.tuning_space.ops_data_type[op_name_type][full_path[att]]
+ for method_name, method_val in op_state[att].items():
+ att_and_method_name = (att, method_name)
+ if att_and_method_name not in TUNING_ITEMS_LST: continue
+ if tuning_space.query_item_option(op_name_type, full_path[att], att_and_method_name, method_val):
+ dynamic_state[att_and_method_name] = method_val
else:
- quant_mode_item = tuning_space.query_quant_mode_item((op_name, op_type), op_quant_mode)
- tuning_item = quant_mode_item.get_option_by_name(att_item)
- dynamic_state[att_item] = tuning_item.options[0] if tuning_item else None
+ quant_mode_item = tuning_space.get_item_by_path((op_name_type, *full_path[att]))
+ if quant_mode_item and quant_mode_item.get_option_by_name(att_and_method_name):
+ tuning_item = quant_mode_item.get_option_by_name(att_and_method_name)
+ dynamic_state[att_and_method_name] = tuning_item.options[0] if tuning_item else None
return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state)
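The quant_ops lines in the hunk above replace the `d[k] if k in d else []` idiom with `dict.get(key, default)`. A minimal, self-contained sketch of that lookup pattern, with an invented `quant_mode_wise_items` mapping (the real one is built by `initial_tuning_cfg` from the tuning space):

```python
from collections import OrderedDict

# Invented mapping from quant mode to op items; the real one is built by
# TuneStrategy.initial_tuning_cfg() from the tuning space.
quant_mode_wise_items = OrderedDict(static=[('conv1', 'Conv2D')], bf16=[('relu1', 'Relu')])

# dict.get() falls back to [] when a mode (here 'dynamic') is absent,
# so neither line can raise KeyError.
quant_ops = quant_mode_wise_items.get('static', [])
quant_ops += quant_mode_wise_items.get('dynamic', [])
print(quant_ops)  # [('conv1', 'Conv2D')]
```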


8 changes: 4 additions & 4 deletions neural_compressor/strategy/conservative.py
@@ -75,7 +75,7 @@ def next_tune_cfg(self):
tmp_tune_cfg = deepcopy(tune_cfg)
for item, quant_mode in items_lst:
op_info = item.name
- op_config = tuning_space.set_deafult_config(op_info, quant_mode)
+ op_config = tuning_space.get_default_config(op_info, quant_mode)
tmp_tune_cfg[op_info] = op_config
yield tmp_tune_cfg
if self.acc_meet_flag:
@@ -87,7 +87,7 @@ def next_tune_cfg(self):
logger.info(f"*** Try to convert {op_type} op into {dtype} one by one.")
for item, quant_mode in items_lst:
op_info = item.name
- op_config = tuning_space.set_deafult_config(op_info, quant_mode)
+ op_config = tuning_space.get_default_config(op_info, quant_mode)
tmp_tune_cfg[op_info] = op_config
yield tmp_tune_cfg
if self.acc_meet_flag:
@@ -358,9 +358,9 @@ def _initialize_tune_cfg(self):
for op_info in tmp_non_fp32_ops:
non_fp32_ops_dtype[op_info] = quant_mode
for op_info in fp32_ops:
- initial_tuning_cfg[op_info] = tuning_space.set_deafult_config(op_info, "fp32")
+ initial_tuning_cfg[op_info] = tuning_space.get_default_config(op_info, "fp32")
for op_info, quant_mode in non_fp32_ops_dtype.items():
- initial_tuning_cfg[op_info] = tuning_space.set_deafult_config(op_info, quant_mode)
+ initial_tuning_cfg[op_info] = tuning_space.get_default_config(op_info, quant_mode)
return initial_tuning_cfg

def _quant_items_pool(self, op_type_priority: List[str]) -> OrderedDict[
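The rename from `set_deafult_config` to `get_default_config` fixes the spelling and better matches what the call does: it returns a per-op default configuration for the requested quant mode or precision. A rough usage sketch with a stubbed tuning space (constructing a real `TuningSpace` needs framework capability data, so the stub and op names below are illustration only):

```python
from collections import OrderedDict

class _StubTuningSpace:
    """Illustration-only stand-in; the real TuningSpace returns per-op default configs."""
    def get_default_config(self, op_info, quant_mode):
        return {'op': op_info, 'mode': quant_mode}  # placeholder payload

tuning_space = _StubTuningSpace()
fp32_ops = [('conv2d_1', 'Conv2D'), ('matmul_1', 'MatMul')]  # invented op infos

initial_tuning_cfg = OrderedDict()
for op_info in fp32_ops:
    initial_tuning_cfg[op_info] = tuning_space.get_default_config(op_info, 'fp32')
```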
27 changes: 3 additions & 24 deletions neural_compressor/strategy/hawq_v2.py
@@ -24,7 +24,7 @@

from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler
from .utils.tuning_structs import OpTuningConfig
- from .utils.tuning_space import TUNING_ITEMS_LST
+ from .utils.constant import TUNING_ITEMS_LST
from ..utils import logger

@strategy_registry
@@ -51,8 +51,8 @@ def next_tune_cfg(self):
# Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight)
early_stop_tuning = True
stage1_cnt = 0
- quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else []
- quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else []
+ quant_ops = quant_mode_wise_items.get('static', [])
+ quant_ops += quant_mode_wise_items.get('dynamic', [])
stage1_max = 1 # TODO set a more appropriate value
op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [],
op_item_dtype_dict, initial_op_tuning_cfg)
@@ -110,24 +110,3 @@ def next_tune_cfg(self):
op_tuning_cfg['calib_sampling_size'] = calib_size
yield op_tuning_cfg

- def _initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig):
- op_state = op_static_cfg.get_state()
- op_name = op_static_cfg.op_name
- op_type = op_static_cfg.op_type
- op_quant_mode = 'dynamic'
- tuning_space = self.tuning_space
- dynamic_state = {}
- for att in ['weight', 'activation']:
- if att not in op_state:
- continue
- for item_name, item_val in op_state[att].items():
- att_item = (att, item_name)
- if att_item not in TUNING_ITEMS_LST:
- continue
- if tuning_space.query_item_option((op_name, op_type), op_quant_mode, att_item, item_val):
- dynamic_state[att_item] = item_val
- else:
- quant_mode_item = tuning_space.query_quant_mode_item((op_name, op_type), op_quant_mode)
- tuning_item = quant_mode_item.get_option_by_name(att_item)
- dynamic_state[att_item] = tuning_item.options[0] if tuning_item else None
- return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state)
27 changes: 16 additions & 11 deletions neural_compressor/strategy/strategy.py
@@ -291,7 +291,7 @@ def _remove_redundant_qmodel(self):
self.best_qmodel = None

def _can_create_eval_func_from_cfg(self):
- """Determines whether an eval function can be created from cfg.
+ """Determine whether an eval function can be created from cfg.
Returns:
Returns True if the eval func can be created from config, False otherwise.
@@ -432,20 +432,24 @@ def initial_tuning_cfg(self):
quant_mode_wise_items (OrderedDict): key is quant_mode/precision; value is item list.
initial_op_tuning_cfg (OrderedDict): key is (op_name, op_type); value is the initialized tuning config.
"""
+ from .utils.constant import auto_query_order, static_query_order, dynamic_query_order
+ from .utils.tuning_space import initial_tuning_cfg_with_quant_mode
if self.cfg.quantization.approach == 'post_training_auto_quant':
- query_order = ['static', 'dynamic', 'bf16', 'fp32']
+ query_order = auto_query_order
elif self.cfg.quantization.approach == 'post_training_dynamic_quant':
- query_order = ['dynamic', 'bf16', 'fp32']
+ query_order = dynamic_query_order
elif self.cfg.quantization.approach == 'post_training_static_quant':
- query_order = ['static', 'bf16', 'fp32']
+ query_order = static_query_order
elif self.cfg.quantization.approach == 'quant_aware_training':
- query_order = ['static', 'dynamic', 'bf16', 'fp32']
+ logger.info("!!! Currently, the qat tuning is not supported by strategy.")
+ query_order = auto_query_order

- quant_mode_wise_items = OrderedDict()
+ quant_mode_wise_items = OrderedDict() # mode, op_item_lst
pre_items = set()
+ # Collect op items supported the specified mode.
for quant_mode in query_order:
items = self.tuning_space.query_items_by_quant_mode(quant_mode)
- filtered_items = [item for item in items if item not in pre_items]
+ filtered_items = list(filter(lambda item: item not in pre_items, items))
pre_items = pre_items.union(set(items))
quant_mode_wise_items[quant_mode] = filtered_items

@@ -456,11 +460,12 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict):
op_item_dtype_dict = OrderedDict()
for quant_mode, quant_mode_items in quant_mode_wise_items.items():
initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict)

initial_op_tuning_cfg = {}
- for op_name_dtype, quant_mode in op_item_dtype_dict.items():
- initial_op_tuning_cfg[op_name_dtype] = OpTuningConfig(op_name_dtype[0], op_name_dtype[1],
- quant_mode, self.tuning_space)
+ for op_name_type, quant_mode in op_item_dtype_dict.items():
+ initial_op_tuning_cfg[op_name_type] = initial_tuning_cfg_with_quant_mode(op_name_type,
+ quant_mode,
+ self.tuning_space)
return op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg

def show_baseline_info(self):
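The query-order loop in `initial_tuning_cfg` above assigns each op to the first quant mode (in priority order) that supports it. A self-contained sketch of that de-duplication logic with a stubbed query function (the op names and per-mode support sets are invented for illustration):

```python
from collections import OrderedDict

# Stand-in for TuningSpace.query_items_by_quant_mode(); the real method
# returns tuning-space items rather than plain strings.
def query_items_by_quant_mode(quant_mode):
    supported = {
        'static': ['conv1', 'matmul1'],
        'dynamic': ['matmul1'],
        'bf16': ['conv1', 'relu1'],
        'fp32': ['conv1', 'matmul1', 'relu1'],
    }
    return supported.get(quant_mode, [])

auto_query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32']

quant_mode_wise_items = OrderedDict()
pre_items = set()
for quant_mode in auto_query_order:
    items = query_items_by_quant_mode(quant_mode)
    # Keep only ops not already claimed by a higher-priority mode.
    filtered_items = list(filter(lambda item: item not in pre_items, items))
    pre_items = pre_items.union(set(items))
    quant_mode_wise_items[quant_mode] = filtered_items

print(quant_mode_wise_items)
# OrderedDict([('static', ['conv1', 'matmul1']), ('dynamic', []),
#              ('bf16', ['relu1']), ('fp16', []), ('fp32', [])])
```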
31 changes: 31 additions & 0 deletions neural_compressor/strategy/utils/constant.py
@@ -0,0 +1,31 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Strategy constant."""

PRECISION_SET = {'bf16', 'fp16' , 'fp32',}
QUANT_MODE_SET = {'static', 'dynamic'}
QUNAT_BIT_SET = {'int8', 'uint8', 'int4', 'uint4'}

TUNING_ITEMS_LST = [('activation','scheme'), ('activation','algorithm'), ('activation','granularity'),
('weight','scheme'), ('weight','algorithm'), ('weight','granularity'), 'sampling_size']

PRECISION_SET_V2_0 = {'fp32', 'bf16'}

auto_query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32']
static_query_order = ['static', 'bf16', 'fp16', 'fp32']
dynamic_query_order = ['dynamic', 'bf16', 'fp16', 'fp32']
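For context, a sketch of how these query-order constants are consumed by `initial_tuning_cfg` in strategy.py; the branch structure mirrors the strategy.py hunk above, and the import assumes the package layout shown in this commit:

```python
from neural_compressor.strategy.utils.constant import (
    auto_query_order, static_query_order, dynamic_query_order)

def pick_query_order(approach):
    # Mirrors the approach branches in TuneStrategy.initial_tuning_cfg();
    # QAT currently falls back to the auto order, as the diff above notes.
    if approach == 'post_training_auto_quant':
        return auto_query_order
    if approach == 'post_training_dynamic_quant':
        return dynamic_query_order
    if approach == 'post_training_static_quant':
        return static_query_order
    return auto_query_order  # 'quant_aware_training' and anything else

print(pick_query_order('post_training_static_quant'))
# ['static', 'bf16', 'fp16', 'fp32']
```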
