Skip to content

Commit

Permalink
Narrow the tuning space of sq auto-tune (#1489)
Browse files Browse the repository at this point in the history
Signed-off-by: yiliu30 <yi4.liu@intel.com>
  • Loading branch information
yiliu30 committed Dec 22, 2023
1 parent 6c78dfe commit 9600e1d
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 16 deletions.
2 changes: 1 addition & 1 deletion docs/source/tuning_strategies.md
Expand Up @@ -179,7 +179,7 @@ flowchart TD
> `*` INC will detect the block pattern for [transformer-like](https://arxiv.org/abs/1706.03762) model by default.
> For [smooth quantization](./smooth_quant.md), users can tune the smooth quantization alpha by providing a list of scalars for the `alpha` item. The tuning process will take place at the **start stage** of the tuning procedure. For details usage, please refer to the [smooth quantization example](./smooth_quant.md#Example).
> For [smooth quantization](./smooth_quant.md), users can tune the smooth quantization alpha by providing a list of scalars for the `alpha` item. For detailed usage, please refer to the [smooth quantization example](./smooth_quant.md#Usage).
> For [weight-only quantization](./quantization_weight_only.md), users can tune the weight-only algorithms from the available [pre-defined configurations](./quantization_weight_only.md#woq-algorithms-tuning). The tuning process will take place at the **start stage** of the tuning procedure, preceding the smooth quantization alpha tuning. For detailed usage, please refer to the [weight-only quantization example](./quantization_weight_only.md#woq-algorithms-tuning).
*Please note that this behavior is specific to the `ONNX Runtime` backend.*
Expand Down
18 changes: 18 additions & 0 deletions neural_compressor/strategy/strategy.py
Expand Up @@ -186,6 +186,7 @@ def __init__(
# track tuning cfg with the current best accuracy
self.cur_best_tuning_cfg = {}
self.re_quant = False
self.early_stop_sq_tuning_process = False

self._trials_count = 0
self._capability = None
Expand Down Expand Up @@ -1152,6 +1153,9 @@ def _should_tuning_sq_alpha(self, recipes):
def tuning_sq_alpha(self, tuning_space, tuning_cfg, recipes):
"""Tuning smooth quant's alpha.
After trying all alpha values, the sq tuning process will stop early, returning the current best qmodel,
even if the current best accuracy does not meet the accuracy criterion.
Args:
tuning_space: tuning space
tuning_cfg: the initial tuning config
Expand All @@ -1166,8 +1170,12 @@ def tuning_sq_alpha(self, tuning_space, tuning_cfg, recipes):
), "Only tune the smooth quant's alpha when user provide the alpha list,\
but got alpha_list: {alpha_list}"
logger.info("[STRATEGY] Start tuning smooth quant'alpha.")
number_of_alpha = len(sq_alpha_list)
sq_trials_cnt = 0
sq_sampler = tuning_sampler_dict.get_class("smooth_quant")(tuning_space, [], tuning_cfg, sq_alpha_list)
for tune_cfg in sq_sampler:
sq_trials_cnt += 1
self.early_stop_sq_tuning_process = sq_trials_cnt == number_of_alpha
yield tune_cfg

def _should_tuning_woq_algo(self):
Expand Down Expand Up @@ -1961,6 +1969,16 @@ def stop(self, timeout, trials_count):
need_stop = True
else:
need_stop = False
if not need_stop and self.early_stop_sq_tuning_process:
if self.best_tuning_cfg is None:
self.best_tuning_cfg = self._tune_cfg_converter(self.cur_best_tuning_cfg)
logger.info(
"[Strategy] Tried all alpha values but none met the accuracy criterion. "
"The tuning process was early stopped and "
f"the currently best model(accuracy: {self.cur_best_acc}) was returned."
)

need_stop = True

return need_stop

Expand Down
14 changes: 5 additions & 9 deletions test/algorithm/test_smooth_quant.py
Expand Up @@ -1150,6 +1150,8 @@ def _test_sq_tune_alpha_common(self, eval_func, alpha=np.arange(0.1, 0.2, 0.05).
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion

logger.info(f"alpha is: {alpha}")

tuning_criterion = TuningCriterion(max_trials=8)

fp32_model = DemoModel()
Expand Down Expand Up @@ -1183,8 +1185,8 @@ def fake_eval(model, eval_result_lst):
# test for alpha is a list
for eval_result_lst, note in [
([1, 0.8, 1.1, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"),
([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.15"),
([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 4th trial with alpha is 0.10"),
([1, 0.8, 0.9, 0.7, 1.1], "Expect tuning ends at 2nd trial with alpha is 0.15"),
([1, 0.9, 0.8, 0.7, 1.1], "Expect tuning ends at 1st trial with alpha is 0.10"),
]:
logger.info(f"test_sq_tune_alpha_common with eval_result_lst: {eval_result_lst}")
logger.info(note)
Expand Down Expand Up @@ -1222,13 +1224,7 @@ def fake_eval(model, eval_result_lst):
[1, 0.8, 0.9, 0.7, 1.1],
np.arange(0.1, 0.2, 0.05).tolist(),
"auto",
"Expect tuning ends at 4th trial with alpha is 0.15 at basic strategy.",
),
(
[1, 1.1, 0.8, 0.7, 1.1],
np.arange(0.1, 0.2, 0.05).tolist(),
0,
"Expect tuning ends at 1th trial with alpha is 0.1",
"Expect tuning ends at 2th trial with alpha is 0.15 at basic strategy.",
),
]:
logger.info("test_sq_tune_alpha_common with ")
Expand Down
6 changes: 0 additions & 6 deletions test/algorithm/test_smooth_quant_onnx.py
Expand Up @@ -279,12 +279,6 @@ def fake_eval(model, eval_result_lst):
"auto",
"Expect tuning ends at 4th trial with alpha is 0.15 at basic strategy.",
),
(
[1, 1.1, 0.8, 0.7, 1.1],
np.arange(0.1, 0.2, 0.05).tolist(),
0,
"Expect tuning ends at 1th trial with alpha is 0.1",
),
]:
logger.info("test_sq_tune_alpha_common with ")
logger.info(f"eval_result_lst: {eval_result_lst}, alpha: {alpha}, quant_level: {quant_level}")
Expand Down

0 comments on commit 9600e1d

Please sign in to comment.