From 206ca380063363c0e408e4915524402c5f88ec74 Mon Sep 17 00:00:00 2001 From: changwangss Date: Thu, 23 May 2024 20:53:12 -0700 Subject: [PATCH 1/2] modify autoround config parameter Signed-off-by: changwangss --- .../text-generation/quantization/run_generation_cpu_woq.py | 6 +++--- .../transformers/llm/quantization/utils.py | 2 +- intel_extension_for_transformers/transformers/utils/config.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/huggingface/pytorch/text-generation/quantization/run_generation_cpu_woq.py b/examples/huggingface/pytorch/text-generation/quantization/run_generation_cpu_woq.py index ab3060a823e..d0d0511bdeb 100644 --- a/examples/huggingface/pytorch/text-generation/quantization/run_generation_cpu_woq.py +++ b/examples/huggingface/pytorch/text-generation/quantization/run_generation_cpu_woq.py @@ -154,7 +154,7 @@ help="minmax learning rate, if None,it will beset to be the same with lr", ) parser.add_argument( - "--enable_quanted_input", + "--disable_quanted_input", action="store_true", - help="whether to use the output of quantized block to tune the next block", + help="whether to disable using the output of quantized block to tune the next block", ) @@ -286,7 +286,7 @@ calib_len=args.calib_len, lr=args.lr, minmax_lr=args.minmax_lr, - enable_quanted_input=args.enable_quanted_input, + disable_quanted_input=args.disable_quanted_input, use_ipex=args.use_ipex, ) else: diff --git a/intel_extension_for_transformers/transformers/llm/quantization/utils.py b/intel_extension_for_transformers/transformers/llm/quantization/utils.py index fc0e4f86221..21c39bb2266 100644 --- a/intel_extension_for_transformers/transformers/llm/quantization/utils.py +++ b/intel_extension_for_transformers/transformers/llm/quantization/utils.py @@ -527,7 +527,7 @@ def default_calib_func(model): "seqlen": config.calib_len, "iters": config.iters, "scale_dtype": config.scale_dtype, - "enable_quanted_input": config.enable_quanted_input, + "enable_quanted_input": False if config.disable_quanted_input else True, "lr": config.lr, "minmax_lr": 
config.minmax_lr, } diff --git a/intel_extension_for_transformers/transformers/utils/config.py b/intel_extension_for_transformers/transformers/utils/config.py index 503f18e9889..a63d22ba0ea 100644 --- a/intel_extension_for_transformers/transformers/utils/config.py +++ b/intel_extension_for_transformers/transformers/utils/config.py @@ -1056,7 +1056,7 @@ def __init__( sym: bool = False, lr: float = None, minmax_lr: float = None, - enable_quanted_input: bool = True, + disable_quanted_input: bool = False, nsamples: int = 512, iters: int = 200, use_ggml: bool = False, @@ -1083,7 +1083,7 @@ def __init__( self.group_size = group_size self.lr = lr self.minmax_lr = minmax_lr - self.enable_quanted_input = enable_quanted_input + self.disable_quanted_input = disable_quanted_input self.iters = iters self.llm_int8_skip_modules = ( llm_int8_skip_modules if llm_int8_skip_modules else [] From 264bf42356e831eed46b311a9c4826925c1863d6 Mon Sep 17 00:00:00 2001 From: changwangss Date: Thu, 23 May 2024 22:43:08 -0700 Subject: [PATCH 2/2] improve code Signed-off-by: changwangss --- .../transformers/llm/quantization/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/transformers/llm/quantization/utils.py b/intel_extension_for_transformers/transformers/llm/quantization/utils.py index 21c39bb2266..e04d778c95c 100644 --- a/intel_extension_for_transformers/transformers/llm/quantization/utils.py +++ b/intel_extension_for_transformers/transformers/llm/quantization/utils.py @@ -527,7 +527,7 @@ def default_calib_func(model): "seqlen": config.calib_len, "iters": config.iters, "scale_dtype": config.scale_dtype, - "enable_quanted_input": False if config.disable_quanted_input else True, + "enable_quanted_input": not config.disable_quanted_input, "lr": config.lr, "minmax_lr": config.minmax_lr, }