Fix woq autoround last layer quant issue (#1419)
Signed-off-by: changwangss <chang1.wang@intel.com>
changwangss committed Mar 27, 2024
1 parent fbbd653 commit d21bb3e
Showing 3 changed files with 10 additions and 5 deletions.
@@ -108,7 +108,9 @@ def replace_linear(
     empty_weights=False,
 ):
     if modules_to_not_convert is None:
-        modules_to_not_convert = ["lm_head"]
+        # output_layer is chatglm last layer name
+        # embed_out is dolly_v2 last layer name
+        modules_to_not_convert = ["lm_head", "output_layer", "embed_out"]
     if quantization_config.llm_int8_skip_modules:
         modules_to_not_convert = modules_to_not_convert.extend(
             quantization_config.llm_int8_skip_modules
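
The hunk above widens the default skip list so model-specific output heads (chatglm's output_layer, dolly_v2's embed_out) are never handed to the quantized-linear replacement. A minimal sketch of the idea, not the repository's code: the helper name should_convert and the extra_skips parameter are illustrative, and the merge uses concatenation because list.extend returns None.

    # Illustrative sketch only; should_convert and extra_skips are assumed names.
    import torch

    def should_convert(module_name, module, modules_to_not_convert=None, extra_skips=None):
        """Return True if this module would be swapped for a quantized linear."""
        if modules_to_not_convert is None:
            # lm_head is the usual head; chatglm calls it output_layer, dolly_v2 embed_out.
            modules_to_not_convert = ["lm_head", "output_layer", "embed_out"]
        if extra_skips:
            # list.extend returns None, so merge by concatenation instead of assignment.
            modules_to_not_convert = modules_to_not_convert + list(extra_skips)
        if any(skip in module_name for skip in modules_to_not_convert):
            return False
        return isinstance(module, torch.nn.Linear)

    # The dolly_v2 head stays in full precision:
    print(should_convert("embed_out", torch.nn.Linear(16, 32)))  # False
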
@@ -518,6 +520,12 @@ def default_calib_func(model):
".*lm_head": { # re.match
"weight": {"dtype": "fp32"},
},
".*output_layer": { # re.match
"weight": {"dtype": "fp32"},
},
".*embed_out": { # re.match
"weight": {"dtype": "fp32"},
},
},
recipes=recipes,
)
@@ -532,7 +540,6 @@ def default_calib_func(model):
         if orig_dtype != torch.float32:
             model.to(dtype=torch.float32)
         break
-
     inc_model = quantization.fit(
         model, conf, calib_func=calib_func, calib_dataloader=calib_dataloader
     )
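
Together, the hunks above pin every known last-layer name to fp32 in the quantization config and then hand the model to quantization.fit. A hedged sketch of how such fp32 overrides can be expressed with neural_compressor's PostTrainingQuantConfig; the exact keyword arguments and recipes used by the repository may differ.

    # Hedged sketch; the repository's actual config likely passes more arguments.
    from neural_compressor import PostTrainingQuantConfig

    fp32_last_layers = {
        # re.match patterns: keep every known last-layer name in fp32
        ".*lm_head": {"weight": {"dtype": "fp32"}},
        ".*output_layer": {"weight": {"dtype": "fp32"}},  # chatglm
        ".*embed_out": {"weight": {"dtype": "fp32"}},     # dolly_v2
    }
    conf = PostTrainingQuantConfig(approach="weight_only", op_name_dict=fp32_last_layers)
    # quantization.fit(model, conf, calib_func=...) then leaves the matching weights unquantized.
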
@@ -144,7 +144,7 @@ def build_woq_model(model, quantization_config):
     from neural_compressor.adaptor.torch_utils.util import set_module

     for n, m in model.named_modules():
-        if "lm_head" in n:
+        if "lm_head" in n or "output_layer" in n or "embed_out" in n:
             continue
         if isinstance(m, torch.nn.Linear):
             zp = (
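
A quick illustration of why the extra name checks matter when walking named_modules(): the toy module below is made up for demonstration and uses the dolly_v2-style head name.

    # Toy example, not the repository's code.
    import torch

    class TinyCausalLM(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.backbone = torch.nn.Linear(16, 16)
            self.embed_out = torch.nn.Linear(16, 32)  # dolly_v2 names its head this way

    skip_names = ("lm_head", "output_layer", "embed_out")
    for n, m in TinyCausalLM().named_modules():
        if isinstance(m, torch.nn.Linear) and any(s in n for s in skip_names):
            print(f"left unquantized: {n}")  # prints: left unquantized: embed_out
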
intel_extension_for_transformers/transformers/utils/config.py (2 changes: 0 additions & 2 deletions)
@@ -940,7 +940,6 @@ class AutoRoundConfig(ITREXQuantizationConfigMixin):
     def __init__(
         self,
         bits: int = 8,
-        dtype: str = "int",
         tokenizer: Any = None,
         dataset: str = "NeelNanda/pile-10k",
         group_size: int = 32,
@@ -955,7 +954,6 @@ def __init__(
         use_quant_input: bool = True,
         nsamples: int = 128,
         iters: int = 200,
-        static_groups: bool = False,
         use_ggml: bool = False,
         use_neural_speed: bool = False,
         llm_int8_skip_modules=None,
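
With dtype and static_groups removed from the signature, an AutoRound weight-only config is built from the remaining arguments. A hedged usage sketch, assuming the transformers-style loading API that intel_extension_for_transformers exposes; the model name and argument values are illustrative only.

    # Hedged usage sketch; values mirror the defaults shown in the signature above.
    from transformers import AutoTokenizer
    from intel_extension_for_transformers.transformers import AutoModelForCausalLM, AutoRoundConfig

    model_name = "databricks/dolly-v2-3b"  # a model whose last layer is named embed_out
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    woq_config = AutoRoundConfig(
        bits=8,
        tokenizer=tokenizer,
        dataset="NeelNanda/pile-10k",
        group_size=32,
        nsamples=128,
        iters=200,
    )
    model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=woq_config)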
