Skip to content

Commit

Permalink
Support export compressed model for AutoRound [2.x] (#1648)
Browse files Browse the repository at this point in the history
Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
  • Loading branch information
Kaihui-intel committed Mar 5, 2024
1 parent 0a3d4bd commit dfd083d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
16 changes: 16 additions & 0 deletions neural_compressor/model/torch_model.py
Expand Up @@ -496,6 +496,9 @@ def export_compressed_model(
gptq_config = json.load(f)
else:
gptq_config = self.gptq_config if hasattr(self, "gptq_config") else {}

autoround_config = self.autoround_config if hasattr(self, "autoround_config") else {}

if gptq_config:
for k, v in weight_config.items():
logger.debug(f"Compressing {k} on device {device}")
Expand Down Expand Up @@ -555,6 +558,19 @@ def export_compressed_model(
)
new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm)
set_module(self.model, k, new_module)
elif autoround_config:
from auto_round.export.export_to_itrex import compress_model # pylint: disable=E0401

self.model = compress_model(
self.model,
weight_config=autoround_config,
enable_full_range=enable_full_range,
compression_dtype=compression_dtype,
compression_dim=compression_dim,
device=device,
use_optimum_format=use_optimum_format,
inplace=True,
)
else:
for k, v in weight_config.items():
logger.debug(f"Compressing {k} on device {device}")
Expand Down
10 changes: 8 additions & 2 deletions test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py
Expand Up @@ -801,6 +801,14 @@ def test_AutoRound_quant(self):
self.assertTrue("scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys())
self.assertTrue(torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"])

export_model = q_model.export_compressed_model()
export_out = export_model(input)
self.assertTrue(torch.allclose(out2[0], export_out[0]))
from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear

self.assertTrue(isinstance(q_model.model.transformer.h[0].attn.k_proj, WeightOnlyLinear))
self.assertTrue(isinstance(export_model.transformer.h[0].attn.k_proj, WeightOnlyLinear))

fp32_model = copy.deepcopy(self.gptj)

conf = PostTrainingQuantConfig(
Expand Down Expand Up @@ -852,8 +860,6 @@ def test_AutoRound_quant(self):
)
out2 = export_model.model(input)
self.assertTrue(torch.allclose(out1[0], out2[0], atol=1e-01))
from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear

self.assertTrue(isinstance(export_model.model.transformer.h[0].attn.k_proj, WeightOnlyLinear))


Expand Down

0 comments on commit dfd083d

Please sign in to comment.