auto_round/compressors/base.py: 6 changes (4 additions & 2 deletions)

@@ -1551,13 +1551,15 @@ def quantize(self) -> tuple[torch.nn.Module, dict[str, Any]]:
         Returns:
             The quantized model and layer configurations.
         """
-        for n, m in self.model.named_modules():  # TODO: check if this can be removed
-            m.tmp_name = n
 
         self._check_compatibility()
         formats = self.formats if hasattr(self, "formats") else None
         # Modify the model structure and check the format inside quantize();
         # otherwise the gguf format may fail to export correctly.
         self.model = _handle_moe_model(self.model, formats=formats)
+        # Assign temporary names after replacing modules
+        for n, m in self.model.named_modules():  # TODO: check if this can be removed
+            m.tmp_name = n
+
         # TODO: check scale_dtype
         if not self.is_auto_scheme:
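Why the reorder matters: _handle_moe_model can swap module objects out of the model, so an attribute set on a module before the rewrite is discarded along with the old object, and the replacement module is left untagged. A minimal sketch of the failure mode, assuming a structural rewrite that replaces a submodule (the Expert/FusedExpert classes here are hypothetical stand-ins, not the actual MoE handling):

import torch.nn as nn

# Hypothetical stand-ins for a MoE structural rewrite like _handle_moe_model.
class Expert(nn.Module):
    def forward(self, x):
        return x

class FusedExpert(nn.Module):
    def forward(self, x):
        return x

model = nn.Sequential(Expert())

# Old ordering: tag modules BEFORE the structural rewrite.
for n, m in model.named_modules():
    m.tmp_name = n

# The rewrite replaces the module object; the tag leaves with it.
model[0] = FusedExpert()
print(hasattr(model[0], "tmp_name"))  # False: the new module was never tagged

# New ordering (what the patch does): tag modules AFTER the rewrite.
for n, m in model.named_modules():
    m.tmp_name = n
print(model[0].tmp_name)  # "0"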