From 10a32e5319c8a60c61225cde81dce24050e4e670 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Sun, 7 Dec 2025 22:44:54 -0800 Subject: [PATCH] move packed result to cpu Signed-off-by: yiliu30 --- auto_round/compressors/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index a3309bc41..8bf788c24 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -1332,6 +1332,8 @@ def _quantize_layer_via_rtn(self, name: str, dtype: torch.dtype = None, to_cpu=T self._immediate_pack(name) if to_cpu: m = m.to("cpu") + packed_m = get_module(self.model, name) + set_module(self.model, name, packed_m.to("cpu")) else: if to_cpu: m = m.to("cpu")