Traceback (most recent call last):
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/auto_round/utils/model.py", line 307, in llm_load_model
model = model_cls.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/transformers/models/auto/auto_factory.py", line 597, in from_pretrained
return model_class.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/transformers/modeling_utils.py", line 277, in _wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/transformers/modeling_utils.py", line 4881, in from_pretrained
hf_quantizer, config, dtype, device_map = get_hf_quantizer(
^^^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/transformers/quantizers/auto.py", line 319, in get_hf_quantizer
hf_quantizer.validate_environment(
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/transformers/quantizers/quantizer_finegrained_fp8.py", line 54, in validate_environment
raise ValueError(
ValueError: FP8 quantized models is only supported on GPUs with compute capability >= 8.9 (e.g 4090/H100), actual = `8.0`
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/uttest/miniforge3/envs/autoround_test/bin/auto-round", line 10, in <module>
sys.exit(run())
^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/auto_round/__main__.py", line 889, in run
tune(args)
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/auto_round/__main__.py", line 619, in tune
autoround: BaseCompressor = AutoRound(
^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/auto_round/autoround.py", line 182, in __new__
ar = dynamic_compressor(
^^^^^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/auto_round/compressors/base.py", line 230, in __init__
model, tokenizer = llm_load_model(
^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/auto_round/utils/model.py", line 316, in llm_load_model
model = model_cls.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/transformers/models/auto/auto_factory.py", line 597, in from_pretrained
return model_class.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/transformers/modeling_utils.py", line 277, in _wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/transformers/modeling_utils.py", line 5048, in from_pretrained
) = cls._load_pretrained_model(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/transformers/modeling_utils.py", line 5468, in _load_pretrained_model
_error_msgs, disk_offload_index = load_shard_file(args)
^^^^^^^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/transformers/modeling_utils.py", line 843, in load_shard_file
disk_offload_index = _load_state_dict_into_meta_model(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/transformers/modeling_utils.py", line 770, in _load_state_dict_into_meta_model
_load_parameter_into_model(model, param_name, param.to(param_device))
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/transformers/modeling_utils.py", line 667, in _load_parameter_into_model
module.load_state_dict({param_type: tensor}, strict=False, assign=True)
File "/home/uttest/miniforge3/envs/autoround_test/lib/python3.12/site-packages/torch/nn/modules/module.py", line 2629, in load_state_dict
raise RuntimeError(
RuntimeError: Error(s) in loading state_dict for FP8Linear:
size mismatch for weight_scale_inv: copying a param with shape torch.Size([8, 32]) from checkpoint, the shape in current model is torch.Size([6, 32]).
Model quantization with AutoRound failed.
Impacted model list
XiaomiMiMo/MiMo-V2-Flash
XiaomiMiMo/MiMo-V2-Flash-Base
Issue details