diff --git a/auto_round/alg_ext.abi3.so b/auto_round/alg_ext.abi3.so index 2f2907cfc..af9f2b29c 100755 Binary files a/auto_round/alg_ext.abi3.so and b/auto_round/alg_ext.abi3.so differ diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index fe0939c1f..de2c4cf48 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -2733,6 +2733,7 @@ def _quantize_blocks( else: logger.info("using algorithm extension for quantization.") except (ImportError, ModuleNotFoundError): + logger.error("algorithm extension import error, fallback to default mode") quantize_block = self._quantize_block else: quantize_block = self._quantize_block diff --git a/test/test_cpu/test_autoround.py b/test/test_cpu/test_autoround.py index 3adfd9f47..626dec380 100644 --- a/test/test_cpu/test_autoround.py +++ b/test/test_cpu/test_autoround.py @@ -716,6 +716,9 @@ def test_alg_ext(self): ar = AutoRound(model_name, scheme="W2A16", iters=1, nsamples=1, enable_alg_ext=True) ar.quantize() + def test_alg_ext_import(self): + from auto_round.alg_ext import quantize_block_ext + def test_invalid_layer_config(self): with self.assertRaises(ValueError): layer_config = {"model.decoder.layers.2.self_attnx": {"bits": 2}} diff --git a/test/test_cuda/test_alg_ext.py b/test/test_cuda/test_alg_ext.py new file mode 100644 index 000000000..06fcaf8a1 --- /dev/null +++ b/test/test_cuda/test_alg_ext.py @@ -0,0 +1,40 @@ +import shutil +import sys +import unittest + +sys.path.insert(0, "../..") + +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer + +from auto_round import AutoRound, AutoRoundConfig +from auto_round.eval.evaluation import simple_evaluate_user_model + + +class TestAlgExt(unittest.TestCase): + + @classmethod + def setUpClass(self): + self.model_name = "/models/opt-125m" + self.save_folder = "./saved" + + @classmethod + def tearDownClass(self): + shutil.rmtree(self.save_folder, ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) + + def test_2bits(self): + model_name = "/models/opt-125m" + ar = AutoRound(model=model_name, bits=2, group_size=64, enable_alg_ext=True) + ar.quantize_and_save(self.save_folder) + model = AutoModelForCausalLM.from_pretrained( + self.save_folder, + device_map="auto", + ) + + tokenizer = AutoTokenizer.from_pretrained(self.save_folder) + result = simple_evaluate_user_model(model, tokenizer, batch_size=64, tasks="lambada_openai") + print(result["results"]["lambada_openai"]["acc,none"]) + # wo alg ext 0.2084, with 0.2364 + self.assertGreater(result["results"]["lambada_openai"]["acc,none"], 0.22) + shutil.rmtree(self.save_folder, ignore_errors=True)