[fix] quantization properties for lmi dist and hf acc (#1318)

Co-authored-by: Somasundaram <sindhuvahini.s@gmail.com>
deepjavalibrary · Nov 15, 2023 · f0ea80b
1 parent 60f5519
commit f0ea80b
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 12 deletions.
diff --git a/engines/python/setup/djl_python/properties_manager/hf_properties.py b/engines/python/setup/djl_python/properties_manager/hf_properties.py
@@ -165,15 +165,13 @@ def construct_kwargs_quantize(cls, properties):
             if "device_map" not in kwargs:
                 raise ValueError(
                     "device_map should be set when load_in_8bit is set")
-            kwargs["load_in_8bit"] = properties['load_in_8bit']
-            properties['quantize'] = HFQuantizeMethods.bitsandbytes8
+            kwargs["load_in_8bit"] = True
         if properties[
                 'quantize'].value == HFQuantizeMethods.bitsandbytes4.value:
             if "device_map" not in kwargs:
                 raise ValueError(
                     "device_map should set when load_in_4bit is set")
-            kwargs["load_in_4bit"] = properties['load_in_4bit']
-            properties['quantize'] = HFQuantizeMethods.bitsandbytes4
+            kwargs["load_in_4bit"] = True
 
         properties['kwargs'] = kwargs
         return properties

diff --git a/engines/python/setup/djl_python/rolling_batch/lmi_dist_rolling_batch.py b/engines/python/setup/djl_python/rolling_batch/lmi_dist_rolling_batch.py
@@ -23,7 +23,7 @@
 
 import torch
 
-QUANTIZATION_SUPPORT_ALGO = ["bitsandbytes", "gptq"]
+QUANTIZATION_SUPPORT_ALGO = ["bitsandbytes8", "bitsandbytes", "gptq"]
 
 
 class LmiDistRollingBatch(RollingBatch):
@@ -58,15 +58,17 @@ def _init_model(self, kwargs, model_id_or_path):
         revision = self.properties.get('revision', None)
         paged_attention = self.properties.get("paged_attention",
                                               "true").lower() == "true"
-        if quantize is not None and dtype is not None:
-            raise ValueError(
-                f"Can't set both dtype: {dtype} and quantize: {quantize}")
-        if quantize is not None and quantize not in QUANTIZATION_SUPPORT_ALGO:
-            raise ValueError(
-                f"Invalid value for quantize: {quantize}. Valid values when using option rolling_batch=lmi-dist are: {QUANTIZATION_SUPPORT_ALGO}"
-            )
         if quantize is not None:
             os.environ["CUDA_MEMORY_FRACTION"] = "0.9"
+            if dtype is not None:
+                raise ValueError(
+                    f"Can't set both dtype: {dtype} and quantize: {quantize}")
+            if quantize not in QUANTIZATION_SUPPORT_ALGO:
+                raise ValueError(
+                    f"Invalid value for quantize: {quantize}. Valid values when using option rolling_batch=lmi-dist are: {QUANTIZATION_SUPPORT_ALGO}"
+                )
+            if quantize == "bitsandbytes8":
+                quantize = "bitsandbytes"
         if quantize is None and dtype == "int8":
             quantize = "bitsandbytes"
         from lmi_dist.models import get_model