
Commit

update readme (#298)
echarlaix committed Apr 21, 2023
1 parent c4cbc3b commit 1330d38
Showing 2 changed files with 3 additions and 4 deletions.
4 changes: 1 addition & 3 deletions README.md
@@ -41,8 +41,6 @@ where `extras` can be one or more of `neural-compressor`, `openvino`, `nncf`.
 
 ## Neural Compressor
 
-#### Dynamic quantization:
-
 Dynamic quantization can be used through the Optimum command-line interface:
 
 ```bash
@@ -62,7 +60,7 @@ loaded_model_from_hub = INCModelForSequenceClassification.from_pretrained(
 
 You can load many more quantized models hosted on the hub under the Intel organization [`here`](https://huggingface.co/Intel).
 
-For more details, please refer to this [guide](https://huggingface.co/docs/optimum/main/en/intel/optimization_inc#apply-quantization-using-the-cli).
+For more details on the supported compression techniques, please refer to the [documentation](https://huggingface.co/docs/optimum/main/en/intel/optimization_inc).
 
 
 ## OpenVINO
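For context, the hub-loading snippet that these README lines surround follows the usual `from_pretrained` pattern. A minimal sketch, assuming `INCModelForSequenceClassification` is exported from `optimum.intel` in this release and with the model id chosen purely for illustration (any INC-quantized checkpoint from the Intel organization would work):

```python
from optimum.intel import INCModelForSequenceClassification

# Illustrative checkpoint; substitute any quantized model hosted under the Intel organization.
model_id = "Intel/distilbert-base-uncased-finetuned-sst-2-english-int8-dynamic"
loaded_model_from_hub = INCModelForSequenceClassification.from_pretrained(model_id)
```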
3 changes: 2 additions & 1 deletion optimum/intel/generation/modeling.py
@@ -191,6 +191,7 @@ def _from_transformers(
         subfolder: str = "",
         local_files_only: bool = False,
         use_cache: bool = True,
+        torch_dtype: Optional[Union[str, "torch.dtype"]] = None,
         **kwargs,
     ):
         if is_torch_version("<", "2.0.0"):
@@ -204,7 +205,7 @@ def _from_transformers(
             "subfolder": subfolder,
             "local_files_only": local_files_only,
             "force_download": force_download,
-            "torch_dtype": kwargs.get("torch_dtype", None),
+            "torch_dtype": torch_dtype,
         }
 
         model = TasksManager.get_model_from_task(task, model_id, **model_kwargs)
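The modeling.py change promotes `torch_dtype` from a value pulled out of `**kwargs` to an explicit, typed keyword argument of `_from_transformers`, which is then forwarded unchanged into `model_kwargs`. A minimal standalone sketch of that pattern (the function names below are simplified stand-ins for illustration, not the library's actual code):

```python
from typing import Optional, Union

import torch


def load_kwargs_before(model_id: str, **kwargs):
    # Before: torch_dtype is fished out of **kwargs, so the signature does not
    # document it and a typo in the keyword silently falls back to None.
    return {"torch_dtype": kwargs.get("torch_dtype", None)}


def load_kwargs_after(
    model_id: str,
    torch_dtype: Optional[Union[str, "torch.dtype"]] = None,
    **kwargs,
):
    # After: torch_dtype is an explicit, typed parameter with a default of None,
    # forwarded directly to the model loading kwargs.
    return {"torch_dtype": torch_dtype}


# Both build the same kwargs; only the second signature documents torch_dtype.
assert load_kwargs_before("gpt2", torch_dtype=torch.float16) == load_kwargs_after(
    "gpt2", torch_dtype=torch.float16
)
```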
