Clean dependencies in requirements.txt and setup.py (#1389)
PenghuiCheng committed Mar 18, 2024
1 parent a8e5295 commit 4368478
Showing 8 changed files with 14 additions and 48 deletions.
2 changes: 1 addition & 1 deletion examples/huggingface/pytorch/text-generation/README.md
@@ -143,7 +143,7 @@ Dataset: lambada-openai
| databricks/dolly-v2-3b | alpha 0.5 | 62.97% | 60.86% | 62.47% (w/o BF16) |


-## Purpose of the Language Models Optimization for Intel Architecture
+## Purpose of the Language Models Optimization for Intel Architecture


- Demonstrate the AI workloads and deep learning models Intel has optimized and validated to run on Intel hardware
@@ -131,7 +131,7 @@ python run_generation.py \
## Weight Only Quantization for GPU device
>**Note**:
> 1. default search algorithm is beam search with num_beams = 1.
-> 2. [ipex.optimize_transformers](https://github.com/intel/intel-extension-for-pytorch/blob/v2.1.10%2Bxpu/docs/tutorials/llm/llm_optimize_transformers.md) sSupport for the optimized inference of model types "gptj," "mistral," "qwen," and "llama" to achieve high performance and accuracy. Ensure accurate inference for other model types as well.
+> 2. [ipex.optimize_transformers](https://github.com/intel/intel-extension-for-pytorch/blob/v2.1.10%2Bxpu/docs/tutorials/llm/llm_optimize_transformers.md) Support for the optimized inference of model types "gptj," "mistral," "qwen," and "llama" to achieve high performance and accuracy. Ensure accurate inference for other model types as well.
## Prerequisite
### Create Environment
PyTorch and Intel-extension-for-pytorch versions greater than 2.1 are required for Intel GPU, and Python 3.9 or higher is required due to a [text evaluation library](https://github.com/EleutherAI/lm-evaluation-harness/tree/master) limitation. The dependent packages are listed in requirements_GPU.txt; we recommend creating the environment with the following steps. Intel-extension-for-pytorch currently must be installed from source, and weight-only quantization will be added in its next release.
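Those prerequisites can be sanity-checked from the interpreter before running the install steps. A minimal sketch based only on the paragraph above (the >2.1 floor and the standard package names are the only assumptions; this snippet is not part of the commit):

```python
# Sanity-check the prerequisites described above: Python >= 3.9 and
# torch / intel-extension-for-pytorch builds newer than 2.1 for Intel GPU.
import sys

if sys.version_info < (3, 9):
    sys.exit("Python >= 3.9 is required by the text evaluation library")

import torch
import intel_extension_for_pytorch as ipex

print("torch:", torch.__version__)  # expect a version > 2.1
print("ipex :", ipex.__version__)   # expect a matching xpu build > 2.1
```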
8 changes: 0 additions & 8 deletions intel_extension_for_transformers/llm/quantization/utils.py
@@ -475,12 +475,6 @@ def default_calib_func(model):
    if config.quant_method.value not in ["awq"]:
        calib_func = None

-    orig_dtype = torch.float32
-    for param in model.parameters():
-        orig_dtype = param.dtype
-        if orig_dtype != torch.float32:
-            model.to(dtype=torch.float32)
-        break
    inc_model = quantization.fit(
        model, conf, calib_func=calib_func, calib_dataloader=calib_dataloader
    )
@@ -503,8 +497,6 @@ def default_calib_func(model):
        q_model = replace_linear(
            inc_model.model, None, None, config, device=device
        )
-        if orig_dtype != torch.float32:
-            q_model.to(dtype=orig_dtype)

    return q_model.to(device)

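For readers tracking the behavior change: the deleted lines cast the model to float32 before `quantization.fit` and restored the original dtype on the quantized model afterward, roughly this pattern (a simplified sketch with a hypothetical `quantize_fn` standing in for neural-compressor's `quantization.fit`; not the repository's exact code):

```python
import torch

def quantize_in_float32(model, quantize_fn):
    # Inspect the first parameter to learn the model's current dtype.
    orig_dtype = next(model.parameters()).dtype
    if orig_dtype != torch.float32:
        model.to(dtype=torch.float32)   # quantize from a float32 copy
    q_model = quantize_fn(model)
    if orig_dtype != torch.float32:
        q_model.to(dtype=orig_dtype)    # hand back the caller's dtype
    return q_model
```

After this commit the round-trip is gone, so the model is quantized in whatever dtype it arrives with.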
5 changes: 5 additions & 0 deletions requirements-cpu.txt
@@ -0,0 +1,5 @@
+--extra-index-url https://download.pytorch.org/whl/cpu
+cmake
+ninja
+torch

11 changes: 0 additions & 11 deletions requirements-gpu.txt

This file was deleted.

7 changes: 0 additions & 7 deletions requirements.txt
@@ -1,10 +1,3 @@
---extra-index-url https://download.pytorch.org/whl/cpu
-accelerate
-cmake>=3.16
-gguf
-ninja
-optimum-intel
py-cpuinfo
setuptools>=65
setuptools_scm[toml]>=6.2
-torch==2.2.0+cpu
7 changes: 7 additions & 0 deletions setup.py
@@ -10,6 +10,10 @@
from setuptools.command.build_ext import build_ext


+result = subprocess.Popen("pip install -r requirements.txt", shell=True)
+result.wait()


def get_gpu_family():
    ''' Get gpu device family info.
@@ -57,6 +61,9 @@ def check_env_flag(name: str, default: bool = False) -> bool:
    SKIP_RUNTIME = True
    RUNTIME_ONLY = False
    IS_INTEL_GPU = True
+else:
+    result = subprocess.Popen("pip install -r requirements-cpu.txt", shell=True)
+    result.wait()

if not SKIP_RUNTIME:
    from cmake import CMAKE_BIN_DIR
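A note on the pattern added here: invoking bare `pip` through a shell can target a different interpreter than the one running setup.py, and `Popen(...).wait()` ignores a non-zero exit status. A more defensive variant (a sketch of an alternative, not what this commit does) pins pip to the running interpreter and fails loudly:

```python
import subprocess
import sys

def install_requirements(path: str) -> None:
    # "-m pip" guarantees the packages land in the environment that is
    # executing setup.py; check_call raises CalledProcessError on failure.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", path])

install_requirements("requirements.txt")
```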
20 changes: 0 additions & 20 deletions setup_env_gpu.sh

This file was deleted.
