[NeuralChat] support return error code (#650)
* support error code for NeuralChat

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
lvliang-intel committed Dec 12, 2023
1 parent f892afb commit ea173a7
Showing 7 changed files with 580 additions and 114 deletions.
105 changes: 92 additions & 13 deletions intel_extension_for_transformers/neural_chat/chatbot.py
@@ -16,13 +16,16 @@
# limitations under the License.
"""Neural Chat Chatbot API."""

import os
from intel_extension_for_transformers.llm.quantization.optimization import Optimization
from .config import PipelineConfig
from .config import BaseFinetuningConfig
from .config import DeviceOptions
from .plugins import plugins

from .errorcode import ErrorCodes, STORAGE_THRESHOLD_GB
from .utils.error_utils import set_latest_error
import psutil
import torch
from .config_logging import configure_logging
logger = configure_logging()

@@ -41,13 +44,29 @@ def build_chatbot(config: PipelineConfig=None):
        pipeline = build_chatbot()
        response = pipeline.predict(query="Tell me about Intel Xeon Scalable Processors.")
    """
+    # Check for out of storage
+    available_storage = psutil.disk_usage('/').free
+    available_storage_gb = available_storage / (1024 ** 3)
+    if available_storage_gb < STORAGE_THRESHOLD_GB:
+        set_latest_error(ErrorCodes.ERROR_OUT_OF_STORAGE)
+        return

    global plugins
    if not config:
        config = PipelineConfig()
    # Validate input parameters
    if config.device not in [option.name.lower() for option in DeviceOptions]:
-        valid_options = ", ".join([option.name.lower() for option in DeviceOptions])
-        raise ValueError(f"Invalid device value '{config.device}'. Must be one of {valid_options}")
+        set_latest_error(ErrorCodes.ERROR_DEVICE_NOT_SUPPORTED)
+        return

+    if config.device == "cuda":
+        if not torch.cuda.is_available():
+            set_latest_error(ErrorCodes.ERROR_DEVICE_NOT_FOUND)
+            return
+    elif config.device == "xpu":
+        if not torch.xpu.is_available():
+            set_latest_error(ErrorCodes.ERROR_DEVICE_NOT_FOUND)
+            return

# create model adapter
if "llama" in config.model_name_or_path.lower():
@@ -76,8 +95,8 @@ def build_chatbot(config: PipelineConfig=None):
        from .models.base_model import BaseModel
        adapter = BaseModel()
    else:
-        raise ValueError("NeuralChat Error: Unsupported model name or path, \
-                         only supports FLAN-T5/LLAMA/MPT/GPT/BLOOM/OPT/QWEN/NEURAL-CHAT/MISTRAL/CODELLAMA/STARCODER now.")
+        set_latest_error(ErrorCodes.ERROR_MODEL_NOT_SUPPORTED)
+        return

    # register plugin instance in model adaptor
    if config.plugins:
@@ -112,9 +131,10 @@ def build_chatbot(config: PipelineConfig=None):
            from .pipeline.plugins.image2image.image2image import Image2Image
            plugins[plugin_name]['class'] = Image2Image
        else: # pragma: no cover
-            raise ValueError("NeuralChat Error: Unsupported plugin")
-        logger.info("create %s plugin instance...", plugin_name)
-        logger.info("plugin parameters: %s", plugin_value['args'])
+            set_latest_error(ErrorCodes.ERROR_PLUGIN_NOT_SUPPORTED)
+            return
+        print(f"create {plugin_name} plugin instance...")
+        print(f"plugin parameters: ", plugin_value['args'])
        plugins[plugin_name]["instance"] = plugins[plugin_name]['class'](**plugin_value['args'])
        adapter.register_plugin_instance(plugin_name, plugins[plugin_name]["instance"])

@@ -136,8 +156,32 @@ def build_chatbot(config: PipelineConfig=None):
parameters["hf_access_token"] = config.hf_access_token
parameters["assistant_model"] = config.assistant_model

adapter.load_model(parameters)

try:
adapter.load_model(parameters)
except RuntimeError as e:
if "out of memory" in str(e):
set_latest_error(ErrorCodes.ERROR_OUT_OF_MEMORY)
elif "devices are busy or unavailable" in str(e):
set_latest_error(ErrorCodes.ERROR_DEVICE_BUSY)
elif "tensor does not have a device" in str(e):
set_latest_error(ErrorCodes.ERROR_DEVICE_NOT_FOUND)
else:
set_latest_error(ErrorCodes.ERROR_GENERIC)
except ValueError as e:
if "load_model: unsupported device" in str(e):
set_latest_error(ErrorCodes.ERROR_DEVICE_NOT_SUPPORTED)
elif "load_model: unsupported model" in str(e):
set_latest_error(ErrorCodes.ERROR_MODEL_NOT_SUPPORTED)
elif "load_model: tokenizer is not found" in str(e):
set_latest_error(ErrorCodes.ERROR_TOKENIZER_NOT_FOUND)
elif "load_model: model name or path is not found" in str(e):
set_latest_error(ErrorCodes.ERROR_MODEL_NOT_FOUND)
elif "load_model: model config is not found" in str(e):
set_latest_error(ErrorCodes.ERROR_MODEL_CONFIG_NOT_FOUND)
else:
set_latest_error(ErrorCodes.ERROR_GENERIC)
except Exception as e:
set_latest_error(ErrorCodes.ERROR_GENERIC)
return adapter

def finetune_model(config: BaseFinetuningConfig):
@@ -150,7 +194,29 @@ def finetune_model(config: BaseFinetuningConfig):
    assert config is not None, "BaseFinetuningConfig is needed for finetuning."
    from intel_extension_for_transformers.llm.finetuning.finetuning import Finetuning
    finetuning = Finetuning(config)
-    finetuning.finetune()
+    try:
+        finetuning.finetune()
+    except FileNotFoundError as e:
+        if "Couldn't find a dataset script" in str(e):
+            set_latest_error(ErrorCodes.ERROR_DATASET_NOT_FOUND)
+    except ValueError as e:
+        if "--do_eval requires a validation dataset" in str(e):
+            set_latest_error(ErrorCodes.ERROR_VALIDATION_FILE_NOT_FOUND)
+        elif "--do_train requires a train dataset" in str(e):
+            set_latest_error(ErrorCodes.ERROR_TRAIN_FILE_NOT_FOUND)
+    except Exception as e:
+        if config.finetune_args.peft == "lora":
+            set_latest_error(ErrorCodes.ERROR_LORA_FINETUNE_FAIL)
+        elif config.finetune_args.peft == "llama_adapter":
+            set_latest_error(ErrorCodes.ERROR_LLAMA_ADAPTOR_FINETUNE_FAIL)
+        elif config.finetune_args.peft == "ptun":
+            set_latest_error(ErrorCodes.ERROR_PTUN_FINETUNE_FAIL)
+        elif config.finetune_args.peft == "prefix":
+            set_latest_error(ErrorCodes.ERROR_PREFIX_FINETUNE_FAIL)
+        elif config.finetune_args.peft == "prompt":
+            set_latest_error(ErrorCodes.ERROR_PROMPT_FINETUNE_FAIL)
+        else:
+            set_latest_error(ErrorCodes.ERROR_GENERIC)

def optimize_model(model, config, use_llm_runtime=False):
    """Optimize the model based on the provided configuration.
@@ -161,5 +227,18 @@ def optimize_model(model, config, use_llm_runtime=False):
        use_llm_runtime (bool): A boolean indicating whether to use the LLM runtime graph optimization.
    """
    optimization = Optimization(optimization_config=config)
-    model = optimization.optimize(model, use_llm_runtime)
-    return model
+    try:
+        model = optimization.optimize(model, use_llm_runtime)
+    except Exception as e:
+        from intel_extension_for_transformers.transformers import (
+            MixedPrecisionConfig,
+            WeightOnlyQuantConfig,
+            BitsAndBytesConfig
+        )
+        if type(config) == MixedPrecisionConfig:
+            set_latest_error(ErrorCodes.ERROR_AMP_OPTIMIZATION_FAIL)
+        elif type(config) == WeightOnlyQuantConfig:
+            set_latest_error(ErrorCodes.ERROR_WEIGHT_ONLY_QUANT_OPTIMIZATION_FAIL)
+        elif type(config) == BitsAndBytesConfig:
+            set_latest_error(ErrorCodes.ERROR_BITS_AND_BYTES_OPTIMIZATION_FAIL)
+    return model
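
A note on the storage gate added at the top of build_chatbot() above: psutil.disk_usage('/').free reports the free bytes of the filesystem holding '/', and dividing by 1024 ** 3 converts that to GiB before comparing against STORAGE_THRESHOLD_GB (defined as 30 in errorcode.py below). A minimal standalone sketch of that check, with the threshold inlined for illustration:

import psutil

STORAGE_THRESHOLD_GB = 30  # mirrors STORAGE_THRESHOLD_GB in errorcode.py
free_gb = psutil.disk_usage('/').free / (1024 ** 3)
if free_gb < STORAGE_THRESHOLD_GB:
    # at this point build_chatbot() records ERROR_OUT_OF_STORAGE and returns None
    print(f"Only {free_gb:.1f} GiB free, below the {STORAGE_THRESHOLD_GB} GiB threshold")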
109 changes: 109 additions & 0 deletions intel_extension_for_transformers/neural_chat/errorcode.py
@@ -0,0 +1,109 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Error code and constant value for Neural Chat."""

STORAGE_THRESHOLD_GB = 30
GPU_MEMORY_THRESHOLD_MB = 6

class ErrorCodes:
    # General Service Error Code - System related
    ERROR_OUT_OF_MEMORY = 1001 # out of memory
    ERROR_DEVICE_BUSY = 1002 # device busy
    ERROR_DEVICE_NOT_FOUND = 1003 # device does not exist
    ERROR_OUT_OF_STORAGE = 1004 # out of storage
    ERROR_DEVICE_NOT_SUPPORTED = 1005 # device not supported
    ERROR_PLUGIN_NOT_SUPPORTED = 1006 # plugin not supported

    # General Service Error Code - Model related
    ERROR_MODEL_NOT_FOUND = 2001
    ERROR_MODEL_CONFIG_NOT_FOUND = 2002
    ERROR_TOKENIZER_NOT_FOUND = 2003
    ERROR_CACHE_DIR_NO_WRITE_PERMISSION = 2004
    ERROR_INVALID_MODEL_VERSION = 2005
    ERROR_MODEL_NOT_SUPPORTED = 2006
    WARNING_INPUT_EXCEED_MAX_SEQ_LENGTH = 2101

    # General Service Error Code - Dataset related
    ERROR_DATASET_NOT_FOUND = 3001
    ERROR_DATASET_CONFIG_NOT_FOUND = 3002
    ERROR_VALIDATION_FILE_NOT_FOUND = 3003
    ERROR_TRAIN_FILE_NOT_FOUND = 3004
    ERROR_DATASET_CACHE_DIR_NO_WRITE_PERMISSION = 3005

    # Advanced Service Error Code - Finetune related
    ERROR_PTUN_FINETUNE_FAIL = 4001
    ERROR_LORA_FINETUNE_FAIL = 4002
    ERROR_LLAMA_ADAPTOR_FINETUNE_FAIL = 4003
    ERROR_PREFIX_FINETUNE_FAIL = 4004
    ERROR_PROMPT_FINETUNE_FAIL = 4005

    # Advanced Service Error Code - Inference related
    ERROR_WEIGHT_ONLY_QUANT_OPTIMIZATION_FAIL = 5001
    ERROR_AMP_OPTIMIZATION_FAIL = 5002
    ERROR_AUDIO_FORMAT_NOT_SUPPORTED = 5003
    ERROR_RETRIEVAL_DOC_FORMAT_NOT_SUPPORTED = 5004
    ERROR_SENSITIVE_CHECK_FILE_NOT_FOUND = 5005
    ERROR_MEMORY_CONTROL_FAIL = 5006
    ERROR_INTENT_DETECT_FAIL = 5007
    ERROR_MODEL_INFERENCE_FAIL = 5008
    ERROR_BITS_AND_BYTES_OPTIMIZATION_FAIL = 5009

    # General Service Error Code - Unknown Errors
    ERROR_GENERIC = 9999

    SUCCESS = 0 # The operation is executed successfully

    error_strings = {
        ERROR_OUT_OF_MEMORY: "System ran out of memory",
        ERROR_DEVICE_BUSY: "Device is currently busy",
        ERROR_DEVICE_NOT_FOUND: "Device does not exist",
        ERROR_OUT_OF_STORAGE: "System has run out of storage",
        ERROR_DEVICE_NOT_SUPPORTED: "Device is not supported",
        ERROR_PLUGIN_NOT_SUPPORTED: "Plugin is not supported",

        ERROR_MODEL_NOT_FOUND: "Requested model was not found",
        ERROR_MODEL_CONFIG_NOT_FOUND: "Model configuration not found",
        ERROR_TOKENIZER_NOT_FOUND: "Tokenizer not found",
        ERROR_CACHE_DIR_NO_WRITE_PERMISSION: "No write permission in cache directory",
        ERROR_INVALID_MODEL_VERSION: "Invalid model version",
        ERROR_MODEL_NOT_SUPPORTED: "Model is not supported",
        WARNING_INPUT_EXCEED_MAX_SEQ_LENGTH: "Input sequence exceeds maximum length",

        ERROR_DATASET_NOT_FOUND: "Dataset was not found",
        ERROR_DATASET_CONFIG_NOT_FOUND: "Dataset configuration not found",
        ERROR_VALIDATION_FILE_NOT_FOUND: "Validation file not found",
        ERROR_TRAIN_FILE_NOT_FOUND: "Training file not found",
        ERROR_DATASET_CACHE_DIR_NO_WRITE_PERMISSION: "No write permission in dataset cache directory",

        ERROR_PTUN_FINETUNE_FAIL: "PTUN finetuning failed",
        ERROR_LORA_FINETUNE_FAIL: "LORA finetuning failed",
        ERROR_LLAMA_ADAPTOR_FINETUNE_FAIL: "LLAMA Adaptor finetuning failed",
        ERROR_PREFIX_FINETUNE_FAIL: "Prefix finetuning failed",
        ERROR_PROMPT_FINETUNE_FAIL: "Prompt finetuning failed",

        ERROR_WEIGHT_ONLY_QUANT_OPTIMIZATION_FAIL: "Weight-only quantization optimization failed",
        ERROR_AMP_OPTIMIZATION_FAIL: "AMP optimization failed",
        ERROR_AUDIO_FORMAT_NOT_SUPPORTED: "Audio format is not supported",
        ERROR_RETRIEVAL_DOC_FORMAT_NOT_SUPPORTED: "Retrieval document format is not supported",
        ERROR_SENSITIVE_CHECK_FILE_NOT_FOUND: "Sensitive check file not found",
        ERROR_MEMORY_CONTROL_FAIL: "Memory control failed",
        ERROR_INTENT_DETECT_FAIL: "Intent detection failed",
        ERROR_MODEL_INFERENCE_FAIL: "Model inference failed",
        ERROR_BITS_AND_BYTES_OPTIMIZATION_FAIL: "Bits and bytes optimization failed",

        ERROR_GENERIC: "Generic error"
    }
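
Since error_strings is a class-level dict keyed by the integer codes above, turning a code into its human-readable message is a single lookup, for example:

from intel_extension_for_transformers.neural_chat.errorcode import ErrorCodes

code = ErrorCodes.ERROR_OUT_OF_STORAGE
print(code)                            # 1004
print(ErrorCodes.error_strings[code])  # "System has run out of storage"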
24 changes: 19 additions & 5 deletions intel_extension_for_transformers/neural_chat/models/base_model.py
@@ -24,6 +24,8 @@
from ..utils.common import is_audio_file
from .model_utils import load_model, predict, predict_stream, MODELS
from ..prompts import PromptTemplate
+from ..utils.error_utils import set_latest_error
+from ..errorcode import ErrorCodes
import logging
logging.basicConfig(
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
@@ -182,7 +184,12 @@ def predict_stream(self, query, origin_query="", config=None):
                if response == "Response with template.":
                    return plugin_instance.response_template, link
                else:
-                    response = plugin_instance.pre_llm_inference_actions(query)
+                    try:
+                        response = plugin_instance.pre_llm_inference_actions(query)
+                    except Exception as e:
+                        if plugin_name == "asr":
+                            if "[ASR ERROR] Audio format not supported" in str(e):
+                                set_latest_error(ErrorCodes.ERROR_AUDIO_FORMAT_NOT_SUPPORTED)
                if plugin_name == "safety_checker":
                    sign1=plugin_instance.pre_llm_inference_actions(my_query)
                    if sign1:
@@ -198,8 +205,12 @@ def predict_stream(self, query, origin_query="", config=None):

        if not query_include_prompt and not is_plugin_enabled("retrieval"):
            query = self.prepare_prompt(query, self.model_name, config.task)
-        response = predict_stream(
-            **construct_parameters(query, self.model_name, self.device, self.assistant_model, config))
+        try:
+            response = predict_stream(
+                **construct_parameters(query, self.model_name, self.device, self.assistant_model, config))
+        except Exception as e:
+            set_latest_error(ErrorCodes.ERROR_MODEL_INFERENCE_FAIL)

        def is_generator(obj):
            return isinstance(obj, types.GeneratorType)
@@ -286,8 +297,11 @@ def predict(self, query, origin_query="", config=None):
            query = conv_template.get_prompt()

        # LLM inference
-        response = predict(
-            **construct_parameters(query, self.model_name, self.device, self.assistant_model, config))
+        try:
+            response = predict(
+                **construct_parameters(query, self.model_name, self.device, self.assistant_model, config))
+        except Exception as e:
+            set_latest_error(ErrorCodes.ERROR_MODEL_INFERENCE_FAIL)

        # plugin post actions
        for plugin_name in get_registered_plugins():
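Taken together, the new contract across build_chatbot, finetune_model, optimize_model, predict, and predict_stream is that failures record a code via set_latest_error instead of raising. A caller-side sketch follows; note that error_utils.py is among the 7 changed files but is not shown in this excerpt, so the get_latest_error accessor below is an assumption, taken as the natural counterpart of set_latest_error:

from intel_extension_for_transformers.neural_chat import PipelineConfig, build_chatbot
from intel_extension_for_transformers.neural_chat.errorcode import ErrorCodes
# Assumed accessor; only set_latest_error appears in the diff shown here.
from intel_extension_for_transformers.neural_chat.utils.error_utils import get_latest_error

chatbot = build_chatbot(PipelineConfig())
if chatbot is None:
    code = get_latest_error()
    print(f"build_chatbot failed: {ErrorCodes.error_strings.get(code, 'unknown error')} ({code})")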
