[NeuralChat] support return error code (#650)
* support error code for NeuralChat

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
lvliang-intel committed Dec 12, 2023
1 parent f892afb commit ea173a7
Showing 7 changed files with 580 additions and 114 deletions.
105 changes: 92 additions & 13 deletions intel_extension_for_transformers/neural_chat/chatbot.py
@@ -16,13 +16,16 @@
# limitations under the License.
"""Neural Chat Chatbot API."""

import os
from intel_extension_for_transformers.llm.quantization.optimization import Optimization
from .config import PipelineConfig
from .config import BaseFinetuningConfig
from .config import DeviceOptions
from .plugins import plugins

from .errorcode import ErrorCodes, STORAGE_THRESHOLD_GB
from .utils.error_utils import set_latest_error
import psutil
import torch
from .config_logging import configure_logging
logger = configure_logging()

@@ -41,13 +44,29 @@ def build_chatbot(config: PipelineConfig=None):
        pipeline = build_chatbot()
        response = pipeline.predict(query="Tell me about Intel Xeon Scalable Processors.")
    """
+    # Check for out of storage
+    available_storage = psutil.disk_usage('/').free
+    available_storage_gb = available_storage / (1024 ** 3)
+    if available_storage_gb < STORAGE_THRESHOLD_GB:
+        set_latest_error(ErrorCodes.ERROR_OUT_OF_STORAGE)
+        return

    global plugins
    if not config:
        config = PipelineConfig()
    # Validate input parameters
    if config.device not in [option.name.lower() for option in DeviceOptions]:
-        valid_options = ", ".join([option.name.lower() for option in DeviceOptions])
-        raise ValueError(f"Invalid device value '{config.device}'. Must be one of {valid_options}")
+        set_latest_error(ErrorCodes.ERROR_DEVICE_NOT_SUPPORTED)
+        return

+    if config.device == "cuda":
+        if not torch.cuda.is_available():
+            set_latest_error(ErrorCodes.ERROR_DEVICE_NOT_FOUND)
+            return
+    elif config.device == "xpu":
+        if not torch.xpu.is_available():
+            set_latest_error(ErrorCodes.ERROR_DEVICE_NOT_FOUND)
+            return

# create model adapter
if "llama" in config.model_name_or_path.lower():
@@ -76,8 +95,8 @@ def build_chatbot(config: PipelineConfig=None):
        from .models.base_model import BaseModel
        adapter = BaseModel()
    else:
-        raise ValueError("NeuralChat Error: Unsupported model name or path, \
-                         only supports FLAN-T5/LLAMA/MPT/GPT/BLOOM/OPT/QWEN/NEURAL-CHAT/MISTRAL/CODELLAMA/STARCODER now.")
+        set_latest_error(ErrorCodes.ERROR_MODEL_NOT_SUPPORTED)
+        return

    # register plugin instance in model adaptor
    if config.plugins:
@@ -112,9 +131,10 @@ def build_chatbot(config: PipelineConfig=None):
            from .pipeline.plugins.image2image.image2image import Image2Image
            plugins[plugin_name]['class'] = Image2Image
        else: # pragma: no cover
-            raise ValueError("NeuralChat Error: Unsupported plugin")
-        logger.info("create %s plugin instance...", plugin_name)
-        logger.info("plugin parameters: %s", plugin_value['args'])
+            set_latest_error(ErrorCodes.ERROR_PLUGIN_NOT_SUPPORTED)
+            return
+        print(f"create {plugin_name} plugin instance...")
+        print(f"plugin parameters: ", plugin_value['args'])
        plugins[plugin_name]["instance"] = plugins[plugin_name]['class'](**plugin_value['args'])
        adapter.register_plugin_instance(plugin_name, plugins[plugin_name]["instance"])

@@ -136,8 +156,32 @@ def build_chatbot(config: PipelineConfig=None):
parameters["hf_access_token"] = config.hf_access_token
parameters["assistant_model"] = config.assistant_model

adapter.load_model(parameters)

try:
adapter.load_model(parameters)
except RuntimeError as e:
if "out of memory" in str(e):
set_latest_error(ErrorCodes.ERROR_OUT_OF_MEMORY)
elif "devices are busy or unavailable" in str(e):
set_latest_error(ErrorCodes.ERROR_DEVICE_BUSY)
elif "tensor does not have a device" in str(e):
set_latest_error(ErrorCodes.ERROR_DEVICE_NOT_FOUND)
else:
set_latest_error(ErrorCodes.ERROR_GENERIC)
except ValueError as e:
if "load_model: unsupported device" in str(e):
set_latest_error(ErrorCodes.ERROR_DEVICE_NOT_SUPPORTED)
elif "load_model: unsupported model" in str(e):
set_latest_error(ErrorCodes.ERROR_MODEL_NOT_SUPPORTED)
elif "load_model: tokenizer is not found" in str(e):
set_latest_error(ErrorCodes.ERROR_TOKENIZER_NOT_FOUND)
elif "load_model: model name or path is not found" in str(e):
set_latest_error(ErrorCodes.ERROR_MODEL_NOT_FOUND)
elif "load_model: model config is not found" in str(e):
set_latest_error(ErrorCodes.ERROR_MODEL_CONFIG_NOT_FOUND)
else:
set_latest_error(ErrorCodes.ERROR_GENERIC)
except Exception as e:
set_latest_error(ErrorCodes.ERROR_GENERIC)
return adapter

def finetune_model(config: BaseFinetuningConfig):
@@ -150,7 +194,29 @@ def finetune_model(config: BaseFinetuningConfig):
    assert config is not None, "BaseFinetuningConfig is needed for finetuning."
    from intel_extension_for_transformers.llm.finetuning.finetuning import Finetuning
    finetuning = Finetuning(config)
-    finetuning.finetune()
+    try:
+        finetuning.finetune()
+    except FileNotFoundError as e:
+        if "Couldn't find a dataset script" in str(e):
+            set_latest_error(ErrorCodes.ERROR_DATASET_NOT_FOUND)
+    except ValueError as e:
+        if "--do_eval requires a validation dataset" in str(e):
+            set_latest_error(ErrorCodes.ERROR_VALIDATION_FILE_NOT_FOUND)
+        elif "--do_train requires a train dataset" in str(e):
+            set_latest_error(ErrorCodes.ERROR_TRAIN_FILE_NOT_FOUND)
+    except Exception as e:
+        if config.finetune_args.peft == "lora":
+            set_latest_error(ErrorCodes.ERROR_LORA_FINETUNE_FAIL)
+        elif config.finetune_args.peft == "llama_adapter":
+            set_latest_error(ErrorCodes.ERROR_LLAMA_ADAPTOR_FINETUNE_FAIL)
+        elif config.finetune_args.peft == "ptun":
+            set_latest_error(ErrorCodes.ERROR_PTUN_FINETUNE_FAIL)
+        elif config.finetune_args.peft == "prefix":
+            set_latest_error(ErrorCodes.ERROR_PREFIX_FINETUNE_FAIL)
+        elif config.finetune_args.peft == "prompt":
+            set_latest_error(ErrorCodes.ERROR_PROMPT_FINETUNE_FAIL)
+        else:
+            set_latest_error(ErrorCodes.ERROR_GENERIC)

def optimize_model(model, config, use_llm_runtime=False):
    """Optimize the model based on the provided configuration.
@@ -161,5 +227,18 @@ def optimize_model(model, config, use_llm_runtime=False):
        use_llm_runtime (bool): A boolean indicating whether to use the LLM runtime graph optimization.
    """
    optimization = Optimization(optimization_config=config)
-    model = optimization.optimize(model, use_llm_runtime)
-    return model
+    try:
+        model = optimization.optimize(model, use_llm_runtime)
+    except Exception as e:
+        from intel_extension_for_transformers.transformers import (
+            MixedPrecisionConfig,
+            WeightOnlyQuantConfig,
+            BitsAndBytesConfig
+        )
+        if type(config) == MixedPrecisionConfig:
+            set_latest_error(ErrorCodes.ERROR_AMP_OPTIMIZATION_FAIL)
+        elif type(config) == WeightOnlyQuantConfig:
+            set_latest_error(ErrorCodes.ERROR_WEIGHT_ONLY_QUANT_OPTIMIZATION_FAIL)
+        elif type(config) == BitsAndBytesConfig:
+            set_latest_error(ErrorCodes.ERROR_BITS_AND_BYTES_OPTIMIZATION_FAIL)
+    return model
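
A note on the storage gate added at the top of build_chatbot() above: psutil.disk_usage('/').free reports the free bytes of the filesystem holding '/', and dividing by 1024 ** 3 converts that to GiB before comparing against STORAGE_THRESHOLD_GB (defined as 30 in errorcode.py below). A minimal standalone sketch of that check, with the threshold inlined for illustration:

import psutil

STORAGE_THRESHOLD_GB = 30  # mirrors STORAGE_THRESHOLD_GB in errorcode.py
free_gb = psutil.disk_usage('/').free / (1024 ** 3)
if free_gb < STORAGE_THRESHOLD_GB:
    # at this point build_chatbot() records ERROR_OUT_OF_STORAGE and returns None
    print(f"Only {free_gb:.1f} GiB free, below the {STORAGE_THRESHOLD_GB} GiB threshold")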
109 changes: 109 additions & 0 deletions intel_extension_for_transformers/neural_chat/errorcode.py
@@ -0,0 +1,109 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Error code and constant value for Neural Chat."""

STORAGE_THRESHOLD_GB = 30
GPU_MEMORY_THRESHOLD_MB = 6

class ErrorCodes:
    # General Service Error Code - System related
    ERROR_OUT_OF_MEMORY = 1001 # out of memory
    ERROR_DEVICE_BUSY = 1002 # device busy
    ERROR_DEVICE_NOT_FOUND = 1003 # device does not exist
    ERROR_OUT_OF_STORAGE = 1004 # out of storage
    ERROR_DEVICE_NOT_SUPPORTED = 1005 # device not supported
    ERROR_PLUGIN_NOT_SUPPORTED = 1006 # plugin not supported

    # General Service Error Code - Model related
    ERROR_MODEL_NOT_FOUND = 2001
    ERROR_MODEL_CONFIG_NOT_FOUND = 2002
    ERROR_TOKENIZER_NOT_FOUND = 2003
    ERROR_CACHE_DIR_NO_WRITE_PERMISSION = 2004
    ERROR_INVALID_MODEL_VERSION = 2005
    ERROR_MODEL_NOT_SUPPORTED = 2006
    WARNING_INPUT_EXCEED_MAX_SEQ_LENGTH = 2101

    # General Service Error Code - Dataset related
    ERROR_DATASET_NOT_FOUND = 3001
    ERROR_DATASET_CONFIG_NOT_FOUND = 3002
    ERROR_VALIDATION_FILE_NOT_FOUND = 3003
    ERROR_TRAIN_FILE_NOT_FOUND = 3004
    ERROR_DATASET_CACHE_DIR_NO_WRITE_PERMISSION = 3005

    # Advanced Service Error Code - Finetune related
    ERROR_PTUN_FINETUNE_FAIL = 4001
    ERROR_LORA_FINETUNE_FAIL = 4002
    ERROR_LLAMA_ADAPTOR_FINETUNE_FAIL = 4003
    ERROR_PREFIX_FINETUNE_FAIL = 4004
    ERROR_PROMPT_FINETUNE_FAIL = 4005

    # Advanced Service Error Code - Inference related
    ERROR_WEIGHT_ONLY_QUANT_OPTIMIZATION_FAIL = 5001
    ERROR_AMP_OPTIMIZATION_FAIL = 5002
    ERROR_AUDIO_FORMAT_NOT_SUPPORTED = 5003
    ERROR_RETRIEVAL_DOC_FORMAT_NOT_SUPPORTED = 5004
    ERROR_SENSITIVE_CHECK_FILE_NOT_FOUND = 5005
    ERROR_MEMORY_CONTROL_FAIL = 5006
    ERROR_INTENT_DETECT_FAIL = 5007
    ERROR_MODEL_INFERENCE_FAIL = 5008
    ERROR_BITS_AND_BYTES_OPTIMIZATION_FAIL = 5009

    # General Service Error Code - Unknown Errors
    ERROR_GENERIC = 9999

    SUCCESS = 0 # The operation is executed successfully

    error_strings = {
        ERROR_OUT_OF_MEMORY: "System ran out of memory",
        ERROR_DEVICE_BUSY: "Device is currently busy",
        ERROR_DEVICE_NOT_FOUND: "Device does not exist",
        ERROR_OUT_OF_STORAGE: "System has run out of storage",
        ERROR_DEVICE_NOT_SUPPORTED: "Device is not supported",
        ERROR_PLUGIN_NOT_SUPPORTED: "Plugin is not supported",

        ERROR_MODEL_NOT_FOUND: "Requested model was not found",
        ERROR_MODEL_CONFIG_NOT_FOUND: "Model configuration not found",
        ERROR_TOKENIZER_NOT_FOUND: "Tokenizer not found",
        ERROR_CACHE_DIR_NO_WRITE_PERMISSION: "No write permission in cache directory",
        ERROR_INVALID_MODEL_VERSION: "Invalid model version",
        ERROR_MODEL_NOT_SUPPORTED: "Model is not supported",
        WARNING_INPUT_EXCEED_MAX_SEQ_LENGTH: "Input sequence exceeds maximum length",

        ERROR_DATASET_NOT_FOUND: "Dataset was not found",
        ERROR_DATASET_CONFIG_NOT_FOUND: "Dataset configuration not found",
        ERROR_VALIDATION_FILE_NOT_FOUND: "Validation file not found",
        ERROR_TRAIN_FILE_NOT_FOUND: "Training file not found",
        ERROR_DATASET_CACHE_DIR_NO_WRITE_PERMISSION: "No write permission in dataset cache directory",

        ERROR_PTUN_FINETUNE_FAIL: "PTUN finetuning failed",
        ERROR_LORA_FINETUNE_FAIL: "LORA finetuning failed",
        ERROR_LLAMA_ADAPTOR_FINETUNE_FAIL: "LLAMA Adaptor finetuning failed",
        ERROR_PREFIX_FINETUNE_FAIL: "Prefix finetuning failed",
        ERROR_PROMPT_FINETUNE_FAIL: "Prompt finetuning failed",

        ERROR_WEIGHT_ONLY_QUANT_OPTIMIZATION_FAIL: "Weight-only quantization optimization failed",
        ERROR_AMP_OPTIMIZATION_FAIL: "AMP optimization failed",
        ERROR_AUDIO_FORMAT_NOT_SUPPORTED: "Audio format is not supported",
        ERROR_RETRIEVAL_DOC_FORMAT_NOT_SUPPORTED: "Retrieval document format is not supported",
        ERROR_SENSITIVE_CHECK_FILE_NOT_FOUND: "Sensitive check file not found",
        ERROR_MEMORY_CONTROL_FAIL: "Memory control failed",
        ERROR_INTENT_DETECT_FAIL: "Intent detection failed",
        ERROR_MODEL_INFERENCE_FAIL: "Model inference failed",
        ERROR_BITS_AND_BYTES_OPTIMIZATION_FAIL: "Bits and bytes optimization failed",

        ERROR_GENERIC: "Generic error"
    }
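
Since error_strings is a class-level dict keyed by the integer codes above, turning a code into its human-readable message is a single lookup, for example:

from intel_extension_for_transformers.neural_chat.errorcode import ErrorCodes

code = ErrorCodes.ERROR_OUT_OF_STORAGE
print(code)                            # 1004
print(ErrorCodes.error_strings[code])  # "System has run out of storage"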
24 changes: 19 additions & 5 deletions intel_extension_for_transformers/neural_chat/models/base_model.py
@@ -24,6 +24,8 @@
from ..utils.common import is_audio_file
from .model_utils import load_model, predict, predict_stream, MODELS
from ..prompts import PromptTemplate
+from ..utils.error_utils import set_latest_error
+from ..errorcode import ErrorCodes
import logging
logging.basicConfig(
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
@@ -182,7 +184,12 @@ def predict_stream(self, query, origin_query="", config=None):
                if response == "Response with template.":
                    return plugin_instance.response_template, link
                else:
-                    response = plugin_instance.pre_llm_inference_actions(query)
+                    try:
+                        response = plugin_instance.pre_llm_inference_actions(query)
+                    except Exception as e:
+                        if plugin_name == "asr":
+                            if "[ASR ERROR] Audio format not supported" in str(e):
+                                set_latest_error(ErrorCodes.ERROR_AUDIO_FORMAT_NOT_SUPPORTED)
                if plugin_name == "safety_checker":
                    sign1=plugin_instance.pre_llm_inference_actions(my_query)
                    if sign1:
@@ -198,8 +205,12 @@ def predict_stream(self, query, origin_query="", config=None):

        if not query_include_prompt and not is_plugin_enabled("retrieval"):
            query = self.prepare_prompt(query, self.model_name, config.task)
-        response = predict_stream(
-            **construct_parameters(query, self.model_name, self.device, self.assistant_model, config))
+        try:
+            response = predict_stream(
+                **construct_parameters(query, self.model_name, self.device, self.assistant_model, config))
+        except Exception as e:
+            set_latest_error(ErrorCodes.ERROR_MODEL_INFERENCE_FAIL)

        def is_generator(obj):
            return isinstance(obj, types.GeneratorType)
@@ -286,8 +297,11 @@ def predict(self, query, origin_query="", config=None):
            query = conv_template.get_prompt()

        # LLM inference
-        response = predict(
-            **construct_parameters(query, self.model_name, self.device, self.assistant_model, config))
+        try:
+            response = predict(
+                **construct_parameters(query, self.model_name, self.device, self.assistant_model, config))
+        except Exception as e:
+            set_latest_error(ErrorCodes.ERROR_MODEL_INFERENCE_FAIL)

        # plugin post actions
        for plugin_name in get_registered_plugins():
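Taken together, the new contract across build_chatbot, finetune_model, optimize_model, predict, and predict_stream is that failures record a code via set_latest_error instead of raising. A caller-side sketch follows; note that error_utils.py is among the 7 changed files but is not shown in this excerpt, so the get_latest_error accessor below is an assumption, taken as the natural counterpart of set_latest_error:

from intel_extension_for_transformers.neural_chat import PipelineConfig, build_chatbot
from intel_extension_for_transformers.neural_chat.errorcode import ErrorCodes
# Assumed accessor; only set_latest_error appears in the diff shown here.
from intel_extension_for_transformers.neural_chat.utils.error_utils import get_latest_error

chatbot = build_chatbot(PipelineConfig())
if chatbot is None:
    code = get_latest_error()
    print(f"build_chatbot failed: {ErrorCodes.error_strings.get(code, 'unknown error')} ({code})")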
