From 12c87d5f08ae7e1466a2d40a069482df6069c8a7 Mon Sep 17 00:00:00 2001 From: "Dong, Bo" Date: Fri, 22 Dec 2023 13:22:43 +0800 Subject: [PATCH 1/2] [LLM Runtime] dynamic link the layer to compress binary size (#1059) Co-authored-by: Ding, Yi Signed-off-by: lvliang-intel --- .../llm/runtime/graph/CMakeLists.txt | 3 + .../llm/runtime/graph/cmake/Common.cmake | 20 +++++- .../llm/runtime/graph/core/CMakeLists.txt | 2 +- .../neural_chat/chatbot.py | 3 + .../neural_chat/models/solar_model.py | 69 +++++++++++++++++++ 5 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 intel_extension_for_transformers/neural_chat/models/solar_model.py diff --git a/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt b/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt index 13d6c936803..76caa45ef3d 100644 --- a/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt +++ b/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt @@ -91,6 +91,9 @@ if (NE_GELU_VEC) endif() option(NE_PYTHON_API "neural_engine: use python api" OFF) option(NE_SIMD_VEC_DOT_F16 "neural_engine: enable vec_dot_fp16 SIMD optimization" ON) + +option(BUILD_SHARED_LIBS "If build as shared libs" ON) + if (NE_SIMD_VEC_DOT_F16) add_compile_definitions(NE_SIMD_VEC_DOT_F16) endif() diff --git a/intel_extension_for_transformers/llm/runtime/graph/cmake/Common.cmake b/intel_extension_for_transformers/llm/runtime/graph/cmake/Common.cmake index e10412aa687..d3e266ce668 100644 --- a/intel_extension_for_transformers/llm/runtime/graph/cmake/Common.cmake +++ b/intel_extension_for_transformers/llm/runtime/graph/cmake/Common.cmake @@ -36,9 +36,25 @@ function(add_executable_w_warning TARGET) warning_check(${TARGET}) endfunction() -function(add_library_w_warning TARGET) - add_library(${TARGET} STATIC ${ARGN}) +function(add_library_w_warning_ TARGET) + add_library(${TARGET} ${ARGN}) set_target_properties(${TARGET} PROPERTIES C_STANDARD 11 C_STANDARD_REQUIRED ON C_EXTENSIONS OFF) set_target_properties(${TARGET} PROPERTIES CXX_STANDARD 11 CXX_STANDARD_REQUIRED ON CXX_EXTENSIONS OFF) warning_check(${TARGET}) endfunction() + +function(add_library_w_warning TARGET) + add_library_w_warning_(${TARGET} STATIC ${ARGN}) +endfunction() + +function(add_shared_library_w_warning TARGET) + add_library_w_warning_(${TARGET} SHARED ${ARGN}) +endfunction() + +function(add_shareable_library_w_warning TARGET) + if (BUILD_SHARED_LIBS) + add_library_w_warning_(${TARGET} SHARED ${ARGN}) + else() + add_library_w_warning_(${TARGET} STATIC ${ARGN}) + endif() +endfunction() diff --git a/intel_extension_for_transformers/llm/runtime/graph/core/CMakeLists.txt b/intel_extension_for_transformers/llm/runtime/graph/core/CMakeLists.txt index b77e8b56d10..bcf34a9ca4b 100644 --- a/intel_extension_for_transformers/llm/runtime/graph/core/CMakeLists.txt +++ b/intel_extension_for_transformers/llm/runtime/graph/core/CMakeLists.txt @@ -16,7 +16,7 @@ find_package(Threads REQUIRED) file(GLOB layers_srcs "layers/*.cpp") set(sources ne_layers.c ${layers_srcs}) -add_library_w_warning(ne_layers "${sources}") +add_shareable_library_w_warning(ne_layers "${sources}") target_include_directories(ne_layers PUBLIC .) target_compile_features(ne_layers PUBLIC c_std_11) # don't bump diff --git a/intel_extension_for_transformers/neural_chat/chatbot.py b/intel_extension_for_transformers/neural_chat/chatbot.py index b119d9622f7..b4d9613365f 100644 --- a/intel_extension_for_transformers/neural_chat/chatbot.py +++ b/intel_extension_for_transformers/neural_chat/chatbot.py @@ -87,6 +87,9 @@ def build_chatbot(config: PipelineConfig=None): elif "mistral" in config.model_name_or_path.lower(): from .models.mistral_model import MistralModel adapter = MistralModel() + elif "solar" in config.model_name_or_path.lower(): + from .models.solar_model import SolarModel + adapter = SolarModel() elif "opt" in config.model_name_or_path.lower() or \ "gpt" in config.model_name_or_path.lower() or \ "flan-t5" in config.model_name_or_path.lower() or \ diff --git a/intel_extension_for_transformers/neural_chat/models/solar_model.py b/intel_extension_for_transformers/neural_chat/models/solar_model.py new file mode 100644 index 00000000000..d1f29ae1028 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/models/solar_model.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .base_model import BaseModel, register_model_adapter +import logging +from fastchat.conversation import get_conv_template, Conversation, register_conv_template, SeparatorStyle + +logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + level=logging.INFO, +) +logger = logging.getLogger(__name__) + + +# Solar-10.7B Chat Template +# Reference: https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0/blob/main/tokenizer_config.json +register_conv_template( + Conversation( + name="solar", + system_message="", + roles=("### User", "### Assistant"), + sep_style=SeparatorStyle.ADD_COLON_SPACE_SINGLE, + sep="\n\n", + stop_str="", + ) +) + +class SolarModel(BaseModel): + def match(self, model_path: str): + """ + Check if the provided model_path matches the current model. + + Args: + model_path (str): Path to a model. + + Returns: + bool: True if the model_path matches, False otherwise. + """ + return "solar-" in model_path.lower() and "instruct" in model_path.lower() + + def get_default_conv_template(self, model_path: str) -> Conversation: + """ + Get the default conversation template for the given model path. + + Args: + model_path (str): Path to the model. + + Returns: + Conversation: A default conversation template. + """ + return get_conv_template("solar") + +register_model_adapter(SolarModel) + From feef5dc9e6e08f6e87eaa0f09273bfc52fc68fc9 Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Fri, 22 Dec 2023 14:43:33 +0800 Subject: [PATCH 2/2] add optimization support Signed-off-by: lvliang-intel --- .../llm/quantization/optimization.py | 1 + 1 file changed, 1 insertion(+) diff --git a/intel_extension_for_transformers/llm/quantization/optimization.py b/intel_extension_for_transformers/llm/quantization/optimization.py index 6710f8d85fb..30062aa7e58 100644 --- a/intel_extension_for_transformers/llm/quantization/optimization.py +++ b/intel_extension_for_transformers/llm/quantization/optimization.py @@ -56,6 +56,7 @@ def optimize(self, model, use_llm_runtime=False): or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE) or re.search("neural-chat-7b-v3", model_name, re.IGNORECASE) or re.search("starcoder", model_name, re.IGNORECASE) + or re.search("solar", model_name, re.IGNORECASE) ): from intel_extension_for_transformers.transformers import AutoModelForCausalLM optimized_model = AutoModelForCausalLM.from_pretrained(