From 12c87d5f08ae7e1466a2d40a069482df6069c8a7 Mon Sep 17 00:00:00 2001
From: "Dong, Bo" <bo1.dong@intel.com>
Date: Fri, 22 Dec 2023 13:22:43 +0800
Subject: [PATCH 1/2] [LLM Runtime] dynamic link the layer to compress binary
 size (#1059)

Co-authored-by: Ding, Yi <yi1.ding@intel.com>
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
---
 .../llm/runtime/graph/CMakeLists.txt          |  3 +
 .../llm/runtime/graph/cmake/Common.cmake      | 20 +++++-
 .../llm/runtime/graph/core/CMakeLists.txt     |  2 +-
 .../neural_chat/chatbot.py                    |  3 +
 .../neural_chat/models/solar_model.py         | 69 +++++++++++++++++++
 5 files changed, 94 insertions(+), 3 deletions(-)
 create mode 100644 intel_extension_for_transformers/neural_chat/models/solar_model.py

diff --git a/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt b/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt
index 13d6c936803..76caa45ef3d 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt
+++ b/intel_extension_for_transformers/llm/runtime/graph/CMakeLists.txt
@@ -91,6 +91,9 @@ if (NE_GELU_VEC)
 endif()
 option(NE_PYTHON_API             "neural_engine: use python api"                                 OFF)
 option(NE_SIMD_VEC_DOT_F16       "neural_engine: enable vec_dot_fp16 SIMD optimization"          ON)
+
+option(BUILD_SHARED_LIBS        "If build as shared libs"                                        ON)
+
 if (NE_SIMD_VEC_DOT_F16)
     add_compile_definitions(NE_SIMD_VEC_DOT_F16)
 endif()
diff --git a/intel_extension_for_transformers/llm/runtime/graph/cmake/Common.cmake b/intel_extension_for_transformers/llm/runtime/graph/cmake/Common.cmake
index e10412aa687..d3e266ce668 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/cmake/Common.cmake
+++ b/intel_extension_for_transformers/llm/runtime/graph/cmake/Common.cmake
@@ -36,9 +36,25 @@ function(add_executable_w_warning TARGET)
     warning_check(${TARGET})
 endfunction()
 
-function(add_library_w_warning TARGET)
-    add_library(${TARGET} STATIC ${ARGN})
+function(add_library_w_warning_ TARGET)
+    add_library(${TARGET} ${ARGN})
     set_target_properties(${TARGET} PROPERTIES C_STANDARD 11 C_STANDARD_REQUIRED ON C_EXTENSIONS OFF)
     set_target_properties(${TARGET} PROPERTIES CXX_STANDARD 11 CXX_STANDARD_REQUIRED ON CXX_EXTENSIONS OFF)
     warning_check(${TARGET})
 endfunction()
+
+function(add_library_w_warning TARGET)
+    add_library_w_warning_(${TARGET} STATIC ${ARGN})
+endfunction()
+
+function(add_shared_library_w_warning TARGET)
+    add_library_w_warning_(${TARGET} SHARED ${ARGN})
+endfunction()
+
+function(add_shareable_library_w_warning TARGET)
+    if (BUILD_SHARED_LIBS)
+        add_library_w_warning_(${TARGET} SHARED ${ARGN})
+    else()
+        add_library_w_warning_(${TARGET} STATIC ${ARGN})
+    endif()
+endfunction()
diff --git a/intel_extension_for_transformers/llm/runtime/graph/core/CMakeLists.txt b/intel_extension_for_transformers/llm/runtime/graph/core/CMakeLists.txt
index b77e8b56d10..bcf34a9ca4b 100644
--- a/intel_extension_for_transformers/llm/runtime/graph/core/CMakeLists.txt
+++ b/intel_extension_for_transformers/llm/runtime/graph/core/CMakeLists.txt
@@ -16,7 +16,7 @@ find_package(Threads REQUIRED)
 file(GLOB layers_srcs "layers/*.cpp")
 set(sources ne_layers.c ${layers_srcs})
 
-add_library_w_warning(ne_layers "${sources}")
+add_shareable_library_w_warning(ne_layers "${sources}")
 
 target_include_directories(ne_layers PUBLIC .)
 target_compile_features(ne_layers PUBLIC c_std_11) # don't bump
diff --git a/intel_extension_for_transformers/neural_chat/chatbot.py b/intel_extension_for_transformers/neural_chat/chatbot.py
index b119d9622f7..b4d9613365f 100644
--- a/intel_extension_for_transformers/neural_chat/chatbot.py
+++ b/intel_extension_for_transformers/neural_chat/chatbot.py
@@ -87,6 +87,9 @@ def build_chatbot(config: PipelineConfig=None):
     elif "mistral" in config.model_name_or_path.lower():
         from .models.mistral_model import MistralModel
         adapter = MistralModel()
+    elif "solar" in config.model_name_or_path.lower():
+        from .models.solar_model import SolarModel
+        adapter = SolarModel()
     elif "opt" in config.model_name_or_path.lower() or \
          "gpt" in config.model_name_or_path.lower() or \
          "flan-t5" in config.model_name_or_path.lower() or \
diff --git a/intel_extension_for_transformers/neural_chat/models/solar_model.py b/intel_extension_for_transformers/neural_chat/models/solar_model.py
new file mode 100644
index 00000000000..d1f29ae1028
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/models/solar_model.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .base_model import BaseModel, register_model_adapter
+import logging
+from fastchat.conversation import get_conv_template, Conversation, register_conv_template, SeparatorStyle
+
+logging.basicConfig(
+    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+    datefmt="%m/%d/%Y %H:%M:%S",
+    level=logging.INFO,
+)
+logger = logging.getLogger(__name__)
+
+
+# Solar-10.7B Chat Template
+# Reference: https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0/blob/main/tokenizer_config.json
+register_conv_template(
+    Conversation(
+        name="solar",
+        system_message="",
+        roles=("### User", "### Assistant"),
+        sep_style=SeparatorStyle.ADD_COLON_SPACE_SINGLE,
+        sep="\n\n",
+        stop_str="</s>",
+    )
+)
+
+class SolarModel(BaseModel):
+    def match(self, model_path: str):
+        """
+        Check if the provided model_path refers to a Solar instruct model.
+
+        Args:
+            model_path (str): Path to a model.
+
+        Returns:
+            bool: True if model_path contains "solar-" and "instruct" (case-insensitive), False otherwise.
+        """
+        return "solar-" in model_path.lower() and "instruct" in model_path.lower()
+
+    def get_default_conv_template(self, model_path: str) -> Conversation:
+        """
+        Get the default conversation template for the given model path.
+
+        Args:
+            model_path (str): Path to the model (unused; the "solar" template is always returned).
+
+        Returns:
+            Conversation: A default conversation template.
+        """
+        return get_conv_template("solar")
+
+register_model_adapter(SolarModel)
+

From feef5dc9e6e08f6e87eaa0f09273bfc52fc68fc9 Mon Sep 17 00:00:00 2001
From: lvliang-intel <liang1.lv@intel.com>
Date: Fri, 22 Dec 2023 14:43:33 +0800
Subject: [PATCH 2/2] add optimization support for Solar models

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
---
 .../llm/quantization/optimization.py                             | 1 +
 1 file changed, 1 insertion(+)

diff --git a/intel_extension_for_transformers/llm/quantization/optimization.py b/intel_extension_for_transformers/llm/quantization/optimization.py
index 6710f8d85fb..30062aa7e58 100644
--- a/intel_extension_for_transformers/llm/quantization/optimization.py
+++ b/intel_extension_for_transformers/llm/quantization/optimization.py
@@ -56,6 +56,7 @@ def optimize(self, model, use_llm_runtime=False):
             or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)
             or re.search("neural-chat-7b-v3", model_name, re.IGNORECASE)
             or re.search("starcoder", model_name, re.IGNORECASE)
+            or re.search("solar", model_name, re.IGNORECASE)
         ):
             from intel_extension_for_transformers.transformers import AutoModelForCausalLM
             optimized_model = AutoModelForCausalLM.from_pretrained(