Support neural-chat-7b-v3 and neural-chat-7b-v3-1 (#698)
* Support neural-chat-7b-v3 and neural-chat-7b-v3-1

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
lvliang-intel committed Nov 17, 2023
1 parent e1cca32 commit d9a8641
Showing 7 changed files with 127 additions and 12 deletions.
@@ -362,6 +362,7 @@ def load_model(
or re.search("llama", model_name, re.IGNORECASE)
or re.search("neural-chat-7b-v1", model_name, re.IGNORECASE)
or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)
or re.search("neural-chat-7b-v3", model_name, re.IGNORECASE)
or re.search("qwen", model_name, re.IGNORECASE)
or re.search("starcoder", model_name, re.IGNORECASE)
or re.search("Mistral", model_name, re.IGNORECASE)
@@ -990,4 +991,12 @@ def predict(**params):
output = tokenizer.decode(generation_output.sequences[0], skip_special_tokens=True)
if "### Response:" in output:
return output.split("### Response:")[1].strip()
if "### Assistant" in output:
return output.split("### Assistant:")[1].strip()
if "\nassistant\n" in output:
return output.split("\nassistant\n")[1].strip()
if "[/INST]" in output:
return output.split("[/INST]")[1].strip()
if "答:" in output:
return output.split("答:")[1].strip()
return output
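The added branches give predict a cascade of role markers for trimming the decoded output down to the model's reply; "### Assistant:" is what the neural-chat-v3 templates registered later in this commit emit. A standalone sketch of the same cascade (the extract_reply name and sample transcript are illustrative, not from the source):

# Illustrative rewrite of the marker cascade above.
RESPONSE_MARKERS = ("### Response:", "### Assistant:", "\nassistant\n", "[/INST]", "答:")

def extract_reply(output: str) -> str:
    for marker in RESPONSE_MARKERS:
        if marker in output:
            # Keep only the text after the first matching marker.
            return output.split(marker)[1].strip()
    return output

sample = "### User:\nHi there\n### Assistant:\nHello! How can I help?"
print(extract_reply(sample))  # -> Hello! How can I help?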
@@ -51,6 +51,8 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
"""
if "neural-chat-7b-v2" in model_path.lower():
return get_conv_template("neural-chat-7b-v2")
elif "neural-chat-7b-v3" in model_path.lower():
return get_conv_template("neural-chat-7b-v3")
else:
return get_conv_template("neural-chat-7b-v1-1")

34 changes: 34 additions & 0 deletions intel_extension_for_transformers/neural_chat/prompts/prompt.py
@@ -17,6 +17,40 @@

from fastchat.conversation import get_conv_template, register_conv_template, Conversation, SeparatorStyle

+# neuralchat-v3-1 prompt template
+register_conv_template(
+Conversation(
+name="neural-chat-7b-v3-1",
+system_message="""### System:
+- You are a helpful assistant chatbot trained by Intel.
+- You answer questions.
+- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
+- You are more than just an information source, you are also able to write poetry, \
+short stories, and make jokes.</s>\n""",
+roles=("### User:", "### Assistant:"),
+sep_style=SeparatorStyle.NO_COLON_TWO,
+sep="\n",
+sep2="</s>",
+)
+)
+
+# neuralchat-v3 prompt template
+register_conv_template(
+Conversation(
+name="neural-chat-7b-v3",
+system_message="""### System:
+- You are a helpful assistant chatbot trained by Intel.
+- You answer questions.
+- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
+- You are more than just an information source, you are also able to write poetry, \
+short stories, and make jokes.</s>\n""",
+roles=("### User:", "### Assistant:"),
+sep_style=SeparatorStyle.NO_COLON_TWO,
+sep="\n",
+sep2="</s>",
+)
+)
+
# neuralchat-v2 prompt template
register_conv_template(
Conversation(
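The two registrations above differ only in their name. A sketch of how a registered template expands into a prompt string through fastchat's standard Conversation API, assuming this prompts module has been imported so the registrations have run; the rendering note is inferred from the NO_COLON_TWO separator style:

from fastchat.conversation import get_conv_template

# get_conv_template returns a copy, so appending messages does not
# mutate the registered template.
conv = get_conv_template("neural-chat-7b-v3")
conv.append_message(conv.roles[0], "Tell me a joke.")  # "### User:"
conv.append_message(conv.roles[1], None)               # leave "### Assistant:" open
print(conv.get_prompt())
# NO_COLON_TWO concatenates each role with its message and alternates
# sep ("\n") and sep2 ("</s>") between turns, so the prompt ends with an
# open "### Assistant:" for the model to complete.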
@@ -18,7 +18,7 @@
from intel_extension_for_transformers.neural_chat.pipeline.plugins.caching.cache import ChatCache
from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig
import unittest
-import os
+import os, shutil

class TestChatCache(unittest.TestCase):
def setUp(self):
@@ -36,18 +36,17 @@ def tearDown(self) -> None:
return super().tearDown()

def test_chat_cache(self):
cache_plugin = ChatCache(embedding_model_dir="/tf_dataset2/models/nlp_toolkit/instructor-large")
cache_plugin = ChatCache(embedding_model_dir="hkunlp/instructor-large")
cache_plugin.init_similar_cache_from_config()

prompt = "Tell me about Intel Xeon Scable Processors."
prompt = "Tell me about Intel Xeon Scalable Processors."
config = PipelineConfig(model_name_or_path="facebook/opt-125m")
chatbot = build_chatbot(config)
response = chatbot.predict(prompt)
cache_plugin.put(prompt, response)

answer = cache_plugin.get(prompt)
-self.assertIn('Tell me about Intel Xeon Scable Processors.', str(answer))
+self.assertIn('Intel Xeon Scalable', str(answer['choices'][0]['text']))


if __name__ == "__main__":
unittest.main()
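For context, a minimal sketch of the cache round-trip this test exercises, using only calls that appear above; the prompt and response strings are placeholders:

from intel_extension_for_transformers.neural_chat.pipeline.plugins.caching.cache import ChatCache

# Embed prompts with instructor-large, store a response, then fetch it
# back by similarity; the ['choices'][0]['text'] shape matches the
# assertion in test_chat_cache.
cache = ChatCache(embedding_model_dir="hkunlp/instructor-large")
cache.init_similar_cache_from_config()
cache.put("What is an FPGA?", "A field-programmable gate array is ...")
answer = cache.get("What is an FPGA?")
print(answer['choices'][0]['text'])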
@@ -19,6 +19,7 @@
from intel_extension_for_transformers.neural_chat.models.llama_model import LlamaModel
from intel_extension_for_transformers.neural_chat.models.mpt_model import MptModel
from intel_extension_for_transformers.neural_chat.models.neuralchat_model import NeuralChatModel
+from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig
import unittest

class TestChatGlmModel(unittest.TestCase):
@@ -29,12 +30,17 @@ def tearDown(self) -> None:
return super().tearDown()

def test_match(self):
-result = ChatGlmModel().match(model_path='/tf_dataset2/models/nlp_toolkit/chatglm2-6b')
+result = ChatGlmModel().match(model_path='THUDM/chatglm2-6b')
self.assertTrue(result)

def test_get_default_conv_template(self):
-result = ChatGlmModel().get_default_conv_template(model_path='/tf_dataset2/models/nlp_toolkit/chatglm-6b')
+result = ChatGlmModel().get_default_conv_template(model_path='THUDM/chatglm2-6b')
self.assertIn('问', str(result))
config = PipelineConfig(model_name_or_path="THUDM/chatglm2-6b")
chatbot = build_chatbot(config=config)
result = chatbot.predict("中国最大的城市是哪个?")
print(result)
self.assertIn('上海', str(result))

class TestLlamaModel(unittest.TestCase):
def setUp(self):
@@ -44,12 +50,16 @@ def tearDown(self) -> None:
return super().tearDown()

def test_match(self):
-result = LlamaModel().match(model_path='/tf_dataset2/models/nlp_toolkit/llama-2-7b-chat')
+result = LlamaModel().match(model_path='meta-llama/Llama-2-7b-chat-hf')
self.assertTrue(result)

def test_get_default_conv_template(self):
-result = LlamaModel().get_default_conv_template(model_path='/tf_dataset2/models/nlp_toolkit/llama-2-7b-chat')
+result = LlamaModel().get_default_conv_template(model_path='meta-llama/Llama-2-7b-chat-hf')
self.assertIn("[INST] <<SYS>>", str(result))
+chatbot = build_chatbot()
+result = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
+print(result)
+self.assertIn('Intel Xeon Scalable Processors', str(result))

class TestMptModel(unittest.TestCase):
def setUp(self):
@@ -59,12 +69,17 @@ def tearDown(self) -> None:
return super().tearDown()

def test_match(self):
-result = MptModel().match(model_path='/tf_dataset2/models/nlp_toolkit/mpt-7b')
+result = MptModel().match(model_path='mosaicml/mpt-7b-chat')
self.assertTrue(result)

def test_get_default_conv_template(self):
-result = MptModel().get_default_conv_template(model_path='/tf_dataset2/models/nlp_toolkit/mpt-7b')
+result = MptModel().get_default_conv_template(model_path='mosaicml/mpt-7b-chat')
self.assertIn("<|im_start|>system", str(result))
config = PipelineConfig(model_name_or_path="mosaicml/mpt-7b-chat")
chatbot = build_chatbot(config=config)
result = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
print(result)
self.assertIn('Intel Xeon Scalable processors', str(result))

class TestNeuralChatModel(unittest.TestCase):
def setUp(self):
@@ -81,10 +96,33 @@ def test_get_default_conv_template_v1(self):
result = NeuralChatModel().get_default_conv_template(
model_path='Intel/neural-chat-7b-v1-1')
self.assertIn("<|im_start|>system", str(result))
config = PipelineConfig(model_name_or_path="Intel/neural-chat-7b-v1-1")
chatbot = build_chatbot(config=config)
result = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
print(result)
self.assertIn('Intel® Xeon® Scalable processors', str(result))

def test_get_default_conv_template_v2(self):
result = NeuralChatModel().get_default_conv_template(model_path='Intel/neural-chat-7b-v2')
self.assertIn("### System:", str(result))

+def test_get_default_conv_template_v3(self):
+result = NeuralChatModel().get_default_conv_template(model_path='Intel/neural-chat-7b-v3')
+self.assertIn("### System:", str(result))
+config = PipelineConfig(model_name_or_path="Intel/neural-chat-7b-v3")
+chatbot = build_chatbot(config=config)
+result = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
+print(result)
+self.assertIn('The Intel Xeon Scalable Processors', str(result))
+
+def test_get_default_conv_template_v3_1(self):
+result = NeuralChatModel().get_default_conv_template(model_path='Intel/neural-chat-7b-v3-1')
+self.assertIn("### System:", str(result))
+config = PipelineConfig(model_name_or_path="Intel/neural-chat-7b-v3-1")
+chatbot = build_chatbot(config=config)
+result = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
+print(result)
+self.assertIn('The Intel Xeon Scalable Processors', str(result))

if __name__ == "__main__":
unittest.main()
@@ -322,7 +322,8 @@ def http_bot(state, model_selector, temperature, max_new_tokens, topk, request:

if len(state.messages) == state.offset + 2:
# model conversation name: "mpt-7b-chat", "chatglm", "chatglm2", "llama-2",
# "neural-chat-7b-v2", "neural-chat-7b-v1-1"
# "neural-chat-7b-v3-1", "neural-chat-7b-v3",
# "neural-chat-7b-v2", "neural-chat-7b-v1-1"
# First round of Conversation
if "Llama-2-7b-chat-hf" in model_name:
model_name = "llama-2"
@@ -946,6 +946,38 @@ def get_conv_template(name: str) -> Conversation:
)
)

+# neuralchat-v3 template
+register_conv_template(
+Conversation(
+name="neural-chat-7b-v3",
+system_message="""### System:
+- You are a helpful assistant chatbot trained by Intel.
+- You answer questions.
+- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
+- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.</s>\n""",
+roles=("### User:", "### Assistant:"),
+sep_style=SeparatorStyle.NO_COLON_TWO,
+sep="\n",
+sep2="</s>",
+)
+)
+
+# neuralchat-v3-1 template
+register_conv_template(
+Conversation(
+name="neural-chat-7b-v3-1",
+system_message="""### System:
+- You are a helpful assistant chatbot trained by Intel.
+- You answer questions.
+- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
+- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.</s>\n""",
+roles=("### User:", "### Assistant:"),
+sep_style=SeparatorStyle.NO_COLON_TWO,
+sep="\n",
+sep2="</s>",
+)
+)
+
# neuralchat-v1.1 prompt template
register_conv_template(
Conversation(
