# 准备并测试NeuralChat环境

注：本测试均在本地Windows环境下运行

参考资料：
https://github.com/intel/intel-extension-for-transformers/blob/main/intel_extension_for_transformers/neural_chat/README.md

## 调用本地的chatglm3-6b 模型（此处未做量化，作为基线；量化处理见后文“使用Intel量化模式”一节）

In [1]:
from intel_extension_for_transformers.neural_chat import PipelineConfig
from intel_extension_for_transformers.neural_chat import build_chatbot
from intel_extension_for_transformers.neural_chat import plugins
from intel_extension_for_transformers.transformers import RtnConfig
# Build a chatbot from the local ChatGLM3-6B checkpoint; only the model path is configured.
config_local = PipelineConfig(
    model_name_or_path="./chatglm3-6b",
)
chatbot = build_chatbot(config_local)



Loading model ./chatglm3-6b


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

2024-05-14 12:29:16,110 - root - INFO - Model loaded.


In [34]:
import time 
# Time a single chatbot.predict call using wall-clock timestamps.
start_time = time.time()
# Question: "Hello, I'd like to ask about some medical information; can you help me?"
question = """你好，我想咨询一些医疗方面的信息，你能帮助我么？ """
response = chatbot.predict(question)
end_time = time.time()
elapsed_time = end_time - start_time
# Only the elapsed seconds are printed; the answer stays in `response`.
print(f"服务调用花费了 {elapsed_time} 秒")

服务调用花费了 6.573718547821045 秒


In [35]:
# Time a second chatbot.predict call; `time` is not imported in this cell,
# so it must already be in the notebook namespace.
start_time = time.time()
# Question: "I scalded my left hand with boiling water; how should I treat it?"
question = """我被开水烫伤了左手，请问应该如何处理？ """
response = chatbot.predict(question)
end_time = time.time()
elapsed_time = end_time - start_time
# Only the elapsed seconds are printed; the answer stays in `response`.
print(f"服务调用花费了 {elapsed_time} 秒")

服务调用花费了 21.66703701019287 秒


In [33]:
# response = chatbot.predict("你是谁？")  # "Who are you?"

# The answer to this question is made up by the model.
response = chatbot.predict( """Please look for the ID and blood type of the Patient named "Christina Williams", I think the field name of blood type will be like patient blood type. """)
print(response)


您好，我已经找到了名为"Christina Williams"的患者的ID和血型信息。她的ID是123456，血型是O型。


## 继承Langchain的LLM类，为后续调用作准备

In [4]:
from typing import Any, List, Optional, Union
import logging
import json
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_community.llms.utils import enforce_stop_tokens
from langchain_core.language_models.llms import LLM
from langchain_core.pydantic_v1 import Field
from langchain_core.messages import (
    AIMessage,
    BaseMessage,
    FunctionMessage,
    HumanMessage,
    SystemMessage,
)
from intel_extension_for_transformers.neural_chat.models.chatglm_model import ChatGlmModel

logger = logging.getLogger(__name__)

def _convert_message_to_dict(message: BaseMessage) -> dict:
    """Map a LangChain message to a ``{"role": ..., "content": ...}`` dict.

    Args:
        message: A human, AI, system or function message.

    Returns:
        A dict holding the chat role for the message type and the message content.

    Raises:
        ValueError: If the message is none of the four supported types.
    """
    # Checked in order, mirroring an if/elif chain over the message classes.
    role_by_type = (
        (HumanMessage, "user"),
        (AIMessage, "assistant"),
        (SystemMessage, "system"),
        (FunctionMessage, "function"),
    )
    for message_type, role in role_by_type:
        if isinstance(message, message_type):
            return {"role": role, "content": message.content}
    raise ValueError(f"Got unknown type {message}")

class ChatGLM3(LLM):
    """LangChain ``LLM`` adapter around a locally loaded NeuralChat ChatGLM3 model.

    The model object is attached with :meth:`setClient`; :meth:`_call` sends the
    raw prompt to its ``predict`` method and returns the generated text.
    """

    model_name: str = Field(default="chatglm3-6b", alias="model")
    # Extra keyword arguments merged into the invocation parameters.
    model_kwargs: Optional[dict] = None
    # Sampling temperature (the original note gives the range as 0 to 10 — TODO confirm).
    temperature: float = 0.1
    # Top-p value for nucleus sampling, from 0 to 1.
    top_p: float = 0.7
    # Messages placed before the user prompt when building the chat payload.
    prefix_messages: List[BaseMessage] = Field(default_factory=list)
    # Local model used for inference; stays None until setClient() is called.
    local_llm: Optional[ChatGlmModel] = None

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for this LLM implementation."""
        return "Local_Intel_chatglm3-6b"

    @property
    def _invocation_params(self) -> dict:
        """Get the parameters used to invoke the model.

        A fresh dict is built on every access, so callers may mutate the result.
        """
        params = {
            "model": self.model_name,
            "temperature": self.temperature,
            "top_p": self.top_p,
        }
        return {**params, **(self.model_kwargs or {})}

    def _get_payload(self, prompt: str) -> dict:
        """Build the chat-style payload (invocation params plus role/content messages)."""
        params = self._invocation_params
        messages = self.prefix_messages + [HumanMessage(content=prompt)]
        params["messages"] = [_convert_message_to_dict(m) for m in messages]
        return params

    def setClient(self, client: ChatGlmModel) -> ChatGlmModel:
        """Attach the local model and return the model currently in use.

        Passing ``None`` leaves the previously attached model untouched.
        """
        # Identity check: a model object must not be skipped because it is falsy.
        if client is not None:
            self.local_llm = client
        return self.local_llm

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Run the prompt through the attached local ChatGLM3 model.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words; the output is cut at the first match.
            run_manager: Accepted for LangChain compatibility; not used here.
            **kwargs: Accepted for LangChain compatibility; not used here.

        Returns:
            The string generated by the model.

        Raises:
            ValueError: If no local model has been attached with ``setClient``.

        Example:
            .. code-block:: python

                llm = ChatGLM3()
                llm.setClient(client=chatbot)
                response = llm.invoke("Who are you?")
        """
        if self.local_llm is None:
            raise ValueError(
                "No local model attached; call setClient(client=...) before invoking ChatGLM3."
            )

        # The payload is built for debug logging only: the local model receives just
        # the raw prompt, so prefix_messages/temperature/top_p are not forwarded here.
        payload = self._get_payload(prompt)
        logger.debug("ChatGLM3 payload: %s", payload)

        text = self.local_llm.predict(query=prompt)
        logger.debug("ChatGLM3 response: %s", text)

        # Truthiness check on purpose: an empty stop list would otherwise become an
        # empty regex and truncate the whole response.
        if stop:
            text = enforce_stop_tokens(text, stop)

        return text

In [27]:
# Wrap the already-built chatbot in the LangChain-compatible ChatGLM3 class.
customLLM = ChatGLM3()
customLLM.setClient(client=chatbot)

# Time one invoke() call; the returned text is discarded.
start_time = time.time()
# Question: "Hello, I'd like to ask about some medical information; can you help me?"
question = """你好，我想咨询一些医疗方面的信息，你能帮助我么？ """
customLLM.invoke(question)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"服务调用花费了 {elapsed_time} 秒")

服务调用花费了 6.558316469192505 秒


In [32]:
# Time one invoke() call through the LangChain wrapper; the returned text is discarded.
start_time = time.time()
# Question: "I scalded my left hand with boiling water; how should I treat it?"
question = """我被开水烫伤了左手，请问应该如何处理？ """
customLLM.invoke(question)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"服务调用花费了 {elapsed_time} 秒")

服务调用花费了 27.98581600189209 秒


In [31]:
# Time the patient-lookup question through the LangChain wrapper (no graph access here);
# the returned text is discarded.
start_time = time.time()
question = """Please look for the ID and blood type of the Patient named "Christina Williams", I think the field name of blood type will be like patient blood type. """
customLLM.invoke(question)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"服务调用花费了 {elapsed_time} 秒")

服务调用花费了 16.352635383605957 秒


## 使用相同的Neo4j组件，完成相同操作

1、准备SSH隧道，允许当前环境访问本地Neo4j数据库

2、定义工具包，包含Cypher查询

In [9]:
!pip install neo4j

Looking in indexes: https://mirrors.aliyun.com/pypi/simple
DEPRECATION: pytorch-lightning 1.7.7 has a non-standard dependency specifier torch>=1.9.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063

In [18]:
# 定义一个工具包，包含执行Cypher查询的方法
from langchain_community.graphs import Neo4jGraph
import json

import os

# Connection settings can be overridden through the NEO4J_* environment variables;
# the literals remain only as backward-compatible defaults.
# SECURITY(review): a real host and password are committed here — rotate the
# credential and drop the hard-coded default.
graph = Neo4jGraph(
    url=os.environ.get("NEO4J_URI", "bolt://47.100.39.50:7687"),
    username=os.environ.get("NEO4J_USERNAME", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", "Fkc@1234"),
    database=os.environ.get("NEO4J_DATABASE", "neo4j"),
)


class MyCustomToolkit:
    """Small helper that runs Cypher queries through a Neo4j graph wrapper."""

    def __init__(self, graph: "Neo4jGraph"):
        # Despite the attribute name, this stores the graph wrapper that was passed in.
        self._driver = graph

    def execute_cypher_query(self, cypher_query: str = None, **params):
        """Run a Cypher query and return the rows as an indented JSON string.

        Args:
            cypher_query: Cypher statement to execute.
            **params: Query parameters forwarded to the graph's ``query`` call.

        Returns:
            The rows rendered with ``json.dumps(..., indent=4)``, or ``None`` when
            no query was supplied or execution/serialization failed (the error is
            printed rather than raised).
        """
        if not cypher_query:
            print("An error occurred: no Cypher query was provided")
            return None
        # Deliberately best-effort: any failure is reported and mapped to None.
        try:
            rows = self._driver.query(cypher_query, params=params)
            return json.dumps(rows, indent=4)
        except Exception as e:
            print(f"An error occurred: {e}")
            return None
        

toolkit = MyCustomToolkit(graph)

# Define the Cypher query
cypher_query = "MATCH (n:Patient) RETURN n LIMIT 25"

# Run the query through the toolkit and print the JSON result
print(toolkit.execute_cypher_query(cypher_query))

[
    {
        "n": {
            "patient_dob": "1994/10/6",
            "patient_sex": "Female",
            "blood_type": "O+",
            "name": "Tiffany Ramirez",
            "T\r": "1",
            "id": "0"
        }
    },
    {
        "n": {
            "patient_dob": "1973/3/31",
            "patient_sex": "Male",
            "blood_type": "A-",
            "name": "Ruben Burns",
            "T\r": "1",
            "id": "1"
        }
    },
    {
        "n": {
            "patient_dob": "1932/5/10",
            "patient_sex": "Male",
            "blood_type": "O-",
            "name": "Chad Byrd",
            "T\r": "1",
            "id": "2"
        }
    },
    {
        "n": {
            "patient_dob": "1944/10/4",
            "patient_sex": "Male",
            "blood_type": "AB+",
            "name": "Antonio Frederick",
            "T\r": "1",
            "id": "3"
        }
    },
    {
        "n": {
            "patient_dob": "1989/1/26",
            "patient_s

3、定义与Neo4j交互的Agent与Chain

In [20]:
# ...
from langchain import PromptTemplate
# Prompt that asks the LLM to turn a natural-language question into a Cypher query.
# Placeholders: {schema} and {question}.
# NOTE(review): the examples reference p.dob, while the Patient nodes printed earlier
# in this notebook expose patient_dob — confirm the examples against the graph schema.
# NOTE(review): the "alias all statements" / "divide numbers" instructions appear twice.
cypher_generation_template = """
Task:
Generate Cypher query for a Neo4j graph database.

Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.

Schema:
{schema}

Note:
Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything other than
for you to construct a Cypher statement. Do not include any text except
the generated Cypher statement. Make sure the direction of the relationship is
correct in your queries. Make sure you alias both entities and relationships
properly. Do not run any queries that would add to or delete from
the database. Make sure to alias all statements that follow as with
statement (e.g. WITH v as visit, c.billing_amount as billing_amount)
If you need to divide numbers, make sure to
filter the denominator to be non zero.

Examples:
# Who is the oldest patient and how old are they?
MATCH (p:Patient)
RETURN p.name AS oldest_patient,
       duration.between(date(p.dob), date()).years AS age
ORDER BY age DESC
LIMIT 1

# Which physician has billed the least to Cigna
MATCH (p:Payer)<-[c:COVERED_BY]-(v:Visit)-[t:TREATS]-(phy:Physician)
WHERE p.name = 'Cigna'
RETURN phy.name AS physician_name, SUM(c.billing_amount) AS total_billed
ORDER BY total_billed
LIMIT 1

# Which state had the largest percent increase in Cigna visits
# from 2022 to 2023?
MATCH (h:Hospital)<-[:AT]-(v:Visit)-[:COVERED_BY]->(p:Payer)
WHERE p.name = 'Cigna' AND v.admission_date >= '2022-01-01' AND
v.admission_date < '2024-01-01'
WITH h.state_name AS state, COUNT(v) AS visit_count,
     SUM(CASE WHEN v.admission_date >= '2022-01-01' AND
     v.admission_date < '2023-01-01' THEN 1 ELSE 0 END) AS count_2022,
     SUM(CASE WHEN v.admission_date >= '2023-01-01' AND
     v.admission_date < '2024-01-01' THEN 1 ELSE 0 END) AS count_2023
WITH state, visit_count, count_2022, count_2023,
     (toFloat(count_2023) - toFloat(count_2022)) / toFloat(count_2022) * 100
     AS percent_increase
RETURN state, percent_increase
ORDER BY percent_increase DESC
LIMIT 1

# How many non-emergency patients in North Carolina have written reviews?
MATCH (r:Review)<-[:WRITES]-(v:Visit)-[:AT]->(h:Hospital)
WHERE h.state_name = 'NC' and v.admission_type <> 'Emergency'
RETURN count(*)

String category values:
Test results are one of: 'Inconclusive', 'Normal', 'Abnormal'
Visit statuses are one of: 'OPEN', 'DISCHARGED'
Admission Types are one of: 'Elective', 'Emergency', 'Urgent'
Payer names are one of: 'Cigna', 'Blue Cross', 'UnitedHealthcare', 'Medicare',
'Aetna'

A visit is considered open if its status is 'OPEN' and the discharge date is
missing.
Use abbreviations when
filtering on hospital states (e.g. "Texas" is "TX",
"Colorado" is "CO", "North Carolina" is "NC",
"Florida" is "FL", "Georgia" is "GA", etc.)

Make sure to use IS NULL or IS NOT NULL when analyzing missing properties.
Never return embedding properties in your queries. You must never include the
statement "GROUP BY" in your query. Make sure to alias all statements that
follow as with statement (e.g. WITH v as visit, c.billing_amount as
billing_amount)
If you need to divide numbers, make sure to filter the denominator to be non
zero.

The question is:
{question}
"""

# Wrap the template so the schema and question can be filled in at call time.
cypher_generation_prompt = PromptTemplate(
    input_variables=["schema", "question"], template=cypher_generation_template
)

In [21]:
# ...

# QA prompt: turns the Cypher query results ({context}) plus the question into an answer.
# NOTE(review): {context} appears a second time inside the "Empty information" example,
# so the query results are interpolated twice — confirm this is intended.
qa_generation_template_en = """You are an assistant that takes the results
from a Neo4j Cypher query and forms a human-readable response. The
query results section contains the results of a Cypher query that was
generated based on a user's natural language question. The provided
information is authoritative, you must never doubt it or try to use
your internal knowledge to correct it. Make the answer sound like a
response to the question.

Query Results:
{context}

Question:
{question}

If the provided information is empty, say you don't know the answer.
Empty information looks like this: [{context} , the provided information is empty ]

If the information is not empty, you must provide an answer using the
results. If the question involves a time duration, assume the query
results are in units of days unless otherwise specified.

When names are provided in the query results, such as hospital names,
beware  of any names that have commas or other punctuation in them.
For instance, 'Jones, Brown and Murray' is a single hospital name,
not multiple hospitals. Make sure you return any list of names in
a way that isn't ambiguous and allows someone to tell what the full
names are.

Never say you don't have the right information if there is data in
the query results. Always use the data in the query results.

Helpful Answer:
"""

# Variant whose "Empty information" example is the literal []; it is not referenced
# by qa_generation_prompt below.
qa_generation_template = """You are an assistant that takes the results
from a Neo4j Cypher query and forms a human-readable response. The
query results section contains the results of a Cypher query that was
generated based on a user's natural language question. The provided
information is authoritative, you must never doubt it or try to use
your internal knowledge to correct it. Make the answer sound like a
response to the question.

Query Results:
{context}

Question:
{question}

If the provided information is empty, say you don't know the answer.
Empty information looks like this: []

If the information is not empty, you must provide an answer using the
results. If the question involves a time duration, assume the query
results are in units of days unless otherwise specified.

When names are provided in the query results, such as hospital names,
beware  of any names that have commas or other punctuation in them.
For instance, 'Jones, Brown and Murray' is a single hospital name,
not multiple hospitals. Make sure you return any list of names in
a way that isn't ambiguous and allows someone to tell what the full
names are.

Never say you don't have the right information if there is data in
the query results. Always use the data in the query results.

Helpful Answer:
"""

# The QA prompt is built from the _en variant.
qa_generation_prompt = PromptTemplate(
    input_variables=["context", "question"], template=qa_generation_template_en
)

In [22]:
from langchain_community.graphs import Neo4jGraph
import os

# Connection settings can be overridden through the NEO4J_* environment variables;
# the literals remain only as backward-compatible defaults.
# SECURITY(review): a real host and password are committed here — rotate the
# credential and drop the hard-coded default.
graph = Neo4jGraph(
    url=os.environ.get("NEO4J_URI", "bolt://47.100.39.50:7687"),
    username=os.environ.get("NEO4J_USERNAME", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", "Fkc@1234"),
    database=os.environ.get("NEO4J_DATABASE", "neo4j"),
)

# Wrap the chatbot built earlier in the LangChain-compatible LLM class.
customLLM = ChatGLM3()
customLLM.setClient(client=chatbot)
# Single call before building the chain; the result is discarded. ("你是谁？" = "Who are you?")
customLLM.invoke("你是谁？")

from langchain.chains.graph_qa.cypher import GraphCypherQAChain
# The same LLM both generates the Cypher statement and phrases the final answer.
hospital_cypher_chain = GraphCypherQAChain.from_llm(
    cypher_llm=customLLM,
    qa_llm=customLLM,
    graph=graph,
    verbose=True,
    qa_prompt=qa_generation_prompt,
    cypher_prompt=cypher_generation_prompt,
    validate_cypher=True,
    top_k=100,    
)

In [26]:
# Alternative question: "Find the admission and discharge times of the patient named 'Patty Norman'"
# question = """请查找患者名字是'Patty Norman' 的入院时间和出院时间"""
import time
# Time the full chain call with wall-clock timestamps.
start_time = time.time()
question = """Please look for the ID and blood type of the Patient named "Christina Williams", I think the field name of blood type will be like patient blood type. """
response = hospital_cypher_chain.invoke(question)
end_time = time.time()

elapsed_time = end_time - start_time
# Only the elapsed seconds are printed; the chain result stays in `response`.
print(f"服务调用花费了 {elapsed_time} 秒")



> Entering new GraphCypherQAChain chain...
Generated Cypher:
MATCH (p:Patient) WHERE p.name = 'Christina Williams' RETURN p.id AS patient_id,
  p.blood_type AS blood_type
Full Context:
[{'patient_id': '12', 'blood_type': 'O-'}, {'patient_id': '8815', 'blood_type': 'AB+'}]

> Finished chain.
服务调用花费了 244.63360571861267 秒


## 使用Intel量化模式，验证使用效果

In [14]:
from intel_extension_for_transformers.neural_chat import PipelineConfig
from intel_extension_for_transformers.neural_chat import build_chatbot
from intel_extension_for_transformers.neural_chat import plugins
from intel_extension_for_transformers.transformers import RtnConfig
# Turn on the retrieval plugin and point it at the local embedding model and sample file.
plugins.retrieval.enable = True
plugins.retrieval.args["embedding_model"] = "./bge-base-zh-v1.5"
plugins.retrieval.args["input_path"] = "./sample.jsonl"

# Same local checkpoint, now with the plugins and an RTN config
# (int8 compute dtype, int4_fullrange weight dtype).
config_int8 = PipelineConfig(
    model_name_or_path="./chatglm3-6b",
    plugins=plugins,
    optimization_config=RtnConfig(
        compute_dtype="int8",
        weight_dtype="int4_fullrange",
    ),
)
chatbot_int8 = build_chatbot(config_int8)

2024-05-08 23:20:59,903 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: ./bge-base-zh-v1.5


create retrieval plugin instance...
plugin parameters:  {'embedding_model': './bge-base-zh-v1.5', 'input_path': './sample.jsonl'}


2024-05-08 23:21:01,417 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: cpu
2024-05-08 23:21:01,420 - root - INFO - The parsing for the uploaded files is finished.
2024-05-08 23:21:01,420 - root - INFO - The format of parsed documents is transferred.
2024-05-08 23:21:01,435 - chromadb.telemetry.product.posthog - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-08 23:21:01,915 - root - INFO - The retriever is successfully built.
2024-05-08 23:21:02 [INFO] Applying Weight Only Quantization.


Loading model ./chatglm3-6b


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

2024-05-08 23:21:36 [INFO] Start auto tuning.
2024-05-08 23:21:36 [INFO] Quantize model without tuning!
2024-05-08 23:21:36 [INFO] Quantize the model with default configuration without evaluating the model.                To perform the tuning process, please either provide an eval_func or provide an                    eval_dataloader an eval_metric.
2024-05-08 23:21:36 [INFO] Adaptor has 5 recipes.
2024-05-08 23:21:36 [INFO] 0 recipes specified by user.
2024-05-08 23:21:36 [INFO] 3 recipes require future tuning.
2024-05-08 23:21:36 [INFO] *** Initialize auto tuning
2024-05-08 23:21:36 [INFO] {
2024-05-08 23:21:36 [INFO]     'PostTrainingQuantConfig': {
2024-05-08 23:21:36 [INFO]         'AccuracyCriterion': {
2024-05-08 23:21:36 [INFO]             'criterion': 'relative',
2024-05-08 23:21:36 [INFO]             'higher_is_better': True,
2024-05-08 23:21:36 [INFO]             'tolerable_loss': 0.01,
2024-05-08 23:21:36 [INFO]             'absolute': None,
2024-05-08 23:21:36 [INFO]     

In [16]:
# Wrap the int8 chatbot in the LangChain-compatible LLM class.
customLLM_int8 = ChatGLM3()
customLLM_int8.setClient(client=chatbot_int8)
# customLLM_int8.invoke("你是谁？")  # "Who are you?"

from langchain.chains.graph_qa.cypher import GraphCypherQAChain
# Same chain setup as for the non-quantized model; `graph` and both prompts
# come from earlier cells.
hospital_cypher_chain_int8 = GraphCypherQAChain.from_llm(
    cypher_llm=customLLM_int8,
    qa_llm=customLLM_int8,
    graph=graph,
    verbose=True,
    qa_prompt=qa_generation_prompt,
    cypher_prompt=cypher_generation_prompt,
    validate_cypher=True,
    top_k=100,    
)

# Alternative question: "Find the admission and discharge times of the patient named 'Patty Norman'"
# question = """请查找患者名字是'Patty Norman' 的入院时间和出院时间"""
question = """Please look for the ID and blood type of the Patient named "Christina Williams", I think the field name of blood type will be like patient blood type. """
response = hospital_cypher_chain_int8.invoke(question)



> Entering new GraphCypherQAChain chain...


CPU Autocast only supports dtype of torch.bfloat16, torch.float16 currently.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2024-05-08 23:39:24,953 - root - INFO - Chat with QA Agent.
CPU Autocast only supports dtype of torch.bfloat16, torch.float16 currently.


Generated Cypher:
[32;1m[1;3mPlease find the ID and blood type of the Patient named "Christina Williams" in the table.[0m


ValueError: Generated Cypher Statement is not valid
{code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'Please': expected
  "ALTER"
  "CALL"
  "CREATE"
  "DEALLOCATE"
  "DELETE"
  "DENY"
  "DETACH"
  "DROP"
  "DRYRUN"
  "ENABLE"
  "FOREACH"
  "GRANT"
  "LOAD"
  "MATCH"
  "MERGE"
  "OPTIONAL"
  "REALLOCATE"
  "REMOVE"
  "RENAME"
  "RETURN"
  "REVOKE"
  "SET"
  "SHOW"
  "START"
  "STOP"
  "TERMINATE"
  "UNWIND"
  "USE"
  "USING"
  "WITH" (line 1, column 1 (offset: 0))
"Please find the ID and blood type of the Patient named "Christina Williams" in the table."
 ^}