In [None]:
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from transformers import AutoTokenizer, AutoModel
from typing import List, Optional, Any

In [None]:
from langchain_community.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS  # 向量数据库

In [None]:
# coding:utf-8
# Import the required tooling.
from langchain.prompts import PromptTemplate
# Disabled imports (left commented out):
#from get_vector import *
#from model import ChatGLM2

# Load the FAISS vector store, using the same embedding model to encode queries.
EMBEDDING_MODEL = '/mnt/workspace/logistics/m3e-base'
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
# NOTE(review): allow_dangerous_deserialization=True opts in to pickle-based
# loading of the saved index; only point this at an index you created yourself.
db = FAISS.load_local(
    '/mnt/workspace/logistics/faiss/camp',
    embeddings,
    allow_dangerous_deserialization=True,
)

In [None]:
# Custom LangChain LLM wrapper around a locally loaded ChatGLM2 checkpoint.
class ChatGLM2(LLM):
    """LangChain ``LLM`` adapter that answers prompts with a ChatGLM2 model.

    A new instance holds no model; call :meth:`load_model` before sending the
    first prompt so that ``tokenizer`` and ``model`` are populated.

    Attributes:
        max_token: Token budget setting.
            NOTE(review): not forwarded to ``model.chat`` anywhere in this class.
        temperature: Sampling temperature passed to ``model.chat``.
        top_p: Nucleus-sampling threshold passed to ``model.chat``.
        tokenizer: Tokenizer set by :meth:`load_model`.
        model: Model set by :meth:`load_model`.
        history: ``[prompt, response]`` pairs from earlier calls on this
            instance, passed back to ``model.chat`` as conversation history.
    """

    # Every attribute is annotated: ``LLM`` is a pydantic model and pydantic v2
    # refuses to build a class that has un-annotated attributes with values.
    max_token: int = 4096
    temperature: float = 0.8
    top_p: float = 0.9
    tokenizer: object = None
    model: object = None
    history: List[Any] = []

    def __init__(self, **kwargs: Any):
        """Create the wrapper; keyword arguments override the field defaults."""
        super().__init__(**kwargs)

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for this LLM implementation."""
        return "custom_chatglm2"

    def load_model(self, model_path=None):
        """Load the tokenizer and the model from ``model_path``.

        Args:
            model_path: Path or identifier accepted by ``from_pretrained``.

        Raises:
            ValueError: If ``model_path`` is empty or ``None``.
        """
        if not model_path:
            raise ValueError("model_path must point to a ChatGLM2 checkpoint")
        # trust_remote_code=True runs Python code shipped with the checkpoint,
        # so only load checkpoints from a trusted source.
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        # .float() converts the weights to float32 (CPU inference).
        self.model = AutoModel.from_pretrained(model_path, trust_remote_code=True).float()
        # GPU alternative:
        #self.model = AutoModel.from_pretrained(model_path, trust_remote_code=True).cuda()

    def _call(self, prompt: str, stop: Optional[List[str]] = None,
              run_manager: Optional[Any] = None, **kwargs: Any) -> str:
        """Run one inference step and return the model's reply.

        Args:
            prompt: Text sent to the model.
            stop: Optional stop sequences; the reply is cut at the first match.
            run_manager: Callback manager LangChain may pass in; unused here.
            **kwargs: Extra options LangChain may forward; ignored.

        Returns:
            The reply text, truncated at a stop sequence when ``stop`` is given.

        Raises:
            RuntimeError: If :meth:`load_model` has not been called yet.
        """
        if self.model is None or self.tokenizer is None:
            raise RuntimeError("call load_model() before prompting ChatGLM2")

        response, _ = self.model.chat(self.tokenizer,
                                      prompt,
                                      history=self.history,
                                      temperature=self.temperature,
                                      top_p=self.top_p)

        if stop is not None:
            response = enforce_stop_tokens(response, stop)

        # Store the real prompt with its reply; a None query would be sent back
        # to the model as the previous user turn on the next call.
        self.history = self.history + [[prompt, response]]
        return response

In [None]:
def get_related_content(related_docs):
    """Merge the text of the retrieved documents into one context string.

    Args:
        related_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The documents' texts joined by single newlines, with each double
        newline inside a document collapsed to one newline. An empty input
        yields an empty string.
    """
    return '\n'.join(
        document.page_content.replace('\n\n', '\n') for document in related_docs
    )

In [None]:
def define_prompt(question, k=1):
    """Build the retrieval-augmented prompt for ``question``.

    The ``k`` most similar documents are fetched from the module-level ``db``
    vector store, merged with ``get_related_content`` and inserted into a fixed
    Chinese template together with the question.

    Args:
        question: The user's question; also used as the similarity-search query.
        k: Number of documents to retrieve as context. Defaults to 1.

    Returns:
        The fully formatted prompt string.
    """
    # Example question: '我买的商品来自于哪个仓库，从哪出发的，预计什么到达'
    docs = db.similarity_search(question, k=k)
    # The merged result is a single string, not a list of documents.
    context = get_related_content(docs)

    # Build the template. In English: "Based on the known information below,
    # answer the user's question concisely and professionally. Do not add
    # fabricated content to the answer. Known content: ... Question: ..."
    PROMPT_TEMPLATE = """
           基于以下已知信息，简洁和专业的来回答用户的问题。不允许在答案中添加编造成分。
           已知内容:
           {context}
           问题:
           {question}"""
    prompt = PromptTemplate(input_variables=["context", "question"],
                            template=PROMPT_TEMPLATE)

    return prompt.format(context=context, question=question)

In [None]:
# Loaded ChatGLM2 wrappers keyed by checkpoint path, so the model is read from
# disk once per kernel session instead of once per question.
_QA_MODEL_PATH = '/mnt/workspace/logistics/chatglm2-6b'
_qa_llm_cache = {}


def _get_llm(model_path=_QA_MODEL_PATH):
    """Return the cached ChatGLM2 wrapper for ``model_path``, loading it on first use."""
    llm = _qa_llm_cache.get(model_path)
    if llm is None:
        llm = ChatGLM2()
        llm.load_model(model_path)
        _qa_llm_cache[model_path] = llm
    return llm


def qa(question):
    """Answer ``question`` using retrieved context and the ChatGLM2 model.

    The model is loaded on the first call and reused afterwards, so it stays
    resident in memory between calls.

    Args:
        question: The user's question.

    Returns:
        The model's answer as a string.
    """
    llm = _get_llm()
    # Each question is answered independently: clear the chat history that the
    # wrapper accumulates so earlier questions do not leak into this prompt.
    llm.history = []
    my_prompt = define_prompt(question)
    # invoke() is the supported entry point; calling the LLM object directly is deprecated.
    return llm.invoke(my_prompt)

In [None]:
if __name__ == '__main__':
    # qa() takes the question as a required argument; use a sample query here.
    result = qa('我买的商品来自于哪个仓库，从哪出发的，预计什么到达')
    print(f'result-->{result}')

In [None]:
# Example query: "What is the shipping method for the goods I bought?"
result = qa('我买的商品运输方式是什么')
print(f'result-->{result}')

In [None]:
# Example query: "What is the logistics (tracking) number of the goods I bought?"
result = qa('我买的商品物流编号是什么')
print(f'result-->{result}')

In [None]:
# Example query: "Which logistics company handles the product I bought?"
# NOTE(review): '那个' ("that one") is probably a typo for '哪个' ("which one");
# left unchanged because the string is the runtime query.
result = qa('我买的产品的物流公司是那个')
print(f'result-->{result}')