# 加载文本到向量数据库（chroma db）

In [10]:
import ast
import json
from langchain.llms.base import LLM
from transformers import AutoTokenizer, AutoModel, AutoConfig
from typing import List, Optional
from pathlib import Path
from typing import Annotated, Union

import typer
from peft import AutoPeftModelForCausalLM, PeftModelForCausalLM
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    PreTrainedModel,
    PreTrainedTokenizer,
    PreTrainedTokenizerFast,
)

# ModelType = Union[PreTrainedModel, PeftModelForCausalLM]
# TokenizerType = Union[PreTrainedTokenizer, PreTrainedTokenizerFast]



# def _resolve_path(path: Union[str, Path]) -> Path:
#     return Path(path).expanduser().resolve()


# def load_model_and_tokenizer(model_dir: Union[str, Path]) -> tuple[ModelType, TokenizerType]:
#     model_dir = _resolve_path(model_dir)
#     if (model_dir / 'adapter_config.json').exists():
#         model = AutoPeftModelForCausalLM.from_pretrained(
#             model_dir, trust_remote_code=True, device_map='auto'
#         )
#         tokenizer_dir = model.peft_config['default'].base_model_name_or_path
#     else:
#         model = AutoModelForCausalLM.from_pretrained(
#             model_dir, trust_remote_code=True, device_map='auto'
#         )
#         tokenizer_dir = model_dir
#     tokenizer = AutoTokenizer.from_pretrained(
#         tokenizer_dir, trust_remote_code=True
#     )
#     return model, tokenizer


from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
# Build the vector store: embed every chunk of the source text files with a
# HuggingFace embedding model and index the chunks in Chroma.
embeddings = HuggingFaceEmbeddings(
    model_name="embeddings/text2vec-large-chinese",
    model_kwargs={'device': "cuda"},
)

# One splitter is enough for all files, so it is created once up front.
# NOTE(review): the recorded run warns that every produced chunk is longer
# than chunk_size=10, so this limit presumably never merges or trims
# anything -- confirm the intended chunk size.
text_splitter = CharacterTextSplitter(chunk_size=10, chunk_overlap=0)

# Accumulates the split chunks of every input file.
texts = []
for data in ["database.txt"]:
    documents = TextLoader(data, encoding='utf8').load()
    texts.extend(text_splitter.split_documents(documents))

# Index all chunks and expose the store through the retriever interface.
db = Chroma.from_documents(texts, embeddings)
retriever = db.as_retriever()

No sentence-transformers model found with name embeddings/text2vec-large-chinese. Creating a new one with MEAN pooling.
Created a chunk of size 265, which is longer than the specified 10
Created a chunk of size 34, which is longer than the specified 10
Created a chunk of size 244, which is longer than the specified 10
Created a chunk of size 149, which is longer than the specified 10
Created a chunk of size 759, which is longer than the specified 10
Created a chunk of size 580, which is longer than the specified 10
Created a chunk of size 351, which is longer than the specified 10
Created a chunk of size 743, which is longer than the specified 10


# 使用langchain的接口进行查询并调用大模型

In [None]:

from openai import OpenAI

base_url = "http://127.0.0.1:8000/v1"
# llm = OpenAI(api_key="EMPTY", base_url=base_url)
# llm = ChatGLM3.load_model('chatglm3-6b')
# template = """{question}"""
# prompt = PromptTemplate.from_template(template)
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import HuggingFacePipeline
# model, tokenizer = load_model_and_tokenizer('outputting/checkpoint-10000/')
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema.messages import AIMessage
from langchain_community.llms.chatglm3 import ChatGLM3
# Chat-completions endpoint that the ChatGLM3 wrapper sends its requests to.
endpoint_url = "http://127.0.0.1:8000/v1/chat/completions"

# Messages passed to the ChatGLM3 wrapper as `prefix_messages`: a role
# description for the assistant followed by a greeting.
messages = [
    AIMessage(content='''提示：
                                   你是一个论文检索机器人，你要帮助检索论文中出现的关于各方面算法技术的记载'''),
    AIMessage(content="欢迎问我任何问题。"),
]

llm = ChatGLM3(
    endpoint_url=endpoint_url,
    max_tokens=1000,
    prefix_messages=messages,
    top_p=0.9,
)

# Retrieval QA chain built from the LLM and the retriever, using the "stuff"
# chain type.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

# Interactive question loop. A blank line, EOF, or Ctrl-C ends the session
# cleanly instead of requiring the cell/process to be killed.
while True:
    try:
        query = input("请输入您的问题：")
    except (EOFError, KeyboardInterrupt):
        break
    if not query.strip():
        break
    data = qa.run('''提示：
                        你是一个论文检索机器人，你要帮助检索论文中出现的关于各方面算法技术的记载，
                        尝试理解论文信息，并提供准确的查询结果\n
                        问题:
                        '''+query+'''\n
                        回答：
                        ''')
    print(data)

# 也可以手动编写向量搜索和大模型调用的代码：这样可以自定义向量检索方式和大模型的调用方式，更加灵活。
# 注意：本单元依赖前面单元中创建的 db 和 llm，运行前请先执行前面的单元，否则会出现 NameError。

In [18]:
# Manual retrieval-augmented query: run the vector search ourselves, then
# pass the retrieved text to the LLM together with the question.
# The same `query` drives both the search and the prompt so the retrieved
# context always matches the question being asked.
query = '位置编码'
doc = db.similarity_search(query)
print(doc)

# similarity_search returns a list of Document objects; concatenating that
# list to a str raises TypeError, so join the text of each hit instead.
context = '\n'.join(hit.page_content for hit in doc)

answer = llm(prompt='''提示：
                        你是一个论文检索机器人，你要帮助检索论文中出现的关于各方面算法技术的记载，
                        通过检索到的文本内容，并提供准确的查询结果\n
                        问题:
                        '''+query+'''\n
                        搜索到的文本为：
                        '''+context+'''\n
                        回答：
                        ''')
print(answer)


[Document(page_content='其实也不复杂，pos为当前参数在seq_len的位置，i为当前参数在d_model的位置，将这二者的位置信息通过函数处理后加到前面的word embeddings里，赋予了每个位置独一无二的信息，而且赋予了相邻数据的相关性，从而达到了卷积神经网络和循环神经网络的考虑上下文的效果。\n而具体实现过程中，我们既可以按这种方式来，也可以new一个Embedding层，通过输入连续自然数矩阵，获取位置编码矩阵。这个Embedding层和前面做文本编码的不同，它的shape为(seq_len,d_model)，输入的数据为连续自然数矩阵，shape为(,seq_len)，前向传播的输出即(batch,seq_len,d_model)。这样就可以自动学习上下文的位置联系，这样很合理，毕竟本身它也是一个(seq_len,d_model)的矩阵，直接咬死一个固定的函数反而显得不合理了。（不过这样训练的成本肯定也会高，收敛速度不好说）我们可以通过这个函数的固定矩阵反推这个Embedding的值，作为矩阵的初始化权重。\n具体实现也很简单：\nself.src_position_embedding = nn.Embedding(seq_len, d_model)\n前向传播时：\npositions = torch.arange(0, seq_len).expand(N, seq_len)\nsrc_position_outs = self.src_position_embedding(positions)\n而后就可以将其和word embeddings的输出结果相加\nsrc_word_outs+src_position_outs\n具体实现可以考虑添加Dropout等结构，帮助整体收敛的优化。', metadata={'source': 'database.txt'}), Document(page_content='其实也不复杂，pos为当前参数在seq_len的位置，i为当前参数在d_model的位置，将这二者的位置信息通过函数处理后加到前面的word embeddings里，赋予了每个位置独一无二的信息，而且赋予了相邻数据的相关性，从而达到了卷积神经网络和循环神经网络的考虑上下文的效果。\n而具体实现过程中，我们既可以按这种方

NameError: name 'llm' is not defined