# 本地知识库的加载和查询
1. 用文本加载器TextLoader加载本地text文件
2. 使用similarity_search进行相关性检索
3. 使用similarity_search_with_score进行带打分的相关性检索，可以根据这个打分设置阈值来筛选结果（注意：Chroma返回的打分是距离，数值越小表示越相关）

In [None]:
from typing import List
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import ModelScopeEmbeddings
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document

# Load a local text file, split it into chunks, index the chunks in Chroma,
# and run the same query through both similarity-search variants.

# Step 1: read the raw document from disk and cut it into chunks.
loader = TextLoader('resources/doc1.txt', encoding='utf-8')
raw_doc1 = loader.load()
text_splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
sp_doc1: List[Document] = text_splitter.split_documents(raw_doc1)
print(sp_doc1)

# Step 2: embed the chunks with a ModelScope model and build the vector store.
model_id = "damo/nlp_corom_sentence-embedding_chinese-base"
embeddings = ModelScopeEmbeddings(model_id=model_id)
vectorstore_doc1 = Chroma.from_documents(sp_doc1, embedding=embeddings)

# Both searches below use the same question and ask for one result only.
query = "介绍一下小李飞刀"

# Step 3: plain similarity search returns the matching documents.
print("\nsimilarity_search的结果")
ret = vectorstore_doc1.similarity_search(query, k=1)
print(ret)

# Step 4: the scored variant returns (document, score) pairs.
# NOTE(review): for Chroma the score is presumably a distance (lower means
# closer) - confirm against the library docs before using it as a threshold.
print("\nsimilarity_search_with_score的结果")
ret = vectorstore_doc1.similarity_search_with_score(query, k=1)
top_doc, top_score = ret[0]
print(f"document={top_doc.page_content}， 得分：{top_score}")

# similarity_search 相关性查找的例子
演示了Chroma.from_documents和Chroma.from_texts的用法
其中的 k 参数表示返回的记录数，返回结果按相关性从高到低排列

In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import ModelScopeEmbeddings
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from typing import List

# Demonstrates the two Chroma constructors (from_documents and from_texts)
# and queries each resulting store with similarity_search.

model_id = "damo/nlp_corom_sentence-embedding_chinese-base"
# Load the embedding model once and reuse it for both stores; building it a
# second time with the same model_id only repeats the model load.
embeddings = ModelScopeEmbeddings(model_id=model_id)

# --- Chroma.from_documents: build a store from Document objects ---
documents_list1 = [
    Document(page_content="李寻欢是个侠客，又帅又高又多金"),
]
# An explicit collection_name keeps this store separate from any other Chroma
# store created in the same process.
# NOTE(review): with the default name, in-memory stores may end up sharing one
# collection depending on the installed chromadb version - confirm for yours.
vectorstore_doc1 = Chroma.from_documents(
    documents_list1,
    embedding=embeddings,
    collection_name="demo_from_documents",
)
print(vectorstore_doc1)

# k is the number of results to return.
ret = vectorstore_doc1.similarity_search("李寻欢是谁", k=1)
print(ret)

# --- Chroma.from_texts: build a store from plain strings ---
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts: List[str] = text_splitter.split_text("张三丰是全真派教主，是个白胡子老头")
vectorstore_doc1_1 = Chroma.from_texts(
    texts,
    embedding=embeddings,
    collection_name="demo_from_texts",
)
ret = vectorstore_doc1_1.similarity_search("张三丰是谁?", k=1)
print(ret)