In [None]:
from langchain_community.vectorstores import Chroma

from langchain_text_splitters import CharacterTextSplitter

from langchain_openai import OpenAIEmbeddings

In [None]:
# Large language model (LLM) that generates the final answer in the chain.

from langchain_community.chat_models import ChatOllama

# Chat model wrapper configured to use the "llama3" model.
llm = ChatOllama(model="llama3")

In [None]:
# RAG retriever: read the source text, split it into overlapping chunks,
# embed the chunks into a Chroma vector store, and expose a retriever over it.

# Load the whole knowledge file as UTF-8 text.
with open("xx.txt", encoding="utf-8") as f:
    text = f.read()

# Split on literal newlines (not a regex), targeting chunks of about 1000
# characters (measured with len) with 200 characters of overlap between
# neighbouring chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)

# Wrap every chunk in a Document object.
texts = text_splitter.create_documents([text])

# NOTE(review): OpenAIEmbeddings presumably needs OpenAI credentials to be
# configured in the environment -- confirm before running.
embeddings = OpenAIEmbeddings()

# Embed the chunks and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(texts, embeddings)

# Return at most 3 chunks per query, and only those whose relevance score
# reaches the 0.5 threshold.
retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 3, "score_threshold": 0.5},
)

In [None]:
from langchain.prompts import PromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# Prompt that restricts the model to the retrieved context and asks it to say
# "I don't know" when the context does not contain the answer.
prompt = PromptTemplate.from_template("""
Answer the question based on the context below, and if the question can't be answered based on the context, say "I don't know"
Context: {context}
Question: {question}
Answer:    
""")


def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the list of Document objects would be inserted into the
    prompt as its Python repr (including metadata) rather than as plain text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: the input question is sent to the retriever (whose hits are
# flattened to text) and, unchanged, to the "question" slot; the filled prompt
# goes to the LLM and the model output is parsed into a plain string.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Ask the RAG chain one question and print the model's answer.
question = "世界上最高的山是哪座山？"
response = chain.invoke(question)
print(response)