In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_postgres.vectorstores import PGVector
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama import ChatOllama
from langchain_core.documents import Document
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryStore
import uuid

In [3]:
# Where the summary vectors live: a local Postgres/pgvector instance and the
# collection used for this notebook.
collection_name = "summaries"
connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain"

# Embedding model served by a local Ollama instance.
embeddings_model = OllamaEmbeddings(
    model="nomic-embed-text",
    base_url="http://localhost:11434",
)

# Read the source text from disk; the printed figure is the character count
# of the first loaded document.
loader = TextLoader("./resources/the_little_prince.txt", encoding="utf-8")
docs = loader.load()
first_doc_text = docs[0].page_content
print("length of loaded docs: ", len(first_doc_text))

length of loaded docs:  91915


In [5]:
# Cut the loaded document into overlapping chunks (size 1000, overlap 200)
splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
chunks = splitter.split_documents(docs)

# Report how many chunks were produced, then show the first one
print("length of split docs: %d" % len(chunks))
first_chunk_text = chunks[0].page_content
print("The length of first chunk: %d" % len(first_chunk_text))
print("The first chunk: ", first_chunk_text)

length of split docs: 113
The length of first chunk: 937
The first chunk:  The Little Prince

I Once when I was six years old I saw a magnificent picture in a book, called True Stories from Nature, about the primeval forest.

It was a picture of a boa constrictor in the act of swallowing an animal.

Here is a copy of the drawing.

Boa

In the book it said:

"Boa constrictors swallow their prey whole, without chewing it.

After that they are not able to move, and they sleep through the six months that they need for digestion."

I pondered deeply, then, over the adventures of the jungle. And after some work with a colored pencil I succeeded in making my first drawing.

My Drawing Number One.

It looked something like this:

Sombrero

I showed my masterpiece to the grown-ups, and asked them whether the drawing frightened them.

But they answered: "Frighten? Why should any one be frightened by a hat?"

My drawing was not a picture of a hat.

It was a picture of a boa constrictor digesting 

In [6]:
# Prompt template used to summarize one chunk; `{doc}` is filled with the
# chunk's text by the summarization chain.
prompt_text = (
    "Summarize the following document:"
    "\n\n"
    "{doc}"
)

In [7]:
prompt = ChatPromptTemplate.from_template(prompt_text)

# Chat model served by the local Ollama instance (no hosted model is used here).
llm = ChatOllama(
    model="qwen2.5:32b",
    temperature=0,
    base_url="http://localhost:11434",
)

# Chain: pull the text out of a chunk -> fill the prompt -> call the model ->
# return the reply as a plain string.
summarize_chain = (
    {"doc": lambda chunk: chunk.page_content}
    | prompt
    | llm
    | StrOutputParser()
)

In [8]:
# Summarize every chunk, running at most five requests concurrently
batch_config = {"max_concurrency": 5}
summaries = summarize_chain.batch(chunks, config=batch_config)

In [9]:
# Preview only the first few summaries; there is one summary per chunk, so
# displaying the whole list floods the notebook output.
summaries[:3]

['The excerpt introduces the narrative style and themes of "The Little Prince" by Antoine de Saint-Exupéry, focusing on the disconnect between the narrator\'s (who is also the author) imaginative perspective as a child and the literal-mindedness of adults. The story begins with the narrator recalling an image from his childhood book about a boa constrictor swallowing its prey whole. He then drew what he imagined—a boa digesting an elephant—but depicted it in such a way that adults only saw a hat, highlighting the theme of misunderstanding between children and grown-ups.',
 "The document describes an interaction between a child artist and some grown-ups. The child's first drawing was misunderstood as a hat when it was actually meant to depict a boa constrictor digesting an elephant. In an attempt to clarify, the child made a second drawing showing the inside of the boa constrictor but was discouraged by the adults who advised focusing on more conventional subjects like geography and his

Next, let’s define the vector store, which holds the summaries and their embeddings, and the docstore, which holds the original document chunks:

接下来，定义向量存储（保存摘要及其嵌入表示）和文档存储（保存原始文档块）：

In [10]:
# The vector store that indexes the chunk *summaries* (not the chunks
# themselves) for similarity search.
#
# The PGVector collection persists in Postgres, while the parent documents are
# kept in an InMemoryStore that is empty on every fresh kernel. Without
# resetting the collection, each re-run of this notebook would leave behind
# summaries whose doc_id no longer resolves to any parent document, and those
# stale hits would crowd real results out of the top-k. Dropping the collection
# up front keeps the two stores in sync.
# NOTE: this deletes any existing data in the `collection_name` collection.
vectorstore = PGVector(
    embeddings=embeddings_model,  # Ollama embeddings nomic-embed-text
    collection_name=collection_name,  # collection_name = "summaries"
    connection=connection,  # Postgres connection string defined earlier
    use_jsonb=True,
    pre_delete_collection=True,  # start from an empty collection on each run
)

In [11]:
# Metadata key under which each summary records the id of its parent chunk
id_key = "doc_id"

# In-memory key/value store that will hold the parent (original) chunks
store = InMemoryStore()

In [12]:
# Retriever that searches the summaries in the vector store and then uses the
# id stored under `id_key` to look up the original chunks in the docstore.
retriever = MultiVectorRetriever(
    id_key=id_key,
    docstore=store,
    vectorstore=vectorstore,
)

In [13]:
# One freshly generated UUID string per chunk, in chunk order
doc_ids = [str(uuid.uuid4()) for _ in range(len(chunks))]

In [14]:
# Sanity check: number of ids generated and what one looks like
doc_id_count = len(doc_ids)
print("length of doc_ids: %d" % doc_id_count)
first_doc_id = doc_ids[0]
print("the first doc_id: ", first_doc_id)

length of doc_ids: 113
the first doc_id:  e92fc4d6-4c02-4d35-ab3a-807b54c7fa4e


In [15]:
# Wrap every summary in a Document whose metadata carries the id of the chunk
# it was generated from (summary i <-> doc_ids[i]).
summary_docs = []
for position, summary_text in enumerate(summaries):
    parent_link = {id_key: doc_ids[position]}
    summary_docs.append(
        Document(page_content=summary_text, metadata=parent_link)
    )

In [16]:
# Sanity check: number of summary documents and the text of the first one
print("length of summary_docs: %d" % len(summary_docs))
first_summary_doc = summary_docs[0]
print("the first summary_doc: ", first_summary_doc.page_content)

length of summary_docs: 113
the first summary_doc:  The excerpt introduces the narrative style and themes of "The Little Prince" by Antoine de Saint-Exupéry, focusing on the disconnect between the narrator's (who is also the author) imaginative perspective as a child and the literal-mindedness of adults. The story begins with the narrator recalling an image from his childhood book about a boa constrictor swallowing its prey whole. He then drew what he imagined—a boa digesting an elephant—but depicted it in such a way that adults only saw a hat, highlighting the theme of misunderstanding between children and grown-ups.


In [17]:
# Embed and index the summary documents so they can be found by similarity
# search; the cell displays whatever add_documents returns.
summary_index = retriever.vectorstore
summary_index.add_documents(summary_docs)

['f254f42d-2cef-490e-8b70-878e508df465',
 'c80b983b-18aa-4221-ad05-845ef7ae9c38',
 '789b1fe7-2ee5-4169-9d20-b3ff04dcc0e6',
 '202bb71a-bbde-4cf0-80ef-5c911baef861',
 '8f46fe07-11b2-42ba-8f7a-95974e2a56c6',
 '2b3bbd90-8c33-4bdd-adb5-8e9320376934',
 'f2d45d1d-a292-42f3-8ee4-d0d2d01f4aa3',
 'f61c2b3a-4aca-4f38-9b46-0da26e6f8cca',
 '1f7e1b78-3c0c-4728-ad45-9e4a55d6cefa',
 '0bce7f0a-9ca6-491f-bf89-3fd792ae4243',
 'c7b5be86-0d05-4229-a518-4743373532fc',
 '29ff027d-577f-42e5-877d-9f2e99ad375d',
 '01c7fc09-d4ca-4467-b23a-04aaf176060b',
 '10e57482-bb68-4e3b-a998-d247e9f047cf',
 'ada75857-1fa9-4d46-b091-41940f3b6dc5',
 '2f4c6ed6-3845-419e-8517-4aecf9196b61',
 '8117a87f-467a-4e8e-bd7e-5d0462bc7fa4',
 '637d7f45-b54b-4592-8323-c3abcf9e5244',
 '6025f103-636f-4ed9-883c-15aa51b56ee7',
 '028867ff-87d5-4b38-bcee-94ec9f552989',
 '326149b1-421f-490b-9605-17a3eb59c0ff',
 'fdc77dd3-b116-4dde-a54f-aed7edb83e6a',
 '41dea6ab-21eb-4e23-a5fb-a0c06178635c',
 '096c5242-4f10-47bd-a975-4a2be4fd9ee5',
 'bae15891-1946-

In [18]:

# Put the original chunks into the docstore, keyed by the same doc_ids that
# the summaries carry in their metadata. Search runs over the summaries;
# the matching full chunks are then fetched from here.
parent_pairs = [(doc_id, chunk) for doc_id, chunk in zip(doc_ids, chunks)]
retriever.docstore.mset(parent_pairs)

In [19]:
# Querying the vector store directly returns the matching *summaries*
summary_query = "the death of the prince"
sub_docs = retriever.vectorstore.similarity_search(summary_query, k=2)

In [20]:
# Display the summary documents returned by the direct vector store search
sub_docs

[Document(id='de02e49e-591a-4515-9b42-3644d0bc4098', metadata={'doc_id': '294638eb-a486-4dd0-95c3-7542b984b0f7'}, page_content="The document describes an encounter between the narrator and a little prince facing a deadly yellow snake. As the narrator attempts to intervene, the snake quickly disappears among the stones. The narrator then finds the little prince pale and in distress, tending to him by loosening his golden muffler, moistening his temples, and giving him water. Despite the urgency of the situation, the narrator refrains from asking more questions out of concern for the little prince's fragile state. The little prince, looking seriously at the narrator, expresses relief that the narrator has figured out what was wrong with his engine."),
 Document(id='96eab558-597c-4a50-af18-f01c95f83df4', metadata={'doc_id': 'f5f22acb-b712-4c60-8391-b8071f2ad67a'}, page_content="The document describes a reflective and somewhat melancholic scene where the narrator drinks water and feels the

In [21]:
# Whereas the retriever will return the larger source document chunks.
#
# The number of summaries searched is controlled by the retriever's
# `search_kwargs`, not by keyword arguments to `invoke()`: MultiVectorRetriever
# does not forward extra `invoke()` kwargs to the vector store search, so a
# `k=2` passed there is silently ignored and the vector store default is used.
retriever.search_kwargs["k"] = 2
retrieved_docs = retriever.invoke("the death of the prince")
retrieved_docs

[Document(metadata={'source': './resources/the_little_prince.txt'}, page_content='There before me, facing the little prince, was one of those yellow snakes that take just thirty seconds to bring your life to an end.\n\nEven as I was digging into my pocked to get out my revolver I made a running step back. But, at the noise I made, the snake let himself flow easily across the sand like the dying spray of a fountain, and, in no apparent hurry, disappeared, with a light metallic sound, among the stones.\n\nI reached the wall just in time to catch my little man in my arms; his face was white as snow.\n\n"What does this mean?"\n\nI demanded. "Why are you talking with snakes?"\n\nI had loosened the golden muffler that he always wore.\n\nI had moistened his temples, and had given him some water to drink.\n\nAnd now I did not dare ask him any more questions.\n\nHe looked at me very gravely, and put his arms around my neck.\n\nI felt his heart beating like the heart of a dying bird, shot with s