# 使用 Document

## 保存到 DuckDB

In [None]:
import json

import duckdb

# Persist a list of Document objects into DuckDB and read them back.
#
# Everything goes through ONE connection: each duckdb.connect(":memory:")
# call opens its own, separate in-memory database, so a table written
# through one connection is not visible from another one.
# NOTE(review): `docs` is expected to be a list of Document objects created
# earlier in the notebook -- it is not defined in this cell.
con = duckdb.connect(database=":memory:")
try:
    # One row per document; metadata is stored as a JSON string so that
    # arbitrary (JSON-serialisable) metadata dicts fit in a single column.
    con.execute(
        "CREATE TABLE IF NOT EXISTS my_table "
        "(page_content VARCHAR, metadata VARCHAR)"
    )

    rows = [
        (doc.page_content, json.dumps(doc.metadata, ensure_ascii=False))
        for doc in docs
    ]
    # Skip the insert entirely when there is nothing to store.
    if rows:
        con.executemany("INSERT INTO my_table VALUES (?, ?)", rows)

    # Run the query that retrieves every stored document.
    results = con.execute("SELECT * FROM my_table").fetchall()

    # Print the query results.
    for result in results:
        print(result)
finally:
    # Always release the connection, even if a statement above failed.
    con.close()

## 结合向量检索使用

In [None]:
from langchain.schema import Document
# NOTE(review): `langchain.vectors.SentenceTransformers` could not be
# confirmed as a LangChain module -- verify this import against the
# installed LangChain version.
from langchain.vectors import SentenceTransformers

# Build a new Document carrying both text and metadata.
document = Document(
    page_content="This is the text content of the document.",
    metadata={
        "source": "Wikipedia",
        "author": "Jane Doe",
        "date": "2023-08-08",
    },
)

# Encode the document text into a vector.
vector_encoder = SentenceTransformers("sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
document_vector = vector_encoder.encode(document.page_content)

# Store the vector together with the document and its metadata.
# NOTE(review): `VectorDatabase` is neither imported nor defined in the
# visible part of this file -- confirm where it comes from.
vector_database = VectorDatabase()
vector_database.add(document_vector, document, metadata=document.metadata)

# Wrap the query text in a Document as well.
query_document = Document(page_content="This is a query.")

# Encode the query with the same encoder used for the stored document.
query_vector = vector_encoder.encode(query_document.page_content)

# Look up the 10 nearest vectors in the vector database.
similar_vectors = vector_database.search(query_vector, k=10)

# Resolve each hit back to its stored document.
similar_documents = [vector_database.get(vector) for vector in similar_vectors]

# Keep only documents whose metadata marks them as coming from Wikipedia.
# `.get` is used so that a document without a "source" key is filtered out
# instead of raising KeyError. Distinct loop names avoid rebinding the
# `document` created above.
filtered_documents = [
    candidate
    for candidate in similar_documents
    if candidate.metadata.get("source") == "Wikipedia"
]

# Print the text content of every document that passed the filter.
for match in filtered_documents:
    print(match.page_content)