In [None]:
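# LlamaIndex offers comprehensive configuration support for the RAG pipeline,
# letting developers customize each stage. Common configuration scenarios:
#
# - Custom document chunking
# - Custom vector storage
# - Custom retrieval
# - Specifying the LLM
# - Specifying the response mode
# - Specifying streaming responses
#
# Note: in llama_index.core these options are applied through the global
# `Settings` object (or per-call arguments); the older `ServiceContext` class
# is deprecated.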

In [ ]:
# Custom document chunking
from llama_index.core import ServiceContext  # kept: a later cell still references this name
from llama_index.core import Settings

# `Settings` is the global configuration object of llama_index.core. It does
# not read a `service_context` attribute, so wrapping the chunk size in a
# (deprecated) ServiceContext and assigning it there has no effect.
# Set the chunk size on `Settings` directly so the default splitter uses it.
Settings.chunk_size = 500

In [ ]:
# Indexing & Embedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex
from llama_index.core import Document

# Wrap the raw text in a Document and define how it is split into chunks.
doc = Document(text="text")
text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=10)

# Global default: use this splitter wherever none is passed explicitly.
Settings.text_splitter = text_splitter

# Per-index override. `from_documents` expects a sequence of Documents, so the
# single document must be wrapped in a list; passing the bare Document makes
# the index iterate over the document's fields instead of over documents.
index = VectorStoreIndex.from_documents(
    [doc], transformations=[text_splitter]
)

In [ ]:
# Storing
# Once data has been loaded and indexed, you will probably want to store it to
# avoid the time and cost of re-indexing. By default, indexed data is kept
# only in memory.

# Persisting to disk ("<persist_dir>" is a placeholder for the target directory)
index.storage_context.persist(persist_dir="<persist_dir>")

# graph.root_index.storage_context.persist(persist_dir="<persist_dir>")

In [ ]:
from llama_index.core import StorageContext, load_index_from_storage

# Load a persisted index like this to avoid re-loading and re-indexing the data:


# Rebuild the storage context from the directory the index was persisted to.
storage_context = StorageContext.from_defaults(persist_dir="<persist_dir>")

# Load the index from that storage context.
index = load_index_from_storage(storage_context)

In [ ]:
# Using Vector Stores
import chromadb
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
# Use the Chroma integration that is built against llama_index.core (package
# `llama-index-vector-stores-chroma`). The class under `llama_index.legacy`
# belongs to the legacy class hierarchy and should not be mixed with the
# core `VectorStoreIndex` / `StorageContext` used below.
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Load some documents from the local ./data directory.
documents = SimpleDirectoryReader("./data").load_data()

# Initialize the client; `path` is where Chroma saves its data.
db = chromadb.PersistentClient(path="./chroma_db")

# Create the collection (or reuse it if it already exists).
chroma_collection = db.get_or_create_collection("quickstart")

# Assign Chroma as the vector store of the storage context.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Create the index; embeddings are written to the Chroma collection.
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

# Create a query engine and run a query.
query_engine = index.as_query_engine()
response = query_engine.query("What is the meaning of life?")
print(response)

In [ ]:
# If the embeddings have already been created and stored, load them directly;
# there is no need to load the documents or build a new VectorStoreIndex.
import chromadb
from llama_index.core import VectorStoreIndex
# Use the Chroma integration that is built against llama_index.core (package
# `llama-index-vector-stores-chroma`). The class under `llama_index.legacy`
# belongs to the legacy class hierarchy and should not be mixed with the
# core `VectorStoreIndex` / `StorageContext` used below.
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Initialize the client against the same on-disk location.
db = chromadb.PersistentClient(path="./chroma_db")

# Get the existing collection.
chroma_collection = db.get_or_create_collection("quickstart")

# Assign Chroma as the vector store of the storage context.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Load the index from the stored vectors.
index = VectorStoreIndex.from_vector_store(
    vector_store, storage_context=storage_context
)

# Create a query engine and run a query.
query_engine = index.as_query_engine()
response = query_engine.query("What is llama2?")
print(response)

In [ ]:
# If you have already created an index, you can add new documents to it with
# the insert method.
# Inserting documents or nodes

from llama_index.core import VectorStoreIndex

# Start from an empty index and add the documents one at a time.
index = VectorStoreIndex([])
for doc in documents:
    index.insert(doc)

In [ ]:
# 现在，您已经加载了数据，构建了索引，并存储了该索引以供以后使用，您已经准备好进入LLM应用程序最重要的部分：查询。

In [ ]:
# Build a query engine from the current index, run one query, and print the response.
query_engine = index.as_query_engine()
response = query_engine.query(
    "Write an email to the user given their background information."
)
print(response)

In [ ]:
# Customizing the stages of querying#
# 自定义查询阶段#

In [ ]:
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever

# Build an index over the previously loaded documents.
index = VectorStoreIndex.from_documents(documents)

# Retriever: request the 10 most similar nodes per query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=10)

# Response synthesizer with its default configuration.
response_synthesizer = get_response_synthesizer()

# Assemble the query engine.
# Node postprocessors support advanced filtering and augmentation of the
# retrieved nodes, which can further improve their relevance. This can reduce
# the time, number, and cost of LLM calls, or improve response quality.
# SimilarityPostprocessor filters nodes by a similarity-score threshold, so it
# only works with embedding-based retrievers.
query_engine = RetrieverQueryEngine(
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
    response_synthesizer=response_synthesizer,
    retriever=retriever,
)

# Run the query and show the answer.
response = query_engine.query("What did the author do growing up?")
print(response)

In [ ]:
# Custom retrieval: the `similarity_top_k` argument sets how many similar
# documents the query engine requests at retrieval time.
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine(similarity_top_k=5)

In [ ]:
# Specify the LLM
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

# Register the LLM on the global `Settings` object so that indexes and query
# engines created afterwards use it by default. (A ServiceContext that is only
# constructed and never passed to an index or query engine has no effect, and
# the class is deprecated in llama_index.core.)
Settings.llm = OpenAI()

In [ ]:
# Specify the response mode
query_engine = index.as_query_engine(response_mode='tree_summarize')

In [ ]:
# Specify streaming responses
query_engine = index.as_query_engine(streaming=True)