From aa8f5b9e6fcb1e05d4bf57a0a38b2f0b95a95264 Mon Sep 17 00:00:00 2001 From: Anirban Basu Date: Wed, 5 Jun 2024 19:02:27 +0900 Subject: [PATCH] fix: Added property graph extractors and property graph index retrievers, explicitly. --- ui/ingest.py | 64 +++++++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 43 deletions(-) diff --git a/ui/ingest.py b/ui/ingest.py index c3631f0..427eefa 100644 --- a/ui/ingest.py +++ b/ui/ingest.py @@ -31,7 +31,10 @@ from llama_index_client import Document from llama_index.core import VectorStoreIndex, PropertyGraphIndex -from llama_index.core.indices.property_graph import SimpleLLMPathExtractor +from llama_index.core.indices.property_graph import ( + SimpleLLMPathExtractor, + ImplicitPathExtractor, +) from llama_index.core import load_index_from_storage from llama_index.core import Settings from llama_index.core.node_parser import SentenceSplitter @@ -43,6 +46,11 @@ ) from llama_index.core.retrievers import VectorIndexRetriever from llama_index.core import get_response_synthesizer +from llama_index.core.indices.property_graph import ( + PGRetriever, + VectorContextRetriever, + LLMSynonymRetriever, +) from llama_index.core.query_engine import RetrieverQueryEngine from llama_index.core.chat_engine import ContextChatEngine from llama_index.core.schema import TransformComponent @@ -142,15 +150,16 @@ def initialise_chat_engine() -> bool: sm.show_status_message( message=f"**Initialising chat engine** from index using the _{sm.global_settings__index_chat_mode.value}_ chat mode." ) - # kg_retriever = KGTableRetriever( - # index=sm.global_knowledge_graph_index.value, - # embed_model=Settings.embed_model, - # retriever_mode="hybrid", - # graph_store_query_depth=2, - # similarity_top_k=2, - # verbose=True, - # ) - kg_retriever = sm.global_knowledge_graph_index.value.as_retriever() + sub_retrievers = [ + VectorContextRetriever( + sm.global_knowledge_graph_index.value.property_graph_store + ), + LLMSynonymRetriever( + sm.global_knowledge_graph_index.value.property_graph_store + ), + ] + kg_retriever = PGRetriever(sub_retrievers=sub_retrievers) + # The vector context retriever can make the vector index retriever redundant, in the future. vector_retriever = VectorIndexRetriever( index=sm.global_semantic_search_index.value, embed_model=Settings.embed_model, @@ -252,10 +261,11 @@ def build_index_pipeline() -> bool: # show_progress=True, # ) kg_extractors = [ + ImplicitPathExtractor(), SimpleLLMPathExtractor( llm=Settings.llm, max_paths_per_chunk=sm.global_settings__index_max_triplets_per_chunk.value, - ) + ), ] sm.global_knowledge_graph_index.value = PropertyGraphIndex( nodes=chunk_nodes, @@ -287,38 +297,6 @@ def build_index_pipeline() -> bool: return True -# def build_index() -> bool: -# """Build the knowledge graph index from the documents. DEPRECATED: Use build_index_pipeline() instead.""" -# # Try to output some kind of progress bar state? -# global ingested_documents -# chunk_parser = SentenceSplitter.from_defaults( -# chunk_size=Settings.chunk_size, -# chunk_overlap=Settings.chunk_overlap, -# include_metadata=True, -# include_prev_next_rel=True, -# ) -# chunk_nodes = chunk_parser.get_nodes_from_documents( -# documents=ingested_documents.value, show_progress=True -# ) -# sm.show_status_message( -# message=f"**Building index** from {len(chunk_nodes)} extracted chunks.", -# timeout=0, -# ) -# sm.global_knowledge_graph_index.value = KnowledgeGraphIndex( -# nodes=chunk_nodes, -# llm=Settings.llm, -# embed_model=Settings.embed_model, -# storage_context=sm.global_llamaindex_storage_context.value, -# max_triplets_per_chunk=sm.global_settings__index_max_triplets_per_chunk.value, -# include_embeddings=sm.global_settings__index_include_embeddings.value, -# show_progress=True, -# ) -# sm.global_knowledge_graph_index.value.storage_context.docstore.add_documents( -# chunk_nodes -# ) -# return True - - @task(prefer_threaded=True) @observe() async def ingest_wikipedia_data():