From aa8f5b9e6fcb1e05d4bf57a0a38b2f0b95a95264 Mon Sep 17 00:00:00 2001
From: Anirban Basu <anirbanbasu@users.noreply.github.com>
Date: Wed, 5 Jun 2024 19:02:27 +0900
Subject: [PATCH] fix: Added property graph extractors and property graph index
 retrievers, explicitly.

---
 ui/ingest.py | 64 +++++++++++++++++-----------------------------------
 1 file changed, 21 insertions(+), 43 deletions(-)

diff --git a/ui/ingest.py b/ui/ingest.py
index c3631f0..427eefa 100644
--- a/ui/ingest.py
+++ b/ui/ingest.py
@@ -31,7 +31,10 @@
 
 from llama_index_client import Document
 from llama_index.core import VectorStoreIndex, PropertyGraphIndex
-from llama_index.core.indices.property_graph import SimpleLLMPathExtractor
+from llama_index.core.indices.property_graph import (
+    SimpleLLMPathExtractor,
+    ImplicitPathExtractor,
+)
 from llama_index.core import load_index_from_storage
 from llama_index.core import Settings
 from llama_index.core.node_parser import SentenceSplitter
@@ -43,6 +46,11 @@
 )
 from llama_index.core.retrievers import VectorIndexRetriever
 from llama_index.core import get_response_synthesizer
+from llama_index.core.indices.property_graph import (
+    PGRetriever,
+    VectorContextRetriever,
+    LLMSynonymRetriever,
+)
 from llama_index.core.query_engine import RetrieverQueryEngine
 from llama_index.core.chat_engine import ContextChatEngine
 from llama_index.core.schema import TransformComponent
@@ -142,15 +150,16 @@ def initialise_chat_engine() -> bool:
         sm.show_status_message(
             message=f"**Initialising chat engine** from index using the _{sm.global_settings__index_chat_mode.value}_ chat mode."
         )
-        # kg_retriever = KGTableRetriever(
-        #     index=sm.global_knowledge_graph_index.value,
-        #     embed_model=Settings.embed_model,
-        #     retriever_mode="hybrid",
-        #     graph_store_query_depth=2,
-        #     similarity_top_k=2,
-        #     verbose=True,
-        # )
-        kg_retriever = sm.global_knowledge_graph_index.value.as_retriever()
+        sub_retrievers = [
+            VectorContextRetriever(
+                sm.global_knowledge_graph_index.value.property_graph_store
+            ),
+            LLMSynonymRetriever(
+                sm.global_knowledge_graph_index.value.property_graph_store
+            ),
+        ]
+        kg_retriever = PGRetriever(sub_retrievers=sub_retrievers)
+        # The vector context retriever can make the vector index retriever redundant, in the future.
         vector_retriever = VectorIndexRetriever(
             index=sm.global_semantic_search_index.value,
             embed_model=Settings.embed_model,
@@ -252,10 +261,11 @@ def build_index_pipeline() -> bool:
     #     show_progress=True,
     # )
     kg_extractors = [
+        ImplicitPathExtractor(),
         SimpleLLMPathExtractor(
             llm=Settings.llm,
             max_paths_per_chunk=sm.global_settings__index_max_triplets_per_chunk.value,
-        )
+        ),
     ]
     sm.global_knowledge_graph_index.value = PropertyGraphIndex(
         nodes=chunk_nodes,
@@ -287,38 +297,6 @@ def build_index_pipeline() -> bool:
     return True
 
 
-# def build_index() -> bool:
-#     """Build the knowledge graph index from the documents. DEPRECATED: Use build_index_pipeline() instead."""
-#     # Try to output some kind of progress bar state?
-#     global ingested_documents
-#     chunk_parser = SentenceSplitter.from_defaults(
-#         chunk_size=Settings.chunk_size,
-#         chunk_overlap=Settings.chunk_overlap,
-#         include_metadata=True,
-#         include_prev_next_rel=True,
-#     )
-#     chunk_nodes = chunk_parser.get_nodes_from_documents(
-#         documents=ingested_documents.value, show_progress=True
-#     )
-#     sm.show_status_message(
-#         message=f"**Building index** from {len(chunk_nodes)} extracted chunks.",
-#         timeout=0,
-#     )
-#     sm.global_knowledge_graph_index.value = KnowledgeGraphIndex(
-#         nodes=chunk_nodes,
-#         llm=Settings.llm,
-#         embed_model=Settings.embed_model,
-#         storage_context=sm.global_llamaindex_storage_context.value,
-#         max_triplets_per_chunk=sm.global_settings__index_max_triplets_per_chunk.value,
-#         include_embeddings=sm.global_settings__index_include_embeddings.value,
-#         show_progress=True,
-#     )
-#     sm.global_knowledge_graph_index.value.storage_context.docstore.add_documents(
-#         chunk_nodes
-#     )
-#     return True
-
-
 @task(prefer_threaded=True)
 @observe()
 async def ingest_wikipedia_data():