Skip to content

Commit

Permalink
fix: Added property graph extractors and property graph index retriev…
Browse files Browse the repository at this point in the history
…ers, explicitly.
  • Loading branch information
anirbanbasu committed Jun 5, 2024
1 parent 766a210 commit aa8f5b9
Showing 1 changed file with 21 additions and 43 deletions.
64 changes: 21 additions & 43 deletions ui/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@

from llama_index_client import Document
from llama_index.core import VectorStoreIndex, PropertyGraphIndex
from llama_index.core.indices.property_graph import SimpleLLMPathExtractor
from llama_index.core.indices.property_graph import (
SimpleLLMPathExtractor,
ImplicitPathExtractor,
)
from llama_index.core import load_index_from_storage
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter
Expand All @@ -43,6 +46,11 @@
)
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import get_response_synthesizer
from llama_index.core.indices.property_graph import (
PGRetriever,
VectorContextRetriever,
LLMSynonymRetriever,
)
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.chat_engine import ContextChatEngine
from llama_index.core.schema import TransformComponent
Expand Down Expand Up @@ -142,15 +150,16 @@ def initialise_chat_engine() -> bool:
sm.show_status_message(
message=f"**Initialising chat engine** from index using the _{sm.global_settings__index_chat_mode.value}_ chat mode."
)
# kg_retriever = KGTableRetriever(
# index=sm.global_knowledge_graph_index.value,
# embed_model=Settings.embed_model,
# retriever_mode="hybrid",
# graph_store_query_depth=2,
# similarity_top_k=2,
# verbose=True,
# )
kg_retriever = sm.global_knowledge_graph_index.value.as_retriever()
sub_retrievers = [
VectorContextRetriever(
sm.global_knowledge_graph_index.value.property_graph_store
),
LLMSynonymRetriever(
sm.global_knowledge_graph_index.value.property_graph_store
),
]
kg_retriever = PGRetriever(sub_retrievers=sub_retrievers)
# The vector context retriever can make the vector index retriever redundant, in the future.
vector_retriever = VectorIndexRetriever(
index=sm.global_semantic_search_index.value,
embed_model=Settings.embed_model,
Expand Down Expand Up @@ -252,10 +261,11 @@ def build_index_pipeline() -> bool:
# show_progress=True,
# )
kg_extractors = [
ImplicitPathExtractor(),
SimpleLLMPathExtractor(
llm=Settings.llm,
max_paths_per_chunk=sm.global_settings__index_max_triplets_per_chunk.value,
)
),
]
sm.global_knowledge_graph_index.value = PropertyGraphIndex(
nodes=chunk_nodes,
Expand Down Expand Up @@ -287,38 +297,6 @@ def build_index_pipeline() -> bool:
return True


# def build_index() -> bool:
# """Build the knowledge graph index from the documents. DEPRECATED: Use build_index_pipeline() instead."""
# # Try to output some kind of progress bar state?
# global ingested_documents
# chunk_parser = SentenceSplitter.from_defaults(
# chunk_size=Settings.chunk_size,
# chunk_overlap=Settings.chunk_overlap,
# include_metadata=True,
# include_prev_next_rel=True,
# )
# chunk_nodes = chunk_parser.get_nodes_from_documents(
# documents=ingested_documents.value, show_progress=True
# )
# sm.show_status_message(
# message=f"**Building index** from {len(chunk_nodes)} extracted chunks.",
# timeout=0,
# )
# sm.global_knowledge_graph_index.value = KnowledgeGraphIndex(
# nodes=chunk_nodes,
# llm=Settings.llm,
# embed_model=Settings.embed_model,
# storage_context=sm.global_llamaindex_storage_context.value,
# max_triplets_per_chunk=sm.global_settings__index_max_triplets_per_chunk.value,
# include_embeddings=sm.global_settings__index_include_embeddings.value,
# show_progress=True,
# )
# sm.global_knowledge_graph_index.value.storage_context.docstore.add_documents(
# chunk_nodes
# )
# return True


@task(prefer_threaded=True)
@observe()
async def ingest_wikipedia_data():
Expand Down

0 comments on commit aa8f5b9

Please sign in to comment.