In [1]:
# Standard library
import os

# Third-party
import nest_asyncio
from dotenv import load_dotenv
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# Read a local .env file (if present) into the process environment.
load_dotenv()

# Patch asyncio to allow nested event loops -- presumably required because the
# notebook kernel already runs its own loop.
nest_asyncio.apply()

In [3]:
# Read the files found under data/ into llama_index documents.
reader = SimpleDirectoryReader("data/")
documents = reader.load_data()

In [4]:
# Neo4j connection settings are read from the process environment
# (load_dotenv() in the first cell can populate it from a .env file).
_NEO4J_ENV_VARS = ("NEO4J_USERNAME", "NEO4J_PASSWORD", "NEO4J_URI")
_neo4j_env = {name: os.getenv(name) for name in _NEO4J_ENV_VARS}

# os.getenv returns None for an unset variable. Stop here with a readable
# message rather than handing None (or an empty string) to the Neo4j client.
_missing = [name for name, value in _neo4j_env.items() if not value]
if _missing:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing)
        + ". Set them in the shell or in a .env file before running this cell."
    )

username = _neo4j_env["NEO4J_USERNAME"]
password = _neo4j_env["NEO4J_PASSWORD"]
url = _neo4j_env["NEO4J_URI"]

In [5]:
from llama_index.core import PropertyGraphIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
from IPython.display import Markdown, display

# Connection settings for the Neo4j-backed property graph store, taken from
# the credentials read from the environment in the previous cell.
neo4j_connection = {
    "username": username,
    "password": password,
    "url": url,
}
graph_store = Neo4jPropertyGraphStore(**neo4j_connection)



In [6]:
# Attach the Neo4j store as the context's *property* graph store. The
# `graph_store` keyword of StorageContext.from_defaults is the slot for the
# older graph-store interface, not for a property graph store.
storage_context = StorageContext.from_defaults(
    property_graph_store=graph_store,
)

In [7]:
# LLM shared by the rest of the notebook; also registered as the
# llama_index global default.
llm = OpenAI(
    model="gpt-4o-mini",
    temperature=0.3,
)
Settings.llm = llm

# Embedding model, likewise registered as the global default.
embed_model = OpenAIEmbedding(
    model_name="text-embedding-3-small",
)
Settings.embed_model = embed_model


In [8]:
# Build the property graph index from the loaded documents, passing the LLM,
# embedding model, graph store and storage context explicitly.
index_build_options = {
    "llm": llm,
    "embed_model": embed_model,
    "show_progress": True,  # render the progress bars during the build
    "property_graph_store": graph_store,
    "storage_context": storage_context,
}
index_pg = PropertyGraphIndex.from_documents(documents, **index_build_options)

  from .autonotebook import tqdm as notebook_tqdm
Parsing nodes: 100%|██████████| 40/40 [00:00<00:00, 318.77it/s]
Extracting paths from text: 100%|██████████| 56/56 [00:30<00:00,  1.84it/s]
Extracting implicit paths: 100%|██████████| 56/56 [00:00<00:00, 3585.86it/s]
Generating embeddings: 100%|██████████| 1/1 [00:03<00:00,  3.34s/it]
Generating embeddings: 100%|██████████| 11/11 [00:12<00:00,  1.14s/it]


In [9]:
# Write the index's local stores to disk.
PERSIST_DIR = "./pg-storage"
index_pg.storage_context.persist(persist_dir=PERSIST_DIR)

# Reload from the directory just written. Loading from the in-memory
# `storage_context` would never read the persisted files, so the round trip
# would not show that the saved state is usable. The Neo4j store is
# re-attached because the graph itself lives in the database, not on disk.
persisted_context = StorageContext.from_defaults(
    persist_dir=PERSIST_DIR,
    property_graph_store=graph_store,
)
index_pg = load_index_from_storage(persisted_context)

In [10]:
# Define retriever.
# include_text controls whether source text is included in the returned
# nodes (default True); it is switched off here.
retriever = index_pg.as_retriever(include_text=False)

# Run one retrieval and print the text of every returned node.
retrieval_question = "What is Graph Retrieval-Augmented Generation?"
results = retriever.retrieve(retrieval_question)
for hit in results:
    print(hit.text)

Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\Users\\jayit\\GCCD\\triplet-store-graph-rag\\data\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Combines ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\Users\\jayit\\GCCD\\triplet-store-graph-rag\\data\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> External knowledge ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\Users\\jayit\\GCCD\\triplet-store-graph-rag\\data\\Graph

In [11]:
# Question answering: build a query engine with include_text=True and render
# the answer as Markdown.
query_engine = index_pg.as_query_engine(include_text=True)
response = query_engine.query(
    "What is Graph Retrieval-Augmented Generation?"
)
answer_markdown = Markdown(str(response))
display(answer_markdown)

Graph Retrieval-Augmented Generation (GraphRAG) is a framework designed for enhancing question-answering tasks by integrating relational knowledge from graph databases. It operates in three main stages: G-Indexing, G-Retrieval, and G-Generation. This approach emphasizes the retrieval of structured graph data, distinguishing it from traditional text-based methods. GraphRAG incorporates various enhancement techniques, such as query and knowledge enhancements, to improve the relevance and accuracy of the generated responses. By transforming retrieved graph information into formats suitable for language model generators, it aims to leverage the relationships and structural information inherent in graph data to enhance overall task performance.

In [14]:
# Ask a second question through the same query engine and render the answer.
response = query_engine.query(
    "What is Query-Focused Summarization?"
)
answer_markdown = Markdown(str(response))
display(answer_markdown)

Query-Focused Summarization is a process that involves generating summaries of documents or information that are specifically tailored to address a particular query or information need. This approach enhances the relevance of the summary by ensuring that it captures the most pertinent information related to the user's query, rather than providing a general overview of the entire content. It typically utilizes techniques such as knowledge graphs and retrieval-augmented generation to improve the quality and relevance of the summaries produced.