In [1]:
# attach to the same event-loop
import nest_asyncio

nest_asyncio.apply()
from dotenv import load_dotenv
load_dotenv()

import logging
import sys

# Set up the root logger
logger = logging.getLogger()
logger.setLevel(logging.INFO)  # Set logger level to INFO

# Clear out any existing handlers
logger.handlers = []

# Set up the StreamHandler to output to sys.stdout (Colab's output)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)  # Set handler level to INFO

# Add the handler to the logger
logger.addHandler(handler)

In [2]:
import logging
import sys
import pandas as pd

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

from llama_index.core.evaluation import (
    DatasetGenerator,
    FaithfulnessEvaluator,
    RelevancyEvaluator,
    CorrectnessEvaluator,
    RetrieverEvaluator,
    generate_question_context_pairs,
)

from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    Response,
)

from llama_index.llms.openai import OpenAI

import os

In [3]:
os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY')

In [4]:
documents = SimpleDirectoryReader("data/").load_data()

In [6]:
gpt4 = OpenAI(model="gpt-4o-mini", temperature=0.1)

dataset_generator = DatasetGenerator.from_documents(
    documents,
    llm=gpt4,
    show_progress=True,
)

eval_dataset = dataset_generator.generate_dataset_from_nodes(num=5)

Parsing nodes: 100%|██████████| 40/40 [00:00<00:00, 437.29it/s]
  return cls(
  0%|          | 0/5 [00:00<?, ?it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 20%|██        | 1/5 [00:02<00:08,  2.24s/it]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 40%|████      | 2/5 [00:02<00:03,  1.28s/it]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 60%|██████    | 3/5 [00:02<00:01,  1.35it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 80%|████████  | 4/5 [00:03<00:00,  1.31it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


100%|██████████| 5/5 [00:04<00:00,  1.14it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 10%|█         | 1/10 [00:00<00:08,  1.09it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 20%|██        | 2/10 [00:01<00:03,  2.18it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 40%|████      | 4/10 [00:01<00:01,  3.93it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 50%|█████     | 5/10 [00:01<00:01,  4.10it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 60%|██████    | 6/10 [00:01<00:01,  3.36it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


100%|██████████| 10/10 [00:02<00:00,  4.05it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  9%|▉         | 1/11 [00:01<00:11,  1.20s/it]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 18%|█▊        | 2/11 [00:01<00:04,  1.81it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 36%|███▋      | 4/11 [00:01<00:02,  2.84it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 45%|████▌     | 5/11 [00:01<00:01,  3.36it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 64%|██████▎   | 7/11 [00:02<00:00,  5.31it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 73%|███████▎  | 8/11 [00:02<00:00,  5.83it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 82%|████████▏ | 9/11 [00:02<00:00,  5.89it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 91%|█████████ | 10/11 [00:02<00:00,  5.04it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


100%|██████████| 11/11 [00:04<00:00,  2.61it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 10%|█         | 1/10 [00:01<00:09,  1.07s/it]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 50%|█████     | 5/10 [00:01<00:01,  4.79it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 70%|███████   | 7/10 [00:01<00:00,  5.45it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 90%|█████████ | 9/10 [00:02<00:00,  4.17it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


100%|██████████| 10/10 [00:02<00:00,  3.68it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 10%|█         | 1/10 [00:01<00:12,  1.35s/it]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 20%|██        | 2/10 [00:01<00:06,  1.19it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 40%|████      | 4/10 [00:02<00:02,  2.23it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 60%|██████    | 6/10 [00:02<00:01,  2.85it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 70%|███████   | 7/10 [00:03<00:01,  2.26it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 80%|████████  | 8/10 [00:03<00:00,  2.63it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 90%|█████████ | 9/10 [00:03<00:00,  2.99it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


100%|██████████| 10/10 [00:04<00:00,  2.45it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  9%|▉         | 1/11 [00:01<00:12,  1.27s/it]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 18%|█▊        | 2/11 [00:02<00:09,  1.01s/it]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 27%|██▋       | 3/11 [00:02<00:05,  1.34it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 36%|███▋      | 4/11 [00:02<00:03,  2.01it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 55%|█████▍    | 6/11 [00:02<00:01,  3.39it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 64%|██████▎   | 7/11 [00:03<00:01,  3.12it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 73%|███████▎  | 8/11 [00:04<00:01,  2.18it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 82%|████████▏ | 9/11 [00:04<00:00,  2.40it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 91%|█████████ | 10/11 [00:04<00:00,  2.55it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


100%|██████████| 11/11 [00:06<00:00,  1.82it/s]
  return QueryResponseDataset(queries=queries, responses=responses_dict)


In [7]:
eval_queries = list(eval_dataset.queries.values())
(eval_queries)

['What is the primary focus of the paper "Graph Retrieval-Augmented Generation: A Survey"?',
 'Describe the main challenges that Retrieval-Augmented Generation (RAG) aims to address in Large Language Models (LLMs).',
 'How does GraphRAG enhance the capabilities of traditional RAG systems?',
 'What are the three main components of the GraphRAG workflow as outlined in the paper?',
 'Identify and explain two core technologies mentioned in the paper that are utilized in the GraphRAG process.']

In [8]:
eval_query = "How did the author describe their early attempts at GraphRAG?"

In [9]:
from llama_index.llms.groq import Groq

groq = Groq(model="llama3-70b-8192", api_key=os.getenv('GROQ_API_KEY'))

# Fix GPT-4 LLM for evaluation
gpt4 = OpenAI(temperature=0, model="gpt-4o-mini")

In [10]:
# create vector index
vector_index = VectorStoreIndex.from_documents(documents, llm=groq)

# Query engine to generate response
query_engine = vector_index.as_query_engine()

HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [11]:
retriever = vector_index.as_retriever(similarity_top_k=3)
nodes = retriever.retrieve(eval_query)

HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [12]:
from IPython.display import display, HTML

display(HTML(f'{nodes[1].get_text()}'))

In [13]:
faithfulness_evaluator = FaithfulnessEvaluator(llm=gpt4)

In [14]:
# Generate response
response_vector = query_engine.query(eval_query)

HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [15]:
eval_result = faithfulness_evaluator.evaluate_response(
    response=response_vector
)

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [16]:
eval_result.passing

True

In [17]:
eval_result

EvaluationResult(query=None, contexts=['Graph Retrieval-Augmented Generation: A Survey 111:3\nsubset of documents and fails to grasp global information comprehensively, and hence struggles\nwith tasks such as Query-Focused Summarization (QFS).\nGraph Retrieval-Augmented Generation (GraphRAG) [ 25,50,108] emerges as an innovative\nsolution to address these challenges. Unlike traditional RAG, GraphRAG retrieves graph elements\ncontaining relational knowledge pertinent to a given query from a pre-constructed graph database,\nas depicted in Figure 1. These elements may include nodes, triples, paths, or subgraphs, which are\nutilized to generate responses. GraphRAG considers the interconnections between texts, enabling a\nmore accurate and comprehensive retrieval of relational information. Additionally, graph data, such\nas knowledge graphs, offer abstraction and summarization of textual data, thereby significantly\nshortening the length of the input text and mitigating concerns of verbosit

In [18]:
# Create RelevancyEvaluator using GPT-4 LLM
relevancy_evaluator = RelevancyEvaluator(llm=gpt4)

In [19]:
# Generate response
response_vector = query_engine.query(eval_query)

# Evaluation
eval_result = relevancy_evaluator.evaluate_response(
    query=eval_query, response=response_vector
)

HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [20]:
eval_result.query

'How did the author describe their early attempts at GraphRAG?'

In [21]:
eval_result.response

'The author described their early attempts at GraphRAG as a solution that retrieves graph elements containing relational knowledge from a pre-constructed graph database to generate responses. This approach considers the interconnections between texts, enabling a more accurate and comprehensive retrieval of relational information. Additionally, by utilizing graph data such as knowledge graphs, the method offers abstraction and summarization of textual data, which helps in shortening the length of input text and addressing concerns related to verbosity.'

In [22]:
eval_result.passing

True

In [23]:
# Create Query Engine with similarity_top_k=3
query_engine = vector_index.as_query_engine(similarity_top_k=3)

# Create response
response_vector = query_engine.query(eval_query)

# Evaluate with each source node
eval_source_result_full = [
    relevancy_evaluator.evaluate(
        query=eval_query,
        response=response_vector.response,
        contexts=[source_node.get_content()],
    )
    for source_node in response_vector.source_nodes
]

# Evaluation result
eval_source_result = [
    "Pass" if result.passing else "Fail" for result in eval_source_result_full
]

HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [24]:
eval_source_result

['Pass', 'Fail', 'Fail']

In [25]:
correctness_evaluator = CorrectnessEvaluator(llm=gpt4)

In [26]:
query = "Can you explain the theory of relativity proposed by Albert Einstein in detail?"

reference = """
Certainly! Albert Einstein's theory of relativity consists of two main components: special relativity and general relativity. Special relativity, published in 1905, introduced the concept that the laws of physics are the same for all non-accelerating observers and that the speed of light in a vacuum is a constant, regardless of the motion of the source or observer. It also gave rise to the famous equation E=mc², which relates energy (E) and mass (m).

General relativity, published in 1915, extended these ideas to include the effects of gravity. According to general relativity, gravity is not a force between masses, as described by Newton's theory of gravity, but rather the result of the warping of space and time by mass and energy. Massive objects, such as planets and stars, cause a curvature in spacetime, and smaller objects follow curved paths in response to this curvature. This concept is often illustrated using the analogy of a heavy ball placed on a rubber sheet, causing it to create a depression that other objects (representing smaller masses) naturally move towards.

In essence, general relativity provided a new understanding of gravity, explaining phenomena like the bending of light by gravity (gravitational lensing) and the precession of the orbit of Mercury. It has been confirmed through numerous experiments and observations and has become a fundamental theory in modern physics.
"""

response = """
Certainly! Albert Einstein's theory of relativity consists of two main components: special relativity and general relativity. Special relativity, published in 1905, introduced the concept that the laws of physics are the same for all non-accelerating observers and that the speed of light in a vacuum is a constant, regardless of the motion of the source or observer. It also gave rise to the famous equation E=mc², which relates energy (E) and mass (m).

However, general relativity, published in 1915, extended these ideas to include the effects of magnetism. According to general relativity, gravity is not a force between masses but rather the result of the warping of space and time by magnetic fields generated by massive objects. Massive objects, such as planets and stars, create magnetic fields that cause a curvature in spacetime, and smaller objects follow curved paths in response to this magnetic curvature. This concept is often illustrated using the analogy of a heavy ball placed on a rubber sheet with magnets underneath, causing it to create a depression that other objects (representing smaller masses) naturally move towards due to magnetic attraction.
"""

In [27]:
correctness_result = correctness_evaluator.evaluate(
    query=query,
    response=response,
    reference=reference,
)

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [28]:
correctness_result

EvaluationResult(query='Can you explain the theory of relativity proposed by Albert Einstein in detail?', contexts=None, response="\nCertainly! Albert Einstein's theory of relativity consists of two main components: special relativity and general relativity. Special relativity, published in 1905, introduced the concept that the laws of physics are the same for all non-accelerating observers and that the speed of light in a vacuum is a constant, regardless of the motion of the source or observer. It also gave rise to the famous equation E=mc², which relates energy (E) and mass (m).\n\nHowever, general relativity, published in 1915, extended these ideas to include the effects of magnetism. According to general relativity, gravity is not a force between masses but rather the result of the warping of space and time by magnetic fields generated by massive objects. Massive objects, such as planets and stars, create magnetic fields that cause a curvature in spacetime, and smaller objects foll

In [29]:
correctness_result.score

2.0

In [30]:
correctness_result.feedback

'The generated answer is relevant to the user query and covers the main components of the theory of relativity. However, it contains significant inaccuracies, particularly in its description of general relativity, incorrectly attributing the effects of magnetism to the warping of space and time instead of gravity. This misrepresentation detracts from the overall correctness of the answer.'

In [31]:
retrieved_nodes = retriever.retrieve(eval_query)

HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [32]:
from llama_index.core.response.notebook_utils import display_source_node

for node in retrieved_nodes:
    display_source_node(node, source_length=2000)

**Node ID:** 4922b6a9-c870-49a6-a82b-932df1cfd397<br>**Similarity:** 0.8485058359827878<br>**Text:** Graph Retrieval-Augmented Generation: A Survey 111:3
subset of documents and fails to grasp global information comprehensively, and hence struggles
with tasks such as Query-Focused Summarization (QFS).
Graph Retrieval-Augmented Generation (GraphRAG) [ 25,50,108] emerges as an innovative
solution to address these challenges. Unlike traditional RAG, GraphRAG retrieves graph elements
containing relational knowledge pertinent to a given query from a pre-constructed graph database,
as depicted in Figure 1. These elements may include nodes, triples, paths, or subgraphs, which are
utilized to generate responses. GraphRAG considers the interconnections between texts, enabling a
more accurate and comprehensive retrieval of relational information. Additionally, graph data, such
as knowledge graphs, offer abstraction and summarization of textual data, thereby significantly
shortening the length of the input text and mitigating concerns of verbosity. By retrieving subgraphs
or graph communities, we can access comprehensive information to effectively address the QFS
challenge by capturing the broader context and interconnections within the graph structure.
In this paper, we are the first to provide a systematic survey of GraphRAG. Specifically, we
begin by introducing the GraphRAG workflow, along with the foundational background knowledge
that underpins the field. Then, we categorize the literature according to the primary stages of the
GraphRAG process: Graph-Based Indexing (G-Indexing), Graph-Guided Retrieval (G-Retrieval),
and Graph-Enhanced Generation (G-Generation) in Section 5, Section 6 and Section 7 respectively,
detailing the core technologies and training methods within each phase. Furthermore, we investigate
downstream tasks, application domains, evaluation methodologies, and industrial use cases of
GraphRAG. This exploration elucidates how GraphRAG is being utilized in practical settings and
reflects its versatility and adaptability across various sectors. Finally...<br>

**Node ID:** b341b680-bc22-4cc5-963d-a10f994f26ad<br>**Similarity:** 0.8465902753907227<br>**Text:** 111:30 Peng et al.
GraphRAG can support medical diagnosis, patient record analysis, and personalized treatment
plans by integrating medical literature, patient histories, and real-time health data. In financial
services, GraphRAG can be utilized for fraud detection, risk assessment, and personalized financial
advice by analyzing transactional data, market trends, and customer profiles. Legal and compliance
applications can benefit from GraphRAG by enabling comprehensive legal research, contract analy-
sis, and regulatory compliance monitoring through the integration of legal documents, case law,
and regulatory updates. Expanding GraphRAG to these diverse and complex domains will enhance
its utility and impact, providing more sophisticated and targeted solutions across various fields.
11 Conclusion
In summary, this survey offers a comprehensive retrospective of GraphRAG technology, system-
atically categorizing and organizing its fundamental techniques, training methodologies, and
application scenarios. GraphRAG significantly enhances the relevance, accuracy, and comprehen-
siveness of information retrieval by leveraging pivotal relational knowledge derived from graph
datasets, thereby addressing critical limitations associated with traditional Retrieval-Augmented
Generation approaches. Furthermore, as GraphRAG represents a relatively nascent field of study,
we delineate the benchmarks, analyze prevailing challenges, and illuminate prospective future
research directions within this domain.
Acknowledgments
This work is supported by Ant Group through Ant Research Intern Program.
References
[1]Muhammad Arslan and Christophe Cruz. 2024. Business-RAG: Information Extraction for Business Insights. ICSBT
2024 (2024), 88.
[2]Sören Auer, Christian Bizer, Georgi Kobilarov, Jens Lehmann, Richard Cyganiak, and Zachary G. Ives. 2007. DBpedia:
A Nucleus for a Web of Open Data. In The Semantic Web, 6th International Semantic Web Conference, 2nd Asian
Semantic Web Conference, ISW...<br>

**Node ID:** e4e35952-ed2b-4ff6-8822-59f5b17777fa<br>**Similarity:** 0.8454618965792327<br>**Text:** 111:28 Peng et al.
Summarization (QFS) task [ 25]. The project can also utilize open-source RAG toolkits for rapid
implementation, such as LlamaIndex11, LangChain12, etc.
•GraphRAG (by NebulaGraph)13: The project is the first industrial GraphRAG system, which
is developed by NebulaGraph Corporation. The project integrates LLMs into the NebulaGraph
database, which aims to deliver more intelligent and precise search results.
•GraphRAG (by Antgroup)14: The framework is developed on the foundation of several AI
engineering frameworks such as DB-GPT, knowledge graph engine OpenSPG, and graph database
TuGraph. Specifically, the system begins by extracting triples from documents using LLMs, which
are then stored in the graph database. During the retrieval phase, it identifies keywords from the
query, locates corresponding nodes in the graph database, and traverses the subgraph using BFS
or DFS. In the generation phase, the retrieved subgraph data is formatted into text and submitted
along with the context and query for processing by LLMs.
•NallM (by Neo4j)15: The NaLLM (Neo4j and Large Language Models) framework integrates
Neo4j graph database technology with LLMs. It aims to explore and demonstrate the synergy
between Neo4j and LLMs, focusing on three primary use cases: Natural Language Interface to a
Knowledge Graph, Creating a Knowledge Graph from Unstructured Data, and Generate Reports
Using Both Static Data and LLM Data.
•LLM Graph Builder (by Neo4j)16: It is a project developed by Neo4j for automatically construct-
ing knowledge graphs, suitable for the GraphRAG’s Graph Database Construction and Indexing
phase. The project primarily utilizes LLMs to extract nodes, relationships, and their properties from
unstructured data, and utilizes the LangChain framework to create structured knowledge graphs.
10 Future Prospects
While GraphRAG technology has made substantial strides, it continues to face enduring challenges
that demand comprehensive exploration. This section ...<br>

In [33]:
qa_dataset = generate_question_context_pairs(
    nodes, llm=gpt4, num_questions_per_chunk=2
)

  0%|          | 0/3 [00:00<?, ?it/s]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 33%|███▎      | 1/3 [00:02<00:05,  2.56s/it]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 67%|██████▋   | 2/3 [00:03<00:01,  1.70s/it]

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


100%|██████████| 3/3 [00:04<00:00,  1.65s/it]


In [34]:
queries = qa_dataset.queries.values()
print(list(queries)[5])

What are the two primary projects mentioned that integrate LLMs with Neo4j, and what specific use cases do they aim to address in the context of knowledge graphs?


In [35]:
len(list(queries))

6

In [36]:
retriever_evaluator = RetrieverEvaluator.from_metric_names(
    ["mrr", "hit_rate"], retriever=retriever
)

In [37]:
# try it out on a sample query
sample_id, sample_query = list(qa_dataset.queries.items())[0]
sample_expected = qa_dataset.relevant_docs[sample_id]

eval_result = retriever_evaluator.evaluate(sample_query, sample_expected)
print(eval_result)

HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
Query: Explain the significance of Graph Retrieval-Augmented Generation (GraphRAG) in addressing the challenges of Query-Focused Summarization (QFS). How does it differ from traditional Retrieval-Augmented Generation (RAG)?
Metrics: {'mrr': 1.0, 'hit_rate': 1.0}



In [38]:
# try it out on an entire dataset
eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)

HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [39]:
def display_results(name, eval_results):
    """Display results from evaluate."""

    metric_dicts = []
    for eval_result in eval_results:
        metric_dict = eval_result.metric_vals_dict
        metric_dicts.append(metric_dict)

    full_df = pd.DataFrame(metric_dicts)

    hit_rate = full_df["hit_rate"].mean()
    mrr = full_df["mrr"].mean()

    metric_df = pd.DataFrame(
        {"retrievers": [name], "hit_rate": [hit_rate], "mrr": [mrr]}
    )

    return metric_df

In [40]:
display_results("top-2 eval", eval_results)

Unnamed: 0,retrievers,hit_rate,mrr
0,top-2 eval,0.833333,0.833333
