# Traversal-Based Querying

## Setup

If you haven't already, install the toolkit and dependencies using the [Setup](./00-Setup.ipynb) notebook.

### TraversalBasedRetriever

See [TraversalBasedRetriever](https://github.com/awslabs/graphrag-toolkit/blob/main/docs/lexical-graph/querying.md#traversalbasedretriever).

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import set_logging_config
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory

set_logging_config('INFO')

graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

query_engine = LexicalGraphQueryEngine.for_traversal_based_search(
    graph_store, 
    vector_store
)

response = query_engine.query("What are the differences between Neptune Database and Neptune Analytics?")

print(f"""{response.response}

retrieve_ms: {int(response.metadata['retrieve_ms'])}
answer_ms  : {int(response.metadata['answer_ms'])}
total_ms   : {int(response.metadata['total_ms'])}
""")

#### Show the context passed to the LLM:

In [None]:
# Print the text of every source node returned with the response, one per line.
for node in response.source_nodes:
    print(node.text)

#### Show the underlying results:

In [None]:
import json

# Dump each source node's metadata as indented JSON.
for node in response.source_nodes:
    pretty_metadata = json.dumps(node.metadata, indent=2)
    print(pretty_metadata)

#### Visualise the results:

In [None]:
from graphrag_toolkit.lexical_graph.retrieval.model import SearchResult

def get_query_params_for_results(response, include_sources=True, include_facts=True, limit=-1):
    """Collect the query parameters needed to visualise a set of search results.

    Args:
        response: Either a list of source nodes, or an object exposing a
            ``source_nodes`` list. Each node's ``metadata`` is validated as a
            ``SearchResult``.
        include_sources: When true, append one ``{'s', 'c', 'l'}`` dict (source
            id, chunk id, statement id) per statement to ``source_params``.
        include_facts: When true, append every statement id to ``fact_params``.
        limit: Maximum number of nodes to process. A negative value (the
            default) or ``None`` means "no limit".

    Returns:
        A dict with the keys ``'statement_ids'``, ``'source_params'`` and
        ``'fact_params'``, each holding a list.
    """
    nodes = response if isinstance(response, list) else response.source_nodes

    # Only slice for a real, non-negative limit: slicing with the default of -1
    # would be `nodes[:-1]`, which silently drops the last node.
    if limit is not None and limit >= 0:
        nodes = nodes[:limit]

    statement_ids = []
    source_params = []
    fact_params = []

    for node in nodes:
        search_result = SearchResult.model_validate(node.metadata)
        source_id = search_result.source.sourceId

        for topic in search_result.topics:
            for statement in topic.statements:
                statement_id = statement.statementId
                chunk_id = statement.chunkId

                statement_ids.append(statement_id)
                if include_sources:
                    source_params.append({'s': source_id, 'c': chunk_id, 'l': statement_id})
                if include_facts:
                    fact_params.append(statement_id)

    return {
        'statement_ids': statement_ids,
        'source_params': source_params,
        'fact_params': fact_params,
    }
    
# Build the parameters from at most the first 10 source nodes of the response;
# the `%%oc` cell below receives them via `--query-parameters query_parameters`.
query_parameters = get_query_params_for_results(response, limit=10)

In [None]:
# JSON string handed to the `-d` option of the `%%oc` cell below.
# NOTE(review): presumably maps each node label to the property used as its
# caption in the graph visualisation — confirm against the graph-notebook docs.
display_var = '{"__Source__":"url","__Chunk__":"value","__Topic__":"value","__Statement__":"value","__Fact__":"value"}'

In [None]:
%%oc --query-parameters query_parameters -d $display_var -l 20

// Branch 1: for every {s, c, l} entry in $source_params, return the path
// statement -> topic -> chunk -> source whose node ids match that entry.
UNWIND $source_params AS source_params
MATCH p=(s:`__Source__`)<--(c:`__Chunk__`)<--(t:`__Topic__`)<--(l:`__Statement__`)
WHERE id(s) = source_params.s 
    AND id(c) = source_params.c 
    AND id(l) = source_params.l
RETURN p
UNION
// Branch 2: paths joining two statements from $fact_params through a shared
// fact (or two facts linked by one __NEXT__ hop), kept only when the two ends
// belong to different sources.
MATCH p=(x:`__Source__`)<--(:`__Chunk__`)<--(:`__Topic__`)<--(l:`__Statement__`)<-[:`__SUPPORTS__`]-(:`__Fact__`)-[:`__NEXT__`*0..1]->(:`__Fact__`)-[:`__SUPPORTS__`]->(ll:`__Statement__`)-->(:`__Topic__`)-->(:`__Chunk__`)-->(y:`__Source__`)
WHERE id(l) IN $fact_params
    AND id(ll) IN $fact_params
    AND x <> y
RETURN p
UNION
// Branch 3: every statement listed in $statement_ids, as a single-node path.
MATCH p=(l:`__Statement__`)
WHERE id(l) IN $statement_ids
RETURN p

#### Metadata filtering

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import set_logging_config
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
from graphrag_toolkit.lexical_graph.metadata import FilterConfig

from llama_index.core.vector_stores.types import FilterOperator, MetadataFilter

set_logging_config('INFO')

graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

query_engine = LexicalGraphQueryEngine.for_traversal_based_search(
    graph_store, 
    vector_store,
    filter_config = FilterConfig(
        MetadataFilter(
            key='url',
            value='https://docs.aws.amazon.com/neptune/latest/userguide/intro.html',
            operator=FilterOperator.EQ
        )
    )
)

response = query_engine.query("What are the differences between Neptune Database and Neptune Analytics?")

print(f"""{response.response}

retrieve_ms: {int(response.metadata['retrieve_ms'])}
answer_ms  : {int(response.metadata['answer_ms'])}
total_ms   : {int(response.metadata['total_ms'])}
""")

In [None]:
# Print the text of every source node returned with the filtered response.
for node in response.source_nodes:
    print(node.text)

#### Set subretriever

In the example below, the `TraversalBasedRetriever` is configured with a `ChunkBasedSearch` subretriever. (You can also try it with `EntityBasedSearch` or `EntityContextSearch`.)

In [None]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
from graphrag_toolkit.lexical_graph.retrieval.retrievers import ChunkBasedSearch
from graphrag_toolkit.lexical_graph.retrieval.retrievers import EntityBasedSearch
from graphrag_toolkit.lexical_graph.retrieval.retrievers import EntityContextSearch

graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

query_engine = LexicalGraphQueryEngine.for_traversal_based_search(
    graph_store, 
    vector_store,
    retrievers=[ChunkBasedSearch]
)

response = query_engine.query("What are the differences between Neptune Database and Neptune Analytics?")

print(f"""{response.response}

retrieve_ms: {int(response.metadata['retrieve_ms'])}
answer_ms  : {int(response.metadata['answer_ms'])}
total_ms   : {int(response.metadata['total_ms'])}
""")