# Querying

In [1]:
%reload_ext dotenv
%dotenv

import os

from graphrag_toolkit.lexical_graph.storage.graph.falkordb import FalkorDBGraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
from graphrag_toolkit.lexical_graph import set_logging_config

# Configure the toolkit's logging with the 'INFO' level for this notebook.
set_logging_config('INFO')

# Register the FalkorDB backend with the factory. This must run before the
# factory is asked for a graph store below.
GraphStoreFactory.register(FalkorDBGraphStoreFactory)

# Create graph and vector stores from the GRAPH_STORE and VECTOR_STORE
# environment variables (presumably supplied by the .env file loaded with
# %dotenv above -- confirm). os.environ[...] raises KeyError if either
# variable is not set.
graph_store = GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE'])
vector_store = VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE'])

  return torch._C._cuda_getDeviceCount() > 0


## SemanticGuidedRetriever

See [SemanticGuidedRetriever](https://github.com/awslabs/graphrag-toolkit/blob/main/docs/lexical-graph/querying.md#semanticguidedretriever).

In [2]:
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.retrieval.retrievers import RerankingBeamGraphSearch, StatementCosineSimilaritySearch, KeywordRankingSearch
from graphrag_toolkit.lexical_graph.retrieval.post_processors import SentenceReranker

# Seed retrievers: both are built on the same vector and graph stores.
cosine_retriever = StatementCosineSimilaritySearch(vector_store=vector_store, graph_store=graph_store, top_k=50)
keyword_retriever = KeywordRankingSearch(vector_store=vector_store, graph_store=graph_store, max_keywords=10)

# Reranker handed to the beam search below.
reranker = SentenceReranker(batch_size=128)

# Beam search over the graph, seeded by the two retrievers above.
beam_retriever = RerankingBeamGraphSearch(
    vector_store=vector_store, graph_store=graph_store,
    reranker=reranker,
    initial_retrievers=[cosine_retriever, keyword_retriever],
    max_depth=8, beam_width=100,
)

# Query engine for semantic-guided search, using all three retrievers.
query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store, vector_store,
    retrievers=[cosine_retriever, keyword_retriever, beam_retriever],
)

# Ask the question and print the text of the generated answer.
response = query_engine.query(
    "What are the similarities and differences between Neptune Database and Neptune Analytics?"
)
print(response.response)

  from .autonotebook import tqdm as notebook_tqdm


2025-05-29 16:48:56:INFO:datasets       :PyTorch version 2.7.0 available.
2025-05-29 16:49:02:INFO:g.l.r.r.rerank_beam_search:Retrieved 20 new nodes through beam search.
Neptune Database and Neptune Analytics are both part of Amazon Neptune, but they serve different purposes and have distinct features. Here are the key similarities and differences:

Similarities:
1. Both are graph database services offered by Amazon Web Services [source_4.1, source_4.9].
2. They are designed to work with graph data and highly connected datasets [source_3.14, source_3.25].
3. Both services aim to make it easier for users to work with graph data in the AWS Cloud [source_1.31].

Differences:
1. Purpose:
   - Neptune Database is a serverless graph database designed for optimal scalability and availability [source_1.3, source_1.4].
   - Neptune Analytics is an analytics database engine specifically for analyzing graph databases and datasets [source_1.1, source_1.2].

2. Use cases:
   - Neptune Database is s

## Set prompt from disk

In [3]:
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.prompts.file_prompt_provider import FilePromptProvider
from graphrag_toolkit.lexical_graph.prompts.prompt_provider_config import FilePromptProviderConfig
from graphrag_toolkit.lexical_graph.retrieval.retrievers import RerankingBeamGraphSearch, StatementCosineSimilaritySearch, KeywordRankingSearch
from graphrag_toolkit.lexical_graph.retrieval.post_processors import SentenceReranker

# Step 1: Set up the file-based prompt provider.
# The file names below match what the provider logged for the recorded run of
# this cell (base path ./prompts, files system_prompt.txt and user_prompt.txt)
# and the names used for the S3 provider elsewhere in this notebook. They do
# not repeat the "prompts/" directory, because the provider reports its own
# base path.
# NOTE(review): confirm these names against the files in your prompt directory.
prompt_provider = FilePromptProvider(
    FilePromptProviderConfig(
        system_prompt_file="system_prompt.txt",
        user_prompt_file="user_prompt.txt"
    )
)

# Step 2: Set up the retrievers (same configuration as the other examples in
# this notebook).
cosine_retriever = StatementCosineSimilaritySearch(
    vector_store=vector_store,
    graph_store=graph_store,
    top_k=50
)

keyword_retriever = KeywordRankingSearch(
    vector_store=vector_store,
    graph_store=graph_store,
    max_keywords=10
)

reranker = SentenceReranker(batch_size=128)

beam_retriever = RerankingBeamGraphSearch(
    vector_store=vector_store,
    graph_store=graph_store,
    reranker=reranker,
    initial_retrievers=[cosine_retriever, keyword_retriever],
    max_depth=8,
    beam_width=100
)

# Step 3: Instantiate the query engine, passing the file-based prompt provider
# so the prompts come from disk.
query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store,
    vector_store,
    retrievers=[cosine_retriever, keyword_retriever, beam_retriever],
    prompt_provider=prompt_provider
)

# Step 4: Run the query and print the text of the generated answer.
response = query_engine.query("What are the similarities and differences between Neptune Database and Neptune Analytics?")
print(response.response)


2025-05-29 16:49:24:INFO:g.l.p.file_prompt_provider:[Prompt Debug] Initialized FilePromptProvider
2025-05-29 16:49:24:INFO:g.l.p.file_prompt_provider:[Prompt Debug] Base path: ./prompts
2025-05-29 16:49:24:INFO:g.l.p.file_prompt_provider:[Prompt Debug] System prompt file: system_prompt.txt
2025-05-29 16:49:24:INFO:g.l.p.file_prompt_provider:[Prompt Debug] User prompt file: user_prompt.txt
2025-05-29 16:49:29:INFO:g.l.r.r.rerank_beam_search:Retrieved 21 new nodes through beam search.
{
  "answer": "Neptune Database and Neptune Analytics are both part of Amazon Neptune but serve different purposes. Neptune Database is a serverless graph database designed for operational workloads, while Neptune Analytics is an analytics engine for processing and analyzing large graph datasets. They have similarities in dealing with graph data but differ in their primary functions, performance characteristics, and use cases.",
  "supporting_facts": [
    "Neptune Database is a serverless graph database de

## Set prompt from S3

In [4]:
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.prompts.s3_prompt_provider import S3PromptProvider
from graphrag_toolkit.lexical_graph.prompts.prompt_provider_config import S3PromptProviderConfig
from graphrag_toolkit.lexical_graph.retrieval.retrievers import (
    RerankingBeamGraphSearch,
    StatementCosineSimilaritySearch,
    KeywordRankingSearch
)
from graphrag_toolkit.lexical_graph.retrieval.post_processors import SentenceReranker

# Step 1: Build the S3-backed prompt provider.
# The recorded run loads s3://<bucket>/<prefix>/<file> for each prompt file.
# NOTE(review): per the original author's notes, aws_region may be omitted when
# the region comes from the environment, and user_prompt_file may be omitted
# when the default name is used -- confirm against the config class.
prompt_provider = S3PromptProvider(
    S3PromptProviderConfig(
        bucket="ccms-prompts",
        prefix="prompts",
        aws_region="ap-south-1",
        aws_profile="padmin",
        system_prompt_file="system_prompt.txt",
        user_prompt_file="user_prompt.txt",
    )
)

# Step 2: Build the retrievers; both seed retrievers share the same stores.
cosine_retriever = StatementCosineSimilaritySearch(vector_store=vector_store, graph_store=graph_store, top_k=50)
keyword_retriever = KeywordRankingSearch(vector_store=vector_store, graph_store=graph_store, max_keywords=10)

reranker = SentenceReranker(batch_size=128)

beam_retriever = RerankingBeamGraphSearch(
    vector_store=vector_store, graph_store=graph_store,
    reranker=reranker,
    initial_retrievers=[cosine_retriever, keyword_retriever],
    max_depth=8, beam_width=100,
)

# Step 3: Create the query engine with the S3-based prompt provider.
query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store, vector_store,
    retrievers=[
        cosine_retriever,
        keyword_retriever,
        beam_retriever,
    ],
    prompt_provider=prompt_provider,
)

# Step 4: Run the query and print the text of the generated answer.
response = query_engine.query(
    "What are the similarities and differences between Neptune Database and Neptune Analytics?"
)
print(response.response)


2025-05-29 16:49:40:INFO:g.l.p.s3_prompt_provider:[Prompt Debug] Loading prompt from S3: s3://ccms-prompts/prompts/system_prompt.txt
2025-05-29 16:49:42:INFO:g.l.p.s3_prompt_provider:[Prompt Debug] Loading prompt from S3: s3://ccms-prompts/prompts/user_prompt.txt
2025-05-29 16:49:46:INFO:g.l.r.r.rerank_beam_search:Retrieved 21 new nodes through beam search.
{
  "answer": "Neptune Database and Neptune Analytics are both part of Amazon Neptune but serve different purposes. Neptune Database is a serverless graph database designed for operational workloads, while Neptune Analytics is an analytics engine for processing and analyzing large graph datasets. They have similarities in dealing with graph data but differ in their primary functions, performance characteristics, and use cases.",
  "supporting_facts": [
    "Neptune Database is a serverless graph database designed for optimal scalability and availability [source_1]",
    "Neptune Analytics is an analytics database engine for analyzin

## Set prompt from Amazon Bedrock Prompt Management

In [5]:
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
from graphrag_toolkit.lexical_graph.prompts.prompt_provider_config import BedrockPromptProviderConfig
from graphrag_toolkit.lexical_graph.retrieval.retrievers import (
    RerankingBeamGraphSearch,
    StatementCosineSimilaritySearch,
    KeywordRankingSearch
)
from graphrag_toolkit.lexical_graph.retrieval.post_processors import SentenceReranker

# Step 1: Build the Bedrock prompt provider from its config.
# The prompt identifiers may be given as a short ID or a full ARN; the recorded
# run shows the short IDs resolved to full prompt ARNs in the configured
# region. aws_profile is the AWS profile used for the session (an SSO profile
# in the original author's setup).
prompt_provider = BedrockPromptProviderConfig(
    aws_region="us-east-1",
    aws_profile="padmin",
    system_prompt_arn="KEOXPXUM00",
    user_prompt_arn="TSF4PI4A6C",
).build()

# Step 2: Build the retrievers; both seed retrievers share the same stores.
cosine_retriever = StatementCosineSimilaritySearch(vector_store=vector_store, graph_store=graph_store, top_k=50)
keyword_retriever = KeywordRankingSearch(vector_store=vector_store, graph_store=graph_store, max_keywords=10)

reranker = SentenceReranker(batch_size=128)

beam_retriever = RerankingBeamGraphSearch(
    vector_store=vector_store, graph_store=graph_store,
    reranker=reranker,
    initial_retrievers=[cosine_retriever, keyword_retriever],
    max_depth=8, beam_width=100,
)

# Step 3: Create the query engine with the Bedrock prompt provider.
query_engine = LexicalGraphQueryEngine.for_semantic_guided_search(
    graph_store, vector_store,
    retrievers=[
        cosine_retriever,
        keyword_retriever,
        beam_retriever,
    ],
    prompt_provider=prompt_provider,
)

# Step 4: Run the query and print the text of the generated answer.
response = query_engine.query(
    "What are the similarities and differences between Neptune Database and Neptune Analytics?"
)
print(response.response)


2025-05-29 16:49:57:INFO:g.l.p.bedrock_prompt_provider:[Prompt Debug] Using BedrockPromptProvider with:
  system_prompt_arn=KEOXPXUM00 (resolved=arn:aws:bedrock:us-east-1:188967239867:prompt/KEOXPXUM00, version=None)
  user_prompt_arn=TSF4PI4A6C (resolved=arn:aws:bedrock:us-east-1:188967239867:prompt/TSF4PI4A6C, version=None)
  region=us-east-1, profile=padmin
2025-05-29 16:50:02:INFO:g.l.r.r.rerank_beam_search:Retrieved 20 new nodes through beam search.
Neptune Database and Neptune Analytics are both part of Amazon Neptune, but they serve different purposes and have distinct features. Here are the key similarities and differences:

Similarities:
1. Both are graph database services offered by Amazon Web Services [source_4.1, source_4.9].
2. They are designed to work with graph data and highly connected datasets [source_3.14, source_3.19].
3. Both can be used as part of data analysis workflows [source_1.28, source_2.14].

Differences:
1. Purpose:
   - Neptune Database is a serverless gr