Sub Question Query Engine


In [7]:
from dotenv import load_dotenv
import os
load_dotenv()
api_key = os.environ["OPENAI_API_KEY"]

In [13]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler
from llama_index.core import Settings

In [14]:
# Using the LlamaDebugHandler to print the trace of the sub questions
# captured by the SUB_QUESTION callback event type
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

Settings.callback_manager = callback_manager

In [15]:
# load data
pg_essay = SimpleDirectoryReader(input_dir="data").load_data()

# build index and query engine
vector_query_engine = VectorStoreIndex.from_documents(
    pg_essay,
    use_async=True,
).as_query_engine()

**********
Trace: index_construction
    |_CBEventType.EMBEDDING -> 3.440705 seconds
    |_CBEventType.EMBEDDING -> 3.44502 seconds
**********


In [16]:
# setup base query engine as tool
query_engine_tools = [
    QueryEngineTool(
        query_engine=vector_query_engine,
        metadata=ToolMetadata(
            name="pg_essay",
            description="Paul Graham essay on What I Worked On",
        ),
    ),
]

query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
    use_async=True,
)

In [21]:
response = query_engine.query(
    "Risk Factors"
)

Generated 1 sub questions.
[1;3;38;2;237;90;200m[pg_essay] Q: What are the risk factors discussed in Paul Graham's essay on What I Worked On?
[0m[1;3;38;2;237;90;200m[pg_essay] A: The risk factors discussed in Paul Graham's essay on "What I Worked On" are not provided in the context information.
[0m**********
Trace: query
    |_CBEventType.QUERY -> 2.805887 seconds
      |_CBEventType.LLM -> 0.805158 seconds
      |_CBEventType.SUB_QUESTION -> 1.373905 seconds
        |_CBEventType.QUERY -> 1.362989 seconds
          |_CBEventType.RETRIEVE -> 0.67959 seconds
            |_CBEventType.EMBEDDING -> 0.67959 seconds
          |_CBEventType.SYNTHESIZE -> 0.683399 seconds
            |_CBEventType.TEMPLATING -> 0.0 seconds
            |_CBEventType.LLM -> 0.668308 seconds
      |_CBEventType.SYNTHESIZE -> 0.622765 seconds
        |_CBEventType.TEMPLATING -> 0.0 seconds
        |_CBEventType.LLM -> 0.620755 seconds
**********


In [22]:
response

Response(response='The risk factors discussed in Paul Graham\'s essay on "What I Worked On" are not provided in the context information.', source_nodes=[NodeWithScore(node=TextNode(id_='654ee162-d3c6-4897-81ba-ebde471b7e53', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Sub question: What are the risk factors discussed in Paul Graham\'s essay on What I Worked On?\nResponse: The risk factors discussed in Paul Graham\'s essay on "What I Worked On" are not provided in the context information.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=None), NodeWithScore(node=TextNode(id_='88ee26ef-9f5e-4cee-ae4b-bd6e09f28737', embedding=None, metadata={'page_label': '11', 'file_name': 'form10-k.pdf', 'file_path': 'c:\\Users\\Nanhi Deo\\Desktop\\LangchainRag\\data\\form10-k.pdf

In [23]:
from IPython.display import Markdown , display
display(Markdown(f"{response}"))

The risk factors discussed in Paul Graham's essay on "What I Worked On" are not provided in the context information.