# Router Query Engine

In [None]:
# Load environment variables from a local .env file before anything reads them.
# NOTE(review): presumably this is where the RAGAAI_CATALYST_* settings used
# below (and the OpenAI API key) come from -- confirm against your .env file.
from dotenv import load_dotenv
load_dotenv()

True

### Import RagaAI Catalyst components for tracing

In [2]:
from ragaai_catalyst.tracers import Tracer
from ragaai_catalyst import RagaAICatalyst, init_tracing
import os

# Create the RagaAI Catalyst client. All three settings are read from the
# environment; a value is None when its variable is not set.
catalyst = RagaAICatalyst(
    access_key=os.environ.get("RAGAAI_CATALYST_ACCESS_KEY"),
    secret_key=os.environ.get("RAGAAI_CATALYST_SECRET_KEY"),
    base_url=os.environ.get("RAGAAI_CATALYST_BASE_URL"),
)

# Tracer configured for LlamaIndex, reporting to the given project and dataset.
tracer = Tracer(
    tracer_type="llamaindex",
    project_name="llama_index_testing",
    dataset_name="router_query_engine",
)

# Hand the client and the tracer to RagaAI Catalyst's tracing setup.
init_tracing(tracer=tracer, catalyst=catalyst)

INFO:httpx:HTTP Request: GET https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json "HTTP/1.1 200 OK"


Token(s) set successfully


In [3]:
from llama_index.core.workflow import Event
from llama_index.core.base.base_selector import SelectorResult
from typing import Dict, List, Any
from llama_index.core.base.response.schema import RESPONSE_TYPE


class QueryEngineSelectionEvent(Event):
    """Workflow event carrying the result of selecting the query engine tools.

    Returned by ``RouterQueryEngineWorkflow.selector`` and consumed by
    ``RouterQueryEngineWorkflow.generate_responses``.
    """

    # Selector output; its ``selections`` entries expose the ``index`` of the
    # chosen tool and the ``reason`` it was chosen.
    selected_query_engines: SelectorResult


class SynthesizeEvent(Event):
    """Workflow event for synthesizing the responses of the query engines.

    Returned by ``RouterQueryEngineWorkflow.generate_responses`` and consumed
    by ``RouterQueryEngineWorkflow.synthesize_responses``.
    """

    # One response per query engine that was queried.
    result: List[RESPONSE_TYPE]
    # The selector output that led to ``result``; it is attached to the final
    # response's metadata under the "selector_result" key.
    selected_query_engines: SelectorResult

In [4]:
from llama_index.core.workflow import (
    Context,
    Workflow,
    StartEvent,
    StopEvent,
    step,
)

from llama_index.llms.openai import OpenAI
from llama_index.core.selectors.utils import get_selector_from_llm
from llama_index.core.base.response.schema import (
    PydanticResponse,
    Response,
    AsyncStreamingResponse,
)
from llama_index.core.bridge.pydantic import BaseModel
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.schema import QueryBundle
from llama_index.core import Settings

from IPython.display import Markdown, display
import asyncio


class RouterQueryEngineWorkflow(Workflow):
    """Route a query to one or more query engines and return a single answer.

    The steps are chained by the event types they accept and return:

    1. ``selector`` asks an LLM-backed selector which query engine tool(s)
       fit the query.
    2. ``generate_responses`` queries every selected engine.
    3. ``synthesize_responses`` returns the only response as is, or
       summarizes several responses into one.

    Keyword arguments read from the ``StartEvent`` (i.e. passed to ``run()``):
    ``query``, ``llm``, ``query_engine_tools``, ``summarizer`` and
    ``select_multi``.
    """

    @step
    async def selector(
        self, ctx: Context, ev: StartEvent
    ) -> QueryEngineSelectionEvent:
        """Select a single query engine or multiple query engines for the query.

        The ``llm`` passed to ``run()`` drives the selector; when none is
        passed, the global ``Settings.llm`` is used instead. The query, LLM,
        tools and summarizer are stored in the workflow context for the later
        steps.

        Returns:
            A ``QueryEngineSelectionEvent`` holding the selector's result.
        """
        query = ev.get("query")
        query_engine_tools = ev.get("query_engine_tools")
        select_multiple_query_engines = ev.get("select_multi")

        # Honour the LLM handed to run(); previously it was stored in the
        # context but Settings.llm was always used for the selection.
        llm = ev.get("llm")
        if llm is None:
            llm = Settings.llm

        await ctx.set("query", query)
        await ctx.set("llm", llm)
        await ctx.set("query_engine_tools", query_engine_tools)
        await ctx.set("summarizer", ev.get("summarizer"))

        selector = get_selector_from_llm(
            llm, is_multi=select_multiple_query_engines
        )

        # The selector chooses among the tools based on their metadata.
        query_engines_metadata = [tool.metadata for tool in query_engine_tools]

        selected_query_engines = await selector.aselect(
            query_engines_metadata, query
        )

        return QueryEngineSelectionEvent(
            selected_query_engines=selected_query_engines
        )

    @step
    async def generate_responses(
        self, ctx: Context, ev: QueryEngineSelectionEvent
    ) -> SynthesizeEvent:
        """Generate the responses from the selected query engines.

        Every selected engine is queried with the stored query; the queries
        are awaited together so that multiple engines run concurrently.

        Returns:
            A ``SynthesizeEvent`` with one response per selected engine, in
            selection order.

        Raises:
            ValueError: If the selector did not select any query engine.
        """
        query = await ctx.get("query", default=None)
        query_engine_tools = await ctx.get("query_engine_tools")
        selected_query_engines = ev.selected_query_engines
        selections = selected_query_engines.selections

        print(f"number of selected query engines: {len(selections)}")

        # Fail with a clear message instead of an IndexError on selections[0].
        if not selections:
            raise ValueError(
                "The selector did not select any query engine for the query."
            )

        tasks = []
        for selection in selections:
            print(
                f"Selected query engine: {selection.index}: {selection.reason}"
            )
            # ``index`` refers to the position in the list of tools whose
            # metadata was given to the selector.
            query_engine = query_engine_tools[selection.index].query_engine
            tasks.append(query_engine.aquery(query))

        # A single selection is simply a gather over one query.
        response_generated = await asyncio.gather(*tasks)

        return SynthesizeEvent(
            result=response_generated,
            selected_query_engines=selected_query_engines,
        )

    async def acombine_responses(
        self,
        summarizer: TreeSummarize,
        responses: List[RESPONSE_TYPE],
        query_bundle: QueryBundle,
    ) -> RESPONSE_TYPE:
        """Combine multiple responses from sub-engines into one response.

        Args:
            summarizer: Summarizer whose ``aget_response`` merges the texts.
            responses: The responses returned by the individual query engines.
            query_bundle: Bundle whose ``query_str`` is given to the summarizer.

        Returns:
            A ``Response``, ``PydanticResponse`` or ``AsyncStreamingResponse``
            (depending on what the summarizer returns) carrying the summary
            and the source nodes of all combined responses.
        """

        print("Combining responses from multiple query engines.")

        response_strs = []
        source_nodes = []
        for response in responses:
            if isinstance(
                response, (AsyncStreamingResponse, PydanticResponse)
            ):
                response_obj = await response.aget_response()
            else:
                response_obj = response
            source_nodes.extend(response_obj.source_nodes)
            # NOTE(review): the text is taken from the original ``response``,
            # not from ``response_obj`` -- confirm this is intended for
            # streaming responses.
            response_strs.append(str(response))

        summary = await summarizer.aget_response(
            query_bundle.query_str, response_strs
        )

        # Wrap the summary in the response type matching what came back.
        if isinstance(summary, str):
            return Response(response=summary, source_nodes=source_nodes)
        elif isinstance(summary, BaseModel):
            return PydanticResponse(
                response=summary, source_nodes=source_nodes
            )
        else:
            return AsyncStreamingResponse(
                response_gen=summary, source_nodes=source_nodes
            )

    @step
    async def synthesize_responses(
        self, ctx: Context, ev: SynthesizeEvent
    ) -> StopEvent:
        """Synthesize the final response from the generated responses.

        Several responses are merged with ``acombine_responses``; a single
        response is passed through unchanged. In both cases the selector
        result is recorded in the response metadata under "selector_result".

        Returns:
            A ``StopEvent`` whose ``result`` is the final response.
        """

        response_generated = ev.result
        query = await ctx.get("query", default=None)
        summarizer = await ctx.get("summarizer")
        selected_query_engines = ev.selected_query_engines

        if len(response_generated) > 1:
            response = await self.acombine_responses(
                summarizer, response_generated, QueryBundle(query_str=query)
            )
        else:
            response = response_generated[0]

        # The metadata may be unset on the response; make sure it is a dict.
        response.metadata = response.metadata or {}
        response.metadata["selector_result"] = selected_query_engines

        return StopEvent(result=response)

In [5]:
# One OpenAI model instance, bound to `llm` and also registered as the
# global LLM on Settings.
Settings.llm = llm = OpenAI(model="gpt-4o-mini")

In [6]:
from llama_index.core.prompts.default_prompt_selectors import (
    DEFAULT_TREE_SUMMARIZE_PROMPT_SEL,
)

# Summarizer passed to the workflow; it merges the answers when more than one
# query engine responds.
summarizer = TreeSummarize(
    summary_template=DEFAULT_TREE_SUMMARIZE_PROMPT_SEL,
    llm=llm,
)

In [7]:
import os
import urllib.request

# Download the sample essay into a local data directory.
url = 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt'
directory = 'data/paul_graham/'
file_path = os.path.join(directory, 'paul_graham_essay.txt')

# The target directory has to exist before the file is written into it.
os.makedirs(directory, exist_ok=True)
urllib.request.urlretrieve(url, file_path)
print(f"File saved at {file_path}")


File saved at data/paul_graham/paul_graham_essay.txt


In [8]:
from llama_index.core import SimpleDirectoryReader

# Load everything in the data directory that the download cell wrote to.
documents = SimpleDirectoryReader("./data/paul_graham").load_data()

In [9]:
# Turn the loaded documents into nodes with the globally configured node
# parser; all three indexes below are built from these nodes.
nodes = Settings.node_parser.get_nodes_from_documents(documents)

In [10]:
from llama_index.core import (
    VectorStoreIndex,
    SummaryIndex,
    SimpleKeywordTableIndex,
)

# Build three different indexes over the same nodes; each one backs one of the
# query engines the router can choose from.
summary_index = SummaryIndex(nodes)
# The captured cell output shows a request to the OpenAI embeddings endpoint
# while this cell runs.
vector_index = VectorStoreIndex(nodes)
keyword_index = SimpleKeywordTableIndex(nodes)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [11]:
from llama_index.core.tools import QueryEngineTool

# One query engine per index. The summary engine summarizes with
# "tree_summarize" and runs asynchronously.
list_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
vector_query_engine = vector_index.as_query_engine()
keyword_query_engine = keyword_index.as_query_engine()

# Wrap each engine in a tool. The workflow's selector picks tools based on
# their metadata, so each description should state clearly what the engine is
# good for.
list_tool = QueryEngineTool.from_defaults(
    query_engine=list_query_engine,
    description=(
        "Useful for summarization questions related to Paul Graham essay on"
        " What I Worked On."
    ),
)

vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "Useful for retrieving specific context from Paul Graham essay on What"
        " I Worked On."
    ),
)

keyword_tool = QueryEngineTool.from_defaults(
    query_engine=keyword_query_engine,
    description=(
        "Useful for retrieving specific context using keywords from Paul"
        " Graham essay on What I Worked On."
    ),
)

# The order matters: a selection's index refers to the position in this list.
query_engine_tools = [list_tool, vector_tool, keyword_tool]

In [12]:
import nest_asyncio

# NOTE(review): presumably needed so async code can run inside the notebook's
# already-running event loop -- confirm against the nest_asyncio docs.
nest_asyncio.apply()

In [19]:
# (query, select_multi) pairs: the question to ask and whether the selector
# may pick more than one query engine for it.
questions = [
    ("Provide the summary of the document?", True),
    ("What did the author do growing up?", False),
    (
        "What were noteable events and people from the authors time at Interleaf and YC?,",
        True,
    ),
]

### Run the workflow with RagaAI Catalyst

In [20]:
with tracer:
    w = RouterQueryEngineWorkflow(timeout=200)

    for query,select_multi in questions:
        result = await w.run(
            query=query,
            llm=llm,
            query_engine_tools=query_engine_tools,
            summarizer=summarizer,
            select_multi=select_multi, 
        )

        display(
            Markdown("> Question: {}".format(query)),
            Markdown("Answer: {}".format(result)),
        )

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


number of selected query engines: 1
Selected query engine: 0: This choice directly addresses the need for a summary of the document.


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Uploading traces...
**********
Trace: query
**********


> Question: Provide the summary of the document?

Answer: The document is a reflective essay detailing the author's journey through various phases of life, focusing on their experiences in writing, programming, art, and entrepreneurship. It begins with their early interests in writing short stories and programming on an IBM 1401, transitioning to microcomputers and eventually to a deeper engagement with artificial intelligence during college. The author expresses disillusionment with philosophy and a shift towards AI, leading to self-directed learning and the development of a thesis on SHRDLU.

After graduate school, the author explores art, attending classes at Harvard and later the Accademia di Belle Arti in Florence, where they realize the limitations of formal art education. They return to the U.S. to work at Interleaf, where they learn about the software industry and the dynamics of technology companies. This experience culminates in the founding of Viaweb, an early e-commerce platform, which is later acquired by Yahoo.

The essay also discusses the author's transition to angel investing and the founding of Y Combinator, a startup accelerator that revolutionizes startup funding through a batch model. The author reflects on the challenges and successes of Y Combinator, the importance of community among startups, and the evolution of their own interests over time.

In the latter part of the essay, the author shares their return to painting after stepping back from Y Combinator, the development of a new Lisp programming language called Bel, and the realization of the potential for online publishing. The narrative emphasizes the value of pursuing unprestigious work, the significance of personal experiences in shaping one's path, and the ongoing exploration of creativity and technology.

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


number of selected query engines: 1
Selected query engine: 1: The question asks for specific context about the author's experiences growing up, which aligns with retrieving specific context from the essay.


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Uploading traces...
**********
Trace: query
**********


> Question: What did the author do growing up?

Answer: Growing up, the author worked on writing and programming outside of school. Initially, he wrote short stories, which he later described as lacking plot and depth. He also began programming on an IBM 1401 in 9th grade, where he experimented with an early version of Fortran, although he found it puzzling and struggled to create meaningful programs. Eventually, he got a TRS-80 microcomputer, which allowed him to start programming more seriously, creating simple games and a word processor. Despite enjoying programming, he initially planned to study philosophy in college.

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:llama_index.core.indices.keyword_table.retrievers:> Starting query: What were noteable events and people from the authors time at Interleaf and YC?,
INFO:llama_index.core.indices.keyword_table.retrievers:query keywords: ['interleaf', 'people', 'yc', 'time', 'authors', 'noteable', 'events']
INFO:llama_index.core.indices.keyword_table.retrievers:> Extracted keywords: ['interleaf', 'people', 'yc', 'time']


number of selected query engines: 2
Selected query engine: 1: This choice is useful for retrieving specific context related to notable events and people from the author's time at Interleaf and YC.
Selected query engine: 2: This choice allows for retrieving specific context using keywords, which can help in identifying notable events and people.


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Uploading traces...
**********
Trace: query
**********


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


**********
Trace: query
**********
Combining responses from multiple query engines.


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


> Question: What were noteable events and people from the authors time at Interleaf and YC?,

Answer: During the author's time at Interleaf, notable events included the decision to incorporate a scripting language inspired by Emacs, which was a dialect of Lisp. The author gained insights into the dynamics of technology companies, emphasizing the importance of being run by product people rather than salespeople, and learned about the challenges posed by bureaucratic customers and the need for a flexible work environment.

At Y Combinator, significant events included the transition of leadership when the author handed over control to Sam Altman, following advice from Robert Morris. The growth of YC into a supportive community for startups was another key development, as alumni began to assist each other, fostering collaboration. Additionally, the author faced challenges related to managing disputes among cofounders and the stress associated with running Hacker News, which became a major focus during his tenure.

Traces uploaded
