# Router Query Engine

In [None]:
!pip install llama-index

Collecting llama-index
  Downloading llama_index-0.8.9-py3-none-any.whl (702 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 702.4/702.4 kB 6.8 MB/s eta 0:00:00
Collecting tiktoken (from llama-index)
  Downloading tiktoken-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 14.4 MB/s eta 0:00:00
Collecting dataclasses-json (from llama-index)
  Downloading dataclasses_json-0.5.14-py3-none-any.whl (26 kB)
Collecting langchain>=0.0.262 (from llama-index)
  Downloading langchain-0.0.272-py3-none-any.whl (1.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.6/1.6 MB 17.5 MB/s eta 0:00:00
Collecting openai>=0.26.4 (from llama-index)
  Downloading openai-0.27.9-py3-none-any.whl (75 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 75.5/75.5 kB 6.5 MB/s

In [None]:
# NOTE: This is ONLY necessary in a Jupyter notebook.
# Details: Jupyter runs an event loop behind the scenes, so starting another
#          event loop to make async queries results in nested event loops.
#          This is normally not allowed; nest_asyncio is applied here to allow it for convenience.
import nest_asyncio

nest_asyncio.apply()

In [None]:
import logging
import sys

# Route INFO-and-above log records to stdout so they show up in the cell output.
# force=True removes any handlers already attached to the root logger and always
# applies the level; without it basicConfig does nothing when the root logger
# already has handlers, which would leave the level unchanged.
# format="%(message)s" keeps the bare-message output of a plain StreamHandler.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(message)s",
    force=True,
)

from llama_index import (
    VectorStoreIndex,
    ListIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
)

import getpass
import os

import openai

# Do not hardcode the key in the notebook: read it from the OPENAI_API_KEY
# environment variable, and fall back to an interactive prompt (input is not
# echoed) when the variable is unset or empty.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

In [None]:
# Read the contents of the local "data" directory into documents.
documents = SimpleDirectoryReader("data").load_data()

# Service context created with chunk_size=1024; its node parser turns the documents into nodes.
service_context = ServiceContext.from_defaults(chunk_size=1024)
node_parser = service_context.node_parser
nodes = node_parser.get_nodes_from_documents(documents)

# Default storage context (in-memory by default); add the parsed nodes to its docstore.
storage_context = StorageContext.from_defaults()
docstore = storage_context.docstore
docstore.add_documents(nodes)

[nltk_data] Downloading package punkt to /tmp/llama_index...
[nltk_data]   Unzipping tokenizers/punkt.zip.


# Define List Index and Vector Index over Same Data

In [None]:
# Both indexes are built from the same nodes and the same storage context.
shared_index_kwargs = {"storage_context": storage_context}
list_index = ListIndex(nodes, **shared_index_kwargs)
vector_index = VectorStoreIndex(nodes, **shared_index_kwargs)

# Define Query Engines and Set Metadata

In [None]:
# List index: query engine configured with the tree_summarize response mode and async enabled.
list_engine_options = {"response_mode": "tree_summarize", "use_async": True}
list_query_engine = list_index.as_query_engine(**list_engine_options)

# Vector index: query engine with default settings.
vector_query_engine = vector_index.as_query_engine()

In [None]:
from llama_index.tools.query_engine import QueryEngineTool

# Each tool pairs a query engine with a natural-language description of what it is good for.
# NOTE(review): the descriptions are presumably what the router's selector reads when
# choosing a tool, so they should be accurate and free of typos — confirm.

# Tool backed by the list index engine (tree_summarize), described as the summarization tool.
list_tool = QueryEngineTool.from_defaults(
    query_engine=list_query_engine,
    description="Useful for summarization questions related to Paul Graham essay on What I Worked On.",
)

# Tool backed by the vector index engine, described as the specific-context retrieval tool.
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description="Useful for retrieving specific context from Paul Graham essay on What I Worked On.",
)

# Define Router Query Engine

## PydanticSingleSelector

In [None]:
from llama_index.query_engine.router_query_engine import RouterQueryEngine
from llama_index.selectors.llm_selectors import LLMSingleSelector, LLMMultiSelector
from llama_index.selectors.pydantic_selectors import (
    PydanticMultiSelector,
    PydanticSingleSelector,
)


# Router over the list and vector tools, using the default PydanticSingleSelector as its selector.
pydantic_single_selector = PydanticSingleSelector.from_defaults()
query_engine = RouterQueryEngine(
    selector=pydantic_single_selector,
    query_engine_tools=[list_tool, vector_tool],
)

In [None]:
# Ask the router a summarization-style question.
summary_question = "What is the summary of the document?"
response = query_engine.query(summary_question)

In [None]:
# Display the `response` attribute of the query result as this cell's output.
response.response

"The document provides a personal account of the author's experiences and interests in writing, programming, art, and entrepreneurship. It discusses their early experiences in writing and programming, their interest in artificial intelligence, and their transition to art. The document also explores their time working at Interleaf, their decision to start a company called Viaweb, and the challenges and successes they faced in building the software and navigating the startup landscape. Additionally, it touches on the author's involvement with Y Combinator and their decision to retire and pursue other interests. The document provides insights into the author's journey and decision-making process throughout their career."

## LLMSingleSelector

In [None]:
# Rebind query_engine to a router over the same two tools, now using the default LLMSingleSelector.
llm_single_selector = LLMSingleSelector.from_defaults()
query_engine = RouterQueryEngine(
    selector=llm_single_selector,
    query_engine_tools=[list_tool, vector_tool],
)

In [None]:
# Ask the summarization question through the current router and display the answer text.
summary_question = "What is the summary of the document?"
response = query_engine.query(summary_question)
response.response

"The document provides a personal account of the author's experiences and interests in writing, programming, art, and entrepreneurship. It starts with their early experiences in writing and programming, their fascination with artificial intelligence, and their decision to focus on Lisp programming. The author also discusses their interest in art, their experiences at art school, and their exploration of still life painting. The document then transitions to the author's experiences working at Interleaf, attending art school, and starting a company called Viaweb. It describes the challenges and successes of building the software and launching the company. The author later reflects on their decision to leave Viaweb, pursue other interests, and start a new company focused on web applications. The document also touches on the author's involvement in various projects, such as spam filters and creating Hacker News. It concludes with the author's experiences leading up to the creation of Y Com

In [None]:
# A specific-fact question. The school is RISD, as the recorded answer below also
# states; the query text uses that spelling so the question matches the source material.
response = query_engine.query("What did Paul Graham do after RISD?")
response.response

'After leaving RISD, Paul Graham moved to New York and resumed his old life. He continued to paint and experimented with a new kind of still life. He also looked for an apartment to buy and eventually got the idea to build a web app for making web apps. He decided to move to Cambridge and start this new venture, recruiting a team to work on it.'

## PydanticMultiSelector

In [None]:
from llama_index import SimpleKeywordTableIndex

# Keyword-table index over the same nodes and storage context as the other indexes.
keyword_index = SimpleKeywordTableIndex(nodes, storage_context=storage_context)

# The keyword tool must wrap the keyword index's own query engine (not the vector
# engine); otherwise keyword_index is never used and this tool is just a second
# vector tool with a different description.
keyword_query_engine = keyword_index.as_query_engine()

keyword_tool = QueryEngineTool.from_defaults(
    query_engine=keyword_query_engine,
    description="Useful for retrieving specific context using keywords from Paul Graham essay on What I Worked On.",
)

In [None]:
# Rebind query_engine to a router over all three tools, using the default PydanticMultiSelector.
pydantic_multi_selector = PydanticMultiSelector.from_defaults()
all_tools = [list_tool, vector_tool, keyword_tool]
query_engine = RouterQueryEngine(
    selector=pydantic_multi_selector,
    query_engine_tools=all_tools,
)

In [None]:
# This query could use either a keyword or a vector query engine, so it will combine responses from both.
# (Query text corrected: "notable", "author's".)
response = query_engine.query(
    "What were notable events and people from the author's time at Interleaf and YC?"
)

In [None]:
# Display the `response` attribute of the multi-selector query result as this cell's output.
response.response

"The notable events and people from the author's time at Interleaf are not mentioned in the provided context information. However, during the author's time at Y Combinator (YC), they co-founded a company called Viaweb with Robert Morris and received seed funding from Julian Weber. The author's deal with Julian became the model for Y Combinator's future deals with founders. The author also recruited Sam Altman to become the president of YC, marking a changing of the guard and a reorganization of the company. They also worked closely with Jessica Livingston, who played a significant role in YC. The context mentions the growth and success of YC, as well as the decision to transition from being a fund back to being self-funded."