# Experimentation using Router Query Engine
This notebook explores how to use LlamaIndex's router query engine to handle both knowledge (fact-retrieval) queries and summarization queries. Depending on the question, the engine routes the query to the appropriate index.

In [2]:
# Load the python-dotenv IPython extension and run its %dotenv magic so that
# settings kept in a .env file are available to the cells below
# (presumably exported as environment variables — confirm against python-dotenv docs).
# NOTE(review): the recorded output of this cell warns that python-dotenv could not
# parse the statement starting at line 5 (presumably of the .env file) — check that entry.
%load_ext dotenv
%dotenv

Python-dotenv could not parse statement starting at line 5


In [3]:
from llama_index import SimpleDirectoryReader, StorageContext
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.vector_stores import PGVectorStore
from llama_index import (
    VectorStoreIndex,
    SummaryIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
)

In [14]:
import nest_asyncio

# Apply the nest_asyncio patch before any query engine runs.
# Presumably required because the summary query engine in this notebook is
# created with use_async=True while Jupyter already has an event loop running
# — TODO confirm.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours; verify the notebook still works under Restart & Run All.
nest_asyncio.apply()

In [4]:
import logging
import sys

from decouple import config

# Optional debug logging to stdout (disabled); uncomment both lines to enable.
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Look up the OpenAI API key via python-decouple's `config` helper.
OPENAI_API_KEY = config("OPENAI_API_KEY")

In [5]:
import psycopg2

# --- Postgres / pgvector configuration -------------------------------------
# The database name is fixed in the notebook; the connection string is read
# from configuration.
PGVECTOR_DATABASE = "vector_db"
PGVECTOR_CONNECTION_STRING = config("PGVECTOR_CONNECTION_STRING")

# Lower-case aliases referenced by the cells that follow.
db_name = PGVECTOR_DATABASE
connection_string = PGVECTOR_CONNECTION_STRING

# Open a psycopg2 connection and switch it to autocommit mode.
conn = psycopg2.connect(connection_string)
conn.autocommit = True


In [6]:
from sqlalchemy.engine import make_url

# Read everything under ./data into documents.
documents = SimpleDirectoryReader("./data").load_data()

# Split the documents into nodes with a service context configured for
# chunk_size=1024.
service_context = ServiceContext.from_defaults(chunk_size=1024)
node_parser = service_context.node_parser
nodes = node_parser.get_nodes_from_documents(documents)

# Build the pgvector-backed vector store: host/port/credentials come from the
# parsed connection URL, the database name from `db_name`.
url = make_url(connection_string)
pg_store_params = dict(
    database=db_name,
    host=url.host,
    port=url.port,
    user=url.username,
    password=url.password,
    table_name="DeloitteFutureOfAI",
    embed_dim=1536,  # openai embedding dimension
)
vector_store = PGVectorStore.from_params(**pg_store_params)

# Wrap the vector store in a storage context and register the nodes in its
# docstore.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
storage_context.docstore.add_documents(nodes)


In [7]:
# Build the two indices over the same nodes, both backed by the shared
# storage context: one summary index and one vector-store index.
summary_index = SummaryIndex(nodes=nodes, storage_context=storage_context)
vector_index = VectorStoreIndex(nodes=nodes, storage_context=storage_context)

In [9]:
# One query engine per index.
# The summary engine uses the "tree_summarize" response mode with async enabled;
# the vector engine uses the defaults.
summary_engine_options = {"response_mode": "tree_summarize", "use_async": True}
list_query_engine = summary_index.as_query_engine(**summary_engine_options)
vector_query_engine = vector_index.as_query_engine()

In [10]:
from llama_index.tools.query_engine import QueryEngineTool

# Wrap each query engine in a tool. The description is the text the router's
# selector uses to choose a tool (per the LlamaIndex router documentation), so
# every tool needs a description that distinguishes it from the others.

# Summarization route, backed by the summary index.
list_tool = QueryEngineTool.from_defaults(
    query_engine=list_query_engine,
    description=(
        "Useful for summarization questions related to Deloitte reports"
    ),
)

# Retrieval route, backed by the vector index.
# Fix: this tool previously reused the summarization description verbatim,
# leaving the selector no way to tell the two tools apart.
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "Useful for retrieving specific context from Deloitte reports"
    ),
)

In [12]:
from llama_index import SimpleKeywordTableIndex
from llama_index.query_engine.router_query_engine import RouterQueryEngine
from llama_index.selectors.pydantic_selectors import PydanticMultiSelector

# Keyword-table index over the same nodes, sharing the storage context.
keyword_index = SimpleKeywordTableIndex(nodes, storage_context=storage_context)

# Fix: the keyword tool previously wrapped `vector_query_engine`, so
# `keyword_index` was built but never queried and the "keyword" route was just
# a second vector route. Give the keyword index its own query engine.
keyword_query_engine = keyword_index.as_query_engine()

keyword_tool = QueryEngineTool.from_defaults(
    query_engine=keyword_query_engine,
    description=(
        "Useful for retrieving specific context using keywords from Deloitte reports"
    ),
)

# Router over all three tools. A multi-selector is used, which presumably lets
# a single query be dispatched to more than one tool — confirm against the
# PydanticMultiSelector documentation.
query_engine = RouterQueryEngine(
    selector=PydanticMultiSelector.from_defaults(),
    query_engine_tools=[
        list_tool,
        vector_tool,
        keyword_tool,
    ],
)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/danielkwik/Library/Caches/llama_index...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [19]:
# Send a question through the router and print the answer.
# NOTE(review): the output recorded below this cell talks about a gold producer,
# not the question asked here — it looks stale; re-run the cell before sharing.
question = "What can we liken to C-3PO and Chewbacca?"
response = query_engine.query(question)
print(response)

The qualification requirements of a gold producer are not provided in the given context information.
