## Preparation

In [1]:
import logging
import sys

# Route INFO-level log records to stdout so they appear inline in the notebook.
# basicConfig sets the root level (and may attach its own handler); the handler
# list is then replaced with a single plain StreamHandler so records are not
# emitted twice.
root_logger = logging.getLogger()
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
root_logger.handlers = [logging.StreamHandler(stream=sys.stdout)]

In [2]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Build the chat model and the embedding model once, then publish both on the
# shared `Settings` object so later cells do not have to pass them explicitly.
llm = OpenAI(model="gpt-4o-mini")
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

Settings.llm, Settings.embed_model = llm, embed_model

In [3]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter

# Set up the PDF reader and the sentence-aware splitter (chunk_size=1024) ...
reader = SimpleDirectoryReader(input_files=["data/saudi2030.pdf"])
splitter = SentenceSplitter(chunk_size=1024)

# ... then load the documents and cut them into nodes for indexing.
documents = reader.load_data()
nodes = splitter.get_nodes_from_documents(documents)

Ignoring wrong pointing object 261 0 (offset 0)
Ignoring wrong pointing object 296 0 (offset 0)
Ignoring wrong pointing object 343 0 (offset 0)


In [4]:
from llama_index.core import VectorStoreIndex
from llama_index.core import SummaryIndex

# Two indexes over the same nodes: one for embedding-based lookup and one
# summary index (building the vector index triggers the embeddings request
# seen in the cell output).
vector_index = VectorStoreIndex(nodes=nodes)
summary_index = SummaryIndex(nodes=nodes)

HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


## RetrieverTool

In [5]:
from typing import List, Optional

from llama_index.core.bridge.pydantic import Field
from llama_index.core.postprocessor.types import BaseNodePostprocessor
from llama_index.core.schema import MetadataMode, NodeWithScore, QueryBundle


class SkipEvenPages(BaseNodePostprocessor):
    """Node postprocessor that drops nodes taken from even-numbered pages.

    The page number is read from each node's ``page_label`` metadata entry.
    Nodes whose label is missing or is not a plain integer (for example
    roman-numeral front matter) are kept, because their parity cannot be
    determined and raising would abort the whole retrieval.
    """

    @classmethod
    def class_name(cls) -> str:
        """Return the class name string ``"SkipEvenPages"``."""
        return "SkipEvenPages"

    def _is_even_page(self, node: NodeWithScore) -> bool:
        """Return True only when ``node`` has an integer ``page_label`` that is even."""
        try:
            return int(node.metadata.get("page_label")) % 2 == 0
        except (TypeError, ValueError):
            # Missing (None) or non-numeric label: parity unknown, do not skip.
            return False

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """Filter ``nodes``, keeping everything except even-page nodes.

        Args:
            nodes: Scored nodes to filter; their relative order is preserved.
            query_bundle: Accepted for interface compatibility; not used.

        Returns:
            A new list containing the nodes that are not on an even page.
        """
        return [n for n in nodes if not self._is_even_page(n)]

In [None]:
from llama_index.core.tools import RetrieverTool

# Each tool gets a unique, identifier-style name (no spaces) so the two can be
# told apart and the names stay valid if the tools are ever exposed as
# function-calling tools. The descriptions are unchanged.

# Vector-backed tool: top-5 most similar chunks.
# NOTE(review): the routed retrieval output later in this notebook still lists
# even page labels, which suggests these tool-level node_postprocessors are not
# applied when the tool's retriever is driven through RouterRetriever -- confirm.
vector_tool = RetrieverTool.from_defaults(
    retriever=vector_index.as_retriever(similarity_top_k=5),
    node_postprocessors=[SkipEvenPages()],
    name='saudi_2030_vision_vector',
    description='Useful for retrieving specific context of Saudi 2030 Vision',
)

# Summary-backed tool: the default summary retriever returns every node, so no
# similarity_top_k is passed here.
summary_tool = RetrieverTool.from_defaults(
    retriever=summary_index.as_retriever(),
    name='saudi_2030_vision_summary',
    description='Will retrieve all context of Saudi 2030 Vision',
)

In [30]:
from llama_index.core.selectors import LLMSingleSelector, LLMMultiSelector
from llama_index.core.selectors import (
    PydanticMultiSelector,
    PydanticSingleSelector,
)
from llama_index.core.retrievers import RouterRetriever

# Candidate tools for routing. The order is significant for the index shown in
# the "Selecting retriever N" log line (0 = summary, 1 = vector).
router_tools = [summary_tool, vector_tool]

# Router that delegates each query to the tool chosen by the selector.
retriever = RouterRetriever(
    retriever_tools=router_tools,
    selector=PydanticSingleSelector.from_defaults(),
)

In [28]:
# Run one routed retrieval and show which pages the returned nodes came from.
routed_nodes = retriever.retrieve("equality of opportunities")

for node in routed_nodes:
    print(node.metadata['page_label'], end=' ')

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Selecting retriever 1: The question 'equality of opportunities' requires specific context related to the Saudi 2030 Vision, making option 2 the most relevant choice..
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
36 35 39 40 72 

In [45]:
from llama_index.core.query_engine import RetrieverQueryEngine
# Query engine on top of the router. The even-page filter is attached at the
# engine level so it runs on whatever nodes the routed retriever returns.
# llm / response_synthesizer are passed as None explicitly; presumably the
# engine then falls back to its defaults -- confirm against from_args.
saudi_vision = RetrieverQueryEngine.from_args(
    retriever=retriever,
    node_postprocessors=[SkipEvenPages()],
    llm=None,
    response_synthesizer=None,
)

In [46]:
# Bind the response to a descriptive name instead of relying only on `_`,
# which IPython also uses for "last output". `_` is still assigned because the
# following cell reads `_.source_nodes`.
response = _ = saudi_vision.query('equality of opportunities')

HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Selecting retriever 1: The question 'equality of opportunities' requires specific context related to the Saudi 2030 Vision, making option 2 the most relevant choice..
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [47]:
# `_` is the response object assigned from `saudi_vision.query(...)` above
# (an explicit assignment, not IPython's implicit last-output value); display
# the source nodes attached to that response.
_.source_nodes

[NodeWithScore(node=TextNode(id_='b93ea0fa-437b-4fd8-892f-1b9cb81a20dd', embedding=None, metadata={'page_label': '35', 'file_name': 'saudi2030.pdf', 'file_path': 'data/saudi2030.pdf', 'file_type': 'application/pdf', 'file_size': 3823127, 'creation_date': '2024-12-03', 'last_modified_date': '2024-12-03'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='6adf54ae-0719-4e36-b24c-a08722764427', node_type='4', metadata={'page_label': '35', 'file_name': 'saudi2030.pdf', 'file_path': 'data/saudi2030.pdf', 'file_type': 'application/pdf', 'file_size': 3823127, 'creation_date': '2024-12-03', 'last_modified_date': '2024-12-03'}, hash='90e064aaecbe8370d64f3367e36c57f635d887918f13dba437d3f2012c66167f')}, metadata_templat