# Agentic RAG: Financial Broker 

In [1]:
# initial setup
from dotenv import load_dotenv


load_dotenv()

True

## 1. Data Extraction

In [16]:
from doctr.models import ocr_predictor
from doctr.io import DocumentFile


class PDFExtractor:
    pass

BASE_FOLDER = "../data"
TEST_FILE = "el-consumo-privado-sostiene-la-economia-en-abril-pese-a-caidas-en-mineria-y-construccion.pdf"

# PDF
pdf_doc = DocumentFile.from_pdf(f"{BASE_FOLDER}/{TEST_FILE}")

model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)

result = model(pdf_doc)

In [5]:
string_result = result.render()
documents = []
documents.append(result.render())

In [3]:
from llama_index.core.node_parser import SentenceSplitter

splitter = SentenceSplitter(chunk_size=2048)
nodes = splitter.get_nodes_from_documents(documents)

In [4]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

Settings.llm = OpenAI(model="gpt-4o",)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
from llama_index.core import SummaryIndex, VectorStoreIndex

summary_index = SummaryIndex(nodes)
vector_index = VectorStoreIndex(nodes)

In [14]:
vector_index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x1688d2670>

In [6]:
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
vector_query_engine = vector_index.as_query_engine()

In [8]:
from llama_index.core.tools import QueryEngineTool


summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description=(
        "Useful for summarization questions related to Financial Reports"
    ),
)

vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description=(
        "Useful for retrieving specific context from the Financial Report."
    ),
)

In [9]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector


query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        summary_tool,
        vector_tool,
    ],
    verbose=True
)

In [24]:
response = query_engine.query("What is the summary of the document?")
#print(str(response))

[1;3;38;5;200mSelecting query engine 0: Choice 1 is relevant because it is useful for summarization questions, which aligns with the request for a summary of the document..
[0m

Retrying llama_index.llms.openai.base.OpenAI._achat in 1.0 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Request too large for gpt-4o in organization org-ZrQ7KguAsnITRgl9fajFc0pZ on tokens per min (TPM): Limit 30000, Requested 59765. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._achat in 1.0 seconds as it raised RateLimitError: Error code: 429 - {'error': {'message': 'Request too large for gpt-4o in organization org-ZrQ7KguAsnITRgl9fajFc0pZ on tokens per min (TPM): Limit 30000, Requested 51069. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}.
Retrying llama_index.llms.openai.base.OpenAI._ac

RateLimitError: Error code: 429 - {'error': {'message': 'Request too large for gpt-4o in organization org-ZrQ7KguAsnITRgl9fajFc0pZ on tokens per min (TPM): Limit 30000, Requested 55687. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}

## 2.  VectorDB indexing

In [12]:
from qdrant_client import models, QdrantClient
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex
from fastembed import TextEmbedding



client = QdrantClient(
    "http://localhost:6333",
)
COLLECTION_NAME = "financial-documents-v0"
EMBEDDING_DIMENSIONALITY=768

client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=models.VectorParams(
        size=EMBEDDING_DIMENSIONALITY,
        distance=models.Distance.COSINE
    )
)
#vector_store = QdrantVectorStore(client=client, collection_name=COLLECTION_NAME)
#index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

True

In [11]:
# client.delete_collection(collection_name=COLLECTION_NAME)

True

In [15]:
# TextEmbedding.list_supported_models()

In [13]:
model_handle = "jinaai/jina-embeddings-v2-base-es"

points = []
id = 0

for doc in documents:

    point = models.PointStruct(
        id=id,
        vector=models.Document(text=doc, model=model_handle), 
        payload={
            "text": doc,
        } #save all needed metadata fields
    )
    points.append(point)

    id += 1

In [14]:
client.upsert(
    collection_name=COLLECTION_NAME,
    points=points
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

## 3. Agents Architecture

## 4. Orchestration