# Naive RAG example

In [None]:
# install required dependencies
! pip install --upgrade pip
#! pip install langchain_community tiktoken langchain-openai chromadb langchain unstructured "unstructured[pdf]" langchain_ollama langchain_postgres "psycopg2[binary]"
! pip install langchain
! pip install langchain_community 
! pip install langchain-openai 
#! pip install chromadb
! pip install unstructured "unstructured[pdf]"
#! pip install langchain_ollama 
! pip install "psycopg[binary]"
! pip install langchain_postgres 
! pip install pydantic



In [2]:
from dotenv import load_dotenv
import os

load_dotenv()


False

## Getting source documents

In [3]:

!mkdir -p 'documents'
!curl -L 'https://itau-fn8-fundosdocumentos.cloud.itau.com.br/52678_COMPE.pdf' -o 'documents/52678_COMPE.pdf'
!curl -L 'https://itau-fn8-fundosdocumentos.cloud.itau.com.br/55765_COMAG.pdf' -o 'documents/55765_COMAG.pdf'
 

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  195k  100  195k    0     0   285k      0 --:--:-- --:--:-- --:--:--  285k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  122k  100  122k    0     0   627k      0 --:--:-- --:--:-- --:--:--  628k


## Chunking

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader

loader = DirectoryLoader("./documents", glob="**/*.pdf")
docs = loader.load()
len(docs)

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
splits

  from .autonotebook import tqdm as notebook_tqdm


[Document(metadata={'source': 'documents/55765_COMAG.pdf'}, page_content='PORTFÓLIO ITAÚ FIC FIM CNPJ 41.884.595/0001-72\n\nPRINCIPAIS INFORMAÇÕES\n\nO QUE É\n\nAplicação Inicial: R$ 1,00 Aplicação Adicional: R$ 1,00\n\nValor mínimo de permanência\n\nÉ um produto que reflete o equilíbrio entre as classes de ativos, definido mensalmente pela equipe responsável pela recomendação de investimentos do Itaú Unibanco, somado à curadoria do Itaú Fund of Funds na seleção de fundos das estratégias de multimercado e renda variável.\n\nR$ 1,00\n\nTaxa de administração\n\n0,75% a.a.\n\nTaxa de administração máxima\n\n1,5% a.a.\n\nTaxa de performance 10% do que exceder 100% de CDI\n\nTaxa de saída\n\nNão Há\n\nPARA QUEM É INDICADO\n\nTributação perseguida (I.R.) Longo prazo sem compromisso\n\nClientes que busquem em um único produto a possibilidade de acesso de seus investidores à recomendação completa de investimentos do Itaú Unibanco com diversificação no mercado local no mercado local e internaci

In [6]:
## adding metadata
for doc in splits:
    if doc.metadata['source'] == 'documents/55765_COMAG.pdf':
        doc.metadata['fundo'] = "ITAU_FIC_FIM"
    if doc.metadata['source'] == "documents/52678_COMPE.pdf":
        doc.metadata['fundo'] = "DIFERENCIADO_CREDITO_PRIVADO_LONGO_PRAZO_RENDA_FIXA"

splits

[Document(metadata={'source': 'documents/55765_COMAG.pdf', 'fundo': 'ITAU_FIC_FIM'}, page_content='PORTFÓLIO ITAÚ FIC FIM CNPJ 41.884.595/0001-72\n\nPRINCIPAIS INFORMAÇÕES\n\nO QUE É\n\nAplicação Inicial: R$ 1,00 Aplicação Adicional: R$ 1,00\n\nValor mínimo de permanência\n\nÉ um produto que reflete o equilíbrio entre as classes de ativos, definido mensalmente pela equipe responsável pela recomendação de investimentos do Itaú Unibanco, somado à curadoria do Itaú Fund of Funds na seleção de fundos das estratégias de multimercado e renda variável.\n\nR$ 1,00\n\nTaxa de administração\n\n0,75% a.a.\n\nTaxa de administração máxima\n\n1,5% a.a.\n\nTaxa de performance 10% do que exceder 100% de CDI\n\nTaxa de saída\n\nNão Há\n\nPARA QUEM É INDICADO\n\nTributação perseguida (I.R.) Longo prazo sem compromisso\n\nClientes que busquem em um único produto a possibilidade de acesso de seus investidores à recomendação completa de investimentos do Itaú Unibanco com diversificação no mercado local no 

In [10]:
# clean database

import psycopg

vector_db_name = "vector_db_rag"
admin_db_name = "postgres"
db_host = "localhost"
db_user = "postgres"
db_password = "postgres"
db_port = "5432"

#connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain

connection_string = f"postgresql+psycopg://{db_user}:{db_password}@{db_host}:{db_port}/{vector_db_name}"

conn = psycopg.connect(dbname=admin_db_name, host=db_host, 
                       port=db_port, user=db_user, password=db_password)
conn.autocommit = True

with conn.cursor() as c:
    kill_connection_query  = f"""
                                SELECT 
                                    pg_terminate_backend(pid) 
                                FROM 
                                    pg_stat_activity 
                                WHERE 
                                    -- don't kill my own connection!
                                    pid <> pg_backend_pid()
                                    -- don't kill the connections to other databases
                                    AND datname = '{vector_db_name}'
                                    ;
                            """
    
    c.execute(kill_connection_query)

with conn.cursor() as c:
    c.execute(f"DROP DATABASE IF EXISTS {vector_db_name}")
    c.execute(f"CREATE DATABASE {vector_db_name}")

In [None]:
# vector store
from langchain_community.vectorstores import Chroma
from langchain_postgres import PGVector
#from langchain_ollama import OllamaEmbeddings
from langchain_openai import OpenAIEmbeddings
#from sqlalchemy import make_url    

#embedding_model = OllamaEmbeddings(model="all-minilm")
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# Embed
# vectorstore = Chroma.from_documents(documents=splits, 
#                                     embedding=embedding_model)

vector_store = PGVector(
    embeddings=embedding_model,
    collection_name="fundos_investimento",
    connection=connection_string,
    use_jsonb=True,
)


vector_store.add_documents(documents=splits)

#retriever = vector_store.as_retriever(search_kwargs={'filter': {'fundo':'ITAU_FIC_FIM'}})
retriever = vector_store.as_retriever()

In [12]:
vector_store

<langchain_postgres.vectorstores.PGVector at 0x3225b5e20>

In [13]:
# activate debug logging
from langchain_core.globals import set_debug, set_verbose
import logging

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.debug("test")

set_debug(True)
#set_verbose(False)

DEBUG:root:test


In [14]:
# query the vector store
#from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
# Prompt



# LLM
#llm = ChatOllama(model="llama3.1")
llm = ChatOpenAI(model="gpt-4.1")

system_prompt = """
     You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. 
     If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
     Question: {question} 
     Context: {context} 
     Answer:
     """

prompt_template = ChatPromptTemplate([("system", system_prompt)])

# Chain
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt_template
    | llm
    | StrOutputParser()
)

# Question
print(rag_chain.invoke("Qual o horário limite investir no fundo ?"))

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'idempotency_key': 'stainless-python-retry-dc48fdae-fbaa-4636-9269-de182faf38a6', 'post_parser': <function Embeddings.create.<locals>.parser at 0x323644d60>, 'json_data': {'input': [[32129, 297, 4917, 20358, 71080, 2793, 404, 912, 3887, 78, 949]], 'model': 'text-embedding-3-small', 'encoding_format': 'base64'}}
DEBUG:openai._base_client:Sending HTTP Request: POST https://api.openai.com/v1/embeddings
DEBUG:httpcore.connection:close.started
DEBUG:httpcore.connection:close.complete
DEBUG:httpcore.connection:connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=None socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x3231731d0>
DEBUG:httpcore.connection:start_tls.started ssl_context=<ssl.SSLContext object at 0x32222c9d0> server_hostname='api.openai.com' timeout=None
DEBUG:httpcore.connection:st

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": "Qual o horário limite investir no fundo ?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question>] Entering Chain run with input:
[0m{
  "input": "Qual o horário limite investir no fundo ?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question> > chain:RunnablePassthrough] Entering Chain run with input:
[0m{
  "input": "Qual o horário limite investir no fundo ?"
}
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question> > chain:RunnablePassthrough] [0ms] Exiting Chain run with output:
[0m{
  "output": "Qual o horário limite investir no fundo ?"
}


DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Wed, 21 May 2025 03:08:46 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-model', b'text-embedding-3-small'), (b'openai-organization', b'hebert-organization'), (b'openai-processing-ms', b'70'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains; preload'), (b'via', b'envoy-router-868484f7b6-d6bpt'), (b'x-envoy-upstream-service-time', b'72'), (b'x-ratelimit-limit-requests', b'3000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-remaining-requests', b'2999'), (b'x-ratelimit-remaining-tokens', b'999988'), (b'x-ratelimit-reset-requests', b'20ms'), (b'x-ratelimit-reset-tokens', b'0s'), (b'x-request-id', b'req_65e665aea0295a5c01e0f2559cecb236'), (b'cf-c

[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > chain:RunnableParallel<context,question>] [980ms] Exiting Chain run with output:
[0m[outputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > prompt:ChatPromptTemplate] Entering Prompt run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[chain:RunnableSequence > prompt:ChatPromptTemplate] [1ms] Exiting Prompt run with output:
[0m[outputs]
[32;1m[1;3m[llm/start][0m [1m[chain:RunnableSequence > llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "System: \n     You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. \n     If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n     Question: Qual o horário limite investir no fundo ? \n     Context: [Document(id='753a7838-434a-421e-a45e-bd7bf88f3e7b', metadata={'fundo': 'ITAU_FIC_FIM', 'source': 'documents/55

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Wed, 21 May 2025 03:08:49 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'hebert-organization'), (b'openai-processing-ms', b'1183'), (b'openai-version', b'2020-10-01'), (b'x-envoy-upstream-service-time', b'1199'), (b'x-ratelimit-limit-requests', b'500'), (b'x-ratelimit-limit-tokens', b'30000'), (b'x-ratelimit-remaining-requests', b'499'), (b'x-ratelimit-remaining-tokens', b'28711'), (b'x-ratelimit-reset-requests', b'120ms'), (b'x-ratelimit-reset-tokens', b'2.578s'), (b'x-request-id', b'req_906a3672c96d2efc3a7aa3dd7ab3186a'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains; preload'), (b'cf-cache-status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=OTUbnaQ.qrWAiwzKP6PhWEw9KBPkf1q.gr2cSWmalr4-1747796929-1.0.1.1-fulGywzOmK

[36;1m[1;3m[llm/end][0m [1m[chain:RunnableSequence > llm:ChatOpenAI] [3.11s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "O horário limite para investir no fundo ITAU FIC FIM é até as 14h00. Aplicações feitas até esse horário terão cotização no mesmo dia da solicitação. Após esse horário, a solicitação será considerada para o próximo dia útil.",
            "additional_kwargs": {
              "refusal": null
            },
            "response_metadata": {
              "token_usage": {
                "completion_tokens": 52,
                "prompt_tokens": 1335,
                "t

In [None]:
## bonus - enrich query with a structured output
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI

class SearchQuery(BaseModel):
    """Search to execute againts a vector database."""

    search: str = Field(description="The user search")
    fundo_investimento: str = Field(description="The investment fund name, " \
    "there are only two ITAU_FIC_FIM or DIFERENCIADO_CREDITO_PRIVADO_LONGO_PRAZO_RENDA_FIXA ")


llm_structured_output = ChatOpenAI(model="gpt-4o-mini")

structured_llm = llm_structured_output.with_structured_output(SearchQuery)

response = structured_llm.invoke("Please, convert the user query: Qual é o hórário limite do fundo fic fim ? ")
response