In [1]:
from llama_index.readers.file import PyMuPDFReader
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.ollama import OllamaEmbedding

In [2]:
from langchain_ollama import OllamaLLM
from langchain_core.prompts import PromptTemplate

In [None]:
# LlamaIndex (data side): read the PDF, split it into chunks, build the index.
pdf_reader = PyMuPDFReader()
docs = pdf_reader.load("data/lei-8080.pdf")

# Split the loaded documents into nodes (chunk_size=800, chunk_overlap=120).
splitter = SentenceSplitter(chunk_size=800, chunk_overlap=120)
nodes = splitter.get_nodes_from_documents(docs)

# Embed the nodes with the "nomic-embed-text" model and index them.
embedder = OllamaEmbedding("nomic-embed-text")
index = VectorStoreIndex(nodes, embed_model=embedder)

# Retriever that returns the 3 most similar nodes for a query.
retr = index.as_retriever(similarity_top_k=3)

2025-09-02 18:45:10,404 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:45:10,435 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:45:10,482 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:45:10,504 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:45:10,535 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:45:10,562 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:45:10,585 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:45:10,606 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:45:10,634 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:45:10,663 - INFO - HTTP Request: POST http://localhost:1143

In [None]:
# LangChain (orchestration side): the LLM and the prompt it is asked with.
llm = OllamaLLM(model="llama3:8b", temperature=0)

# Prompt text stays in Portuguese because it is sent to the model verbatim.
# It has two placeholders: {q} (the question) and {ctx} (the retrieved excerpts).
_PROMPT_TEXT = (
    "Responda APENAS com base nos trechos; se faltar evidência, diga 'Não sei'.\n"
    "Pergunta: {q}\n\nTrechos:\n{ctx}"
)
T = PromptTemplate.from_template(_PROMPT_TEXT)

In [11]:
def ask(q: str, max_chars: "int | None" = 900):
    """Answer a question using the retrieved excerpts as the only context.

    Relies on three notebook-level objects: ``retr`` (retriever), ``T``
    (prompt template with ``q`` and ``ctx`` fields) and ``llm``.

    Args:
        q: The question to ask.
        max_chars: Maximum number of characters kept from each retrieved
            excerpt before it is placed in the prompt. Defaults to 900
            (the previous hard-coded value); pass ``None`` to use each
            excerpt in full.

    Returns:
        A ``(answer, sources)`` tuple. ``sources`` is a newline-separated,
        sorted list of the distinct source paths of the retrieved nodes.
        When nothing is retrieved the result is ``("Não sei.", "")`` and the
        LLM is not called.
    """
    hits = retr.retrieve(q)
    if not hits:
        return "Não sei.", ""

    # Slicing with None keeps the whole string, so max_chars=None disables truncation.
    ctx = "\n\n".join(h.node.get_content()[:max_chars] for h in hits)
    ans = llm.invoke(T.format(q=q, ctx=ctx))

    # Collect distinct source paths; nodes without any path metadata are
    # skipped so the result never contains blank lines.
    sources = set()
    for h in hits:
        meta = h.node.metadata
        path = meta.get("file_path") or meta.get("filename", "")
        if path:
            sources.add(path)
    fontes = "\n".join(sorted(sources))
    return ans, fontes

In [12]:
# Example: ask one question and show the answer followed by its sources.
example_question = "Quais são os princípios do SUS segundo a Lei 8.080?"
resp, src = ask(example_question)
print(resp)
# ask() returns an empty sources string when nothing was retrieved.
if src:
    print("\nFontes:\n" + src)

2025-09-02 18:47:12,355 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:47:16,179 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Os princípios do SUS segundo a Lei 8.080 são:

I - universalidade de acesso aos serviços de saúde em todos os níveis de assistência.

Não há mais trechos mencionando outros princípios, portanto não sei se existem outros.

Fontes:
data/lei-8080.pdf


# Melhorar as respostas

In [13]:
from llama_index.core import VectorStoreIndex, Document
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

In [None]:
# Reload the source PDF. The path must match the file used when the first
# index was built ("lei-8080.pdf", with a hyphen); the previous
# "lei_8080.pdf" spelling pointed at a different file name.
docs = PyMuPDFReader().load("data/lei-8080.pdf")

In [34]:
# Larger chunks plus overlap; the intent is to keep articles and their items together.
large_splitter = SentenceSplitter(
    paragraph_separator="\n\n",
    chunk_overlap=200,   # shared margin between neighbouring chunks
    chunk_size=1400,     # larger value = more context per chunk
)
nodes = large_splitter.get_nodes_from_documents(docs)

In [36]:
# Build the index over the new nodes and configure the query engine.
embed_model = OllamaEmbedding("nomic-embed-text")
index = VectorStoreIndex(nodes, embed_model=embed_model)

query_llm = Ollama("llama3:8b", temperature=0)
qe = index.as_query_engine(
    llm=query_llm,
    response_mode="refine",    # build the answer across several retrieved chunks
    similarity_top_k=8,        # retrieve more chunks so relevant passages are less likely to be missed
)

2025-09-02 18:59:42,754 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:59:42,831 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:59:42,869 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:59:42,899 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:59:42,929 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:59:42,967 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:59:42,996 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:59:43,035 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:59:43,062 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 18:59:43,094 - INFO - HTTP Request: POST http://localhost:1143

In [37]:
# Send the question through the query engine and print the answer text.
q = "Quais são os princípios do SUS segundo a Lei 8.080?"
resp = qe.query(q)
print(str(resp))

2025-09-02 19:00:01,355 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 19:00:05,637 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2025-09-02 19:00:06,380 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2025-09-02 19:00:06,952 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2025-09-02 19:00:07,423 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2025-09-02 19:00:07,886 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2025-09-02 19:00:10,792 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2025-09-02 19:00:12,178 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2025-09-02 19:00:13,539 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Os princípios do Sistema Único de Saúde (SUS) são fundamentais para garantir a qualidade e equidade da assistência à saúde. Entre esses princípios, destacam-se a articulação contínua das ações preventivas e curativas, a preservação da autonomia individual, a igualdade na assistência à saúde e a participação da comunidade. Além disso, o SUS também prioriza a descentralização política-administrativa, a integração de ações em diferentes áreas e a conjugação dos recursos para garantir a resolução eficaz dos serviços de saúde.


In [None]:
# List the distinct sources behind the answer.
# The lookup mirrors ask() earlier in this notebook: "file_path" first, then
# "filename". Reading only "filename" prints empty entries when the reader
# sets just "file_path" (NOTE(review): confirm against PyMuPDFReader metadata).
print("\nFontes:")
shown = set()
for s in getattr(resp, "source_nodes", []):
    meta = s.node.metadata
    label = meta.get("file_path") or meta.get("filename", "")
    # Skip nodes with no path metadata and paths that were already listed,
    # so each source appears once, in order of first appearance.
    if not label or label in shown:
        continue
    shown.add(label)
    print("-", label)