In [1]:
import os
import sys
# Make the parent directory importable so that `src.*` modules resolve when the
# notebook is executed from its own folder. The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate entries in sys.path.
PROJECT_ROOT = os.path.abspath('..')
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import OllamaLLM
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain.retrievers import ContextualCompressionRetriever
from src.rag_components import create_rag_chain 
from src.vector_store import build_vector_store 
from src.data_processing import process_pdf_to_chunks
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from pprint import pprint

In [3]:
# Directory that holds the raw PDF manuals. The default is the original author's
# local checkout; set the RIZA_DATA_PATH environment variable to point the
# notebook at a different location without editing this cell.
DATA_PATH = os.environ.get("RIZA_DATA_PATH", "/home/enzo/Desktop/riza/data/raw")

# Full manual and the test manual, both resolved inside DATA_PATH.
MANUAL_COMPLETO = os.path.join(DATA_PATH, "galaxy_z_flip_7.pdf")
MANUAL_TESTE = os.path.join(DATA_PATH, "galaxy_z_flip_7_teste.pdf")

In [18]:
# Location handed to build_vector_store as the vector store's persist directory.
# The path is relative, so it resolves against the notebook's working directory.
persist_directory = "./chroma_db_test1"

In [5]:
# Convert the test manual PDF into cleaned text chunks for indexing.
# Slow cell: the recorded CPU run spent about 66 s in PDF conversion and ended
# with 87 chunks after cleaning.
final_chunks = process_pdf_to_chunks(MANUAL_TESTE)

2025-09-30 08:33:33,489 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-09-30 08:33:33,523 - INFO - Going to convert document batch...
2025-09-30 08:33:33,524 - INFO - Initializing pipeline for StandardPdfPipeline with options hash e647edf348883bed75367b22fbe60347
2025-09-30 08:33:33,538 - INFO - Loading plugin 'docling_defaults'
2025-09-30 08:33:33,541 - INFO - Registered picture descriptions: ['vlm', 'api']
2025-09-30 08:33:33,552 - INFO - Loading plugin 'docling_defaults'
2025-09-30 08:33:33,556 - INFO - Registered ocr engines: ['easyocr', 'ocrmac', 'rapidocr', 'tesserocr', 'tesseract']
2025-09-30 08:33:33,677 - INFO - Accelerator device: 'cpu'


Iniciando conversão do arquivo: /home/enzo/Desktop/riza/data/raw/galaxy_z_flip_7_teste.pdf


2025-09-30 08:33:35,626 - INFO - Accelerator device: 'cpu'
2025-09-30 08:33:38,408 - INFO - Accelerator device: 'cpu'
2025-09-30 08:33:38,896 - INFO - Processing document galaxy_z_flip_7_teste.pdf
2025-09-30 08:34:39,444 - INFO - Finished converting document galaxy_z_flip_7_teste.pdf in 65.96 sec.


Conversão para Markdown concluída.
Iniciando o processo de chunking híbrido...
Chunking concluído. 106 chunks intermediários gerados.
Iniciando limpeza e filtragem dos chunks...
Limpeza concluída. 87 chunks finais restantes.


In [6]:
# Embedding model used to vectorise the chunks; explicitly pinned to the CPU.
embedding_model = HuggingFaceEmbeddings(
    model_kwargs=dict(device="cpu"),
    model_name="all-MiniLM-L6-v2",
)

2025-09-30 08:34:39,701 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


In [19]:
# Build the vector store from the processed chunks using the embedding model above.
# NOTE(review): if build_vector_store adds to an already-populated persist_directory,
# re-running this cell may index the same chunks twice - confirm against its implementation.
vector_store_config = {
    "chunks": final_chunks,
    "persist_directory": persist_directory,
    "embedding_model": embedding_model,
}
db = build_vector_store(**vector_store_config)

2025-09-30 08:41:56,894 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


Criando Vector Store com 87 chunks em './chroma_db_test1'...
Vector Store criado com sucesso.


In [33]:
# Generator LLM served through Ollama; temperature 0 to minimise sampling randomness.
llm = OllamaLLM(
    temperature=0,
    model="phi3:mini",
)

In [34]:
# Stage 1 - recall: the vector store returns 10 candidate chunks per query.
base_retriever = db.as_retriever(
    search_kwargs=dict(k=10),
)

# Stage 2 - precision: a cross-encoder reranks those candidates and keeps the top 4.
reranker_model = HuggingFaceCrossEncoder(
    model_name="cross-encoder/ms-marco-MiniLM-L-6-v2",
)
compressor = CrossEncoderReranker(
    top_n=4,
    model=reranker_model,
)

2025-09-30 08:45:37,893 - INFO - Use pytorch device: cpu


In [35]:
# Chain the two stages: fetch with the base retriever, then filter through the reranker.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=base_retriever,
    base_compressor=compressor,
)

In [36]:
# Assemble the question-answering chain from the LLM and the reranking retriever.
qa_chain = create_rag_chain(retriever=compression_retriever, llm=llm)

In [37]:
# Run one sample question through the chain; the chain expects its input under the "query" key.
query = "how can i charge the phone?"
chain_input = {"query": query}
result = qa_chain.invoke(chain_input)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-09-30 08:46:02,347 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


In [38]:
# Pretty-print the chain's response dict; in the recorded run it carries at least
# the 'query' and 'result' keys, the latter holding the generated answer text.
pprint(result)

{'query': 'how can i charge the phone?',
 'result': 'To charge your phone using a wireless charger (sold separately), '
           'follow these steps:\n'
           '\n'
           '1. Fold your phone to prepare it for charging with another device '
           'on top of it, ensuring their backs are facing each other. The '
           'folded position helps maintain the connection between both devices '
           'and allows efficient power transfer from one to the other. \n'
           '\n'
           "2. Place the second device's back onto the center of a wireless "
           'charger pad or mat that supports fast charging (if available). '
           'Make sure it is properly aligned with any guiding markers on the '
           'surface, which will help maintain its position during charging and '
           'prevent sliding due to magnetic forces from your phone’s magnets.\n'
           '\n'
           '3. Once both devices are connected correctly, you should see a '
           '