In [1]:
# %pip install faiss-cpu rank_bm25  # uncomment to install into the running kernel's environment

### Aula 1

In [9]:
from langchain_classic.schema import Document
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# Gemini clients shared by every cell below: a chat model (temperature 0) and an embedding model.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
)
embeddings = GoogleGenerativeAIEmbeddings(
    model="gemini-embedding-001",
)

# Toy knowledge base, declared as (source id, text) pairs and wrapped into Documents.
# The two vacation entries are intentionally similar: only the first one names the
# form 'FRM-01-FERIAS', which is what the keyword query further down looks for.
docs = [
    Document(page_content=text, metadata={"source": source})
    for source, text in (
        (
            "doc_http_404",
            "O erro HTTP 404 Not Found ocorre quando o servidor não encontra o recurso solicitado. Isso pode ser causado por uma URL digitada incorretamente ou um link quebrado.",
        ),
        (
            "doc_ssh_acesso_remoto",
            "SSH, ou Secure Shell, é um protocolo que permite acesso remoto seguro a servidores. Para conectar, utilize a porta padrão 22 e um cliente SSH.",
        ),
        (
            "doc_docker_comandos",
            "Para visualizar os contêineres em execução no Docker, use o comando 'docker ps'. Caso apareça o erro 'Cannot connect to the Docker daemon', verifique se o serviço do Docker está ativo.",
        ),
        (
            "doc_ferias_com_formulario",
            "A política de férias corporativa garante 30 dias de descanso por ano. O colaborador deve acessar o portal interno de RH e preencher o formulário 'FRM-01-FERIAS' para formalizar o pedido.",
        ),
        (
            "doc_ferias_sem_nome_formulario",
            "Para solicitar férias, os colaboradores devem acessar o sistema de RH e seguir as etapas descritas no manual, preenchendo o formulário correto para liberação.",
        ),
    )
]

#### 1. Hybrid Search with EnsembleRetriever

In [10]:
from langchain_classic.retrievers import EnsembleRetriever, BM25Retriever # bm25 = keyword search (lexical)
from langchain_community.vectorstores import FAISS # faiss = vector search (semantic)

# Lexical leg: BM25 keyword retriever over the corpus, limited to its 2 best hits.
bm25_retriever = BM25Retriever.from_documents(docs)
bm25_retriever.k = 2

# Semantic leg: embed the same corpus into a FAISS index and expose it as a
# retriever that is also limited to 2 hits.
faiss_vectorstore = FAISS.from_documents(docs, embeddings)
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs=dict(k=2))

# Hybrid retriever: combines both legs, giving each one the same weight.
ensemble_retriever = EnsembleRetriever(
    weights=[0.5, 0.5],
    retrievers=[bm25_retriever, faiss_retriever],
)

In [11]:
query_keyword = "Como peço férias usando o formulário FRM-01-FERIAS?"

print(f"Query: {query_keyword}\n")

# Send the same query to each retriever in turn so the three rankings can be
# compared side by side. Each heading is printed before its retriever is called.
results_per_retriever = []
for heading, retriever in (
    ("Results from BM25Retriever:", bm25_retriever),
    ("\nResults from FAISSRetriever:", faiss_retriever),
    ("\nResults from EnsembleRetriever:", ensemble_retriever),
):
    print(heading)
    hits = retriever.invoke(query_keyword)
    results_per_retriever.append(hits)
    for doc in hits:
        print(f"- {doc.page_content}")

# Keep each result list available under its own name for further inspection.
doc_bm25, doc_faiss, doc_ensemble = results_per_retriever

Query: Como peço férias usando o formulário FRM-01-FERIAS?

Results from BM25Retriever:
- A política de férias corporativa garante 30 dias de descanso por ano. O colaborador deve acessar o portal interno de RH e preencher o formulário 'FRM-01-FERIAS' para formalizar o pedido.
- Para solicitar férias, os colaboradores devem acessar o sistema de RH e seguir as etapas descritas no manual, preenchendo o formulário correto para liberação.

Results from FAISSRetriever:
- A política de férias corporativa garante 30 dias de descanso por ano. O colaborador deve acessar o portal interno de RH e preencher o formulário 'FRM-01-FERIAS' para formalizar o pedido.
- Para solicitar férias, os colaboradores devem acessar o sistema de RH e seguir as etapas descritas no manual, preenchendo o formulário correto para liberação.

Results from EnsembleRetriever:
- A política de férias corporativa garante 30 dias de descanso por ano. O colaborador deve acessar o portal interno de RH e preencher o formulário 'F

### Aula 2

#### 2. Multi-vector RAG with MultiVectorRetriever

In [12]:
import uuid
from langchain_classic.storage import InMemoryStore
from langchain_classic.retrievers.multi_vector import MultiVectorRetriever
from langchain_classic.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate

In [13]:
# Stand-in for a long PDF: a single Document whose text is later split into chunks.
# The text starts right after the opening quotes; it must not carry leading filler
# characters, because everything inside the string is indexed and shown in results.
long_doc = [
    Document(
        page_content="""
    Introdução à Segurança Cibernética (2024)...
    ...
    Uma das técnicas de ataque mais comuns é o Phishing...
    ...
    Conclusão: Manter-se atualizado... A autenticação de dois fatores (2FA) deve ser obrigatória.
    """,
        metadata={"source": "guia_seguranca_ciber.pdf", "ano": 2024}
    )
]

# Break the document into chunks using a chunk_size of 300; each chunk is
# summarised and stored individually in the cells that follow.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300)
doc_chunks = text_splitter.split_documents(long_doc)

def generate_summaries(docs, llm_model):
    """Summarise each document in one sentence with the given chat model.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.
        llm_model: Model that can be piped after a prompt template and whose
            batched results expose a ``content`` attribute.

    Returns:
        A list with the ``content`` of each model response, in the same order
        as the batched inputs built from ``docs``.
    """
    summary_prompt = ChatPromptTemplate.from_template("Resuma o seguinte documento em uma frase: {documento}")
    summarizer = summary_prompt | llm_model

    # Build one prompt-variable dict per document, then send them as one batch.
    batch_inputs = []
    for document in docs:
        batch_inputs.append({"documento": document.page_content})
    responses = summarizer.batch(batch_inputs)

    return [response.content for response in responses]

In [14]:
# One random id per chunk. The id is the docstore key for the chunk and is also
# written into the metadata of that chunk's summary, linking the two together.
doc_ids = [str(uuid.uuid4()) for _ in doc_chunks]

summary_chunks = generate_summaries(doc_chunks, llm)
# Summaries are paired with chunks purely by position, so the counts must match.
assert len(summary_chunks) == len(doc_ids), "expected exactly one summary per chunk"

# Docstore: maps each id to its full original chunk.
store = InMemoryStore()
store.mset(list(zip(doc_ids, doc_chunks)))

# Vector index over the summaries only.
summary_vectorstore = FAISS.from_texts(
    summary_chunks,
    embeddings,
    # One metadata dict per summary, each carrying the id of its own chunk.
    # The comprehension must wrap the dict (not sit inside it): a dict
    # comprehension here would collapse to a single {"doc_id": <last id>} entry.
    metadatas=[{"doc_id": doc_id} for doc_id in doc_ids],
)

# Retriever configured with the summary index, the chunk docstore, the metadata
# key that links them ("doc_id"), and a limit of 1 hit per search.
multi_vector_retriever = MultiVectorRetriever(
    vectorstore=summary_vectorstore,
    docstore=store,
    id_key="doc_id",
    search_kwargs={"k": 1}
)

In [15]:
query_summary = "qual a principal defesa contra ataques cibernéticos?"

# Run the query through the multi-vector retriever before printing anything.
retrieved_docs = multi_vector_retriever.invoke(query_summary)

print(f"Query: {query_summary}\n")
# A falsy result (for example an empty list) prints nothing after the query line.
for doc in retrieved_docs or []:
    print(f"- {doc.page_content}")

Query: qual a principal defesa contra ataques cibernéticos?

- )
    Introdução à Segurança Cibernética (2024)...
    ...
    Uma das técnicas de ataque mais comuns é o Phishing...
    ...
    Conclusão: Manter-se atualizado... A autenticação de dois fatores (2FA) deve ser obrigatória.


### Contextualizing Fusion

In hybrid search systems, the core idea is to combine two distinct approaches: **semantic search**, which uses embeddings to capture meaning and context, and **lexical search**, which relies on exact keyword matching. One of the major challenges of this method is merging results from different scales, ensuring that the combination actually broadens coverage without distorting the relevance of the retrieved documents.



### Linear Approach

A widely used method for merging results is through a **linear combination**. In this strategy, each result receives a weighted score from both mechanisms. For example, if we want to give higher weight to the lexical search for queries that include specific terms (such as precise codes or names), the formula can be expressed as:

$$score_{total} = w_{semantic} \cdot score_{semantic} + w_{lexical} \cdot score_{lexical}$$

Where the weights ($w_{semantic}$ and $w_{lexical}$) are determined based on the query type and can be adjusted according to the system's observed behavior in a production environment. This approach is intuitive and allows for fine-grained control over the influence of each method on the final score.

### Exploring Reciprocal Rank Fusion (RRF)

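Another advanced technique is **Reciprocal Rank Fusion (RRF)**. Instead of combining scores directly, RRF uses the **rank** (position) of each result in the list returned by each system. For each document, a new score is computed by summing the inverse of its position across the result lists:

$$score_{RRF}(d) = \sum_{r \in R} \frac{1}{k + rank_{r}(d)}$$

Here $R$ is the set of result lists (semantic and lexical), $rank_{r}(d)$ is the position of document $d$ in list $r$, and $k$ is a smoothing constant (60 is a common choice). This technique has the advantage of reducing the impact of scale differences between search methods and can be especially useful when one of the mechanisms, on its own, produces less reliable scores. LangChain's `EnsembleRetriever`, used earlier in this notebook, fuses the results of its retrievers with a weighted form of RRF.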



The central idea is that if a document appears among the top results in both semantic and lexical searches, it will have a high final score. Conversely, documents that are in lower positions in either list will have their relevance reduced.

### Practical Aspects and Parameter Selection

Choosing between a linear combination and RRF depends on the application scenario. Parameters such as the weights assigned in the linear approach require a tuning phase, which can be guided by performance metrics and user feedback. RRF, by working directly with the ranking positions, can avoid some pitfalls related to diverging scales but may also be less sensitive to fine variations that a weighted adjustment would allow.

Regardless of the strategy adopted, the key point is to exploit the **synergy** between semantic and lexical search capabilities. This intelligent fusion can make a significant difference in the robustness and precision of information retrieval, providing a more reliable experience for different types of queries.

In this lesson, we learned:

* The **Hybrid Search** technique, which combines vector and lexical search to improve information retrieval.
* How to implement an **Ensemble Retriever**, uniting **BM25** and **FAISS** for hybrid search.
* The concept of **MultiVector RAG** and its application for retrieval in long documents.
* How **Query Transformation** with LLMs can enhance search effectiveness.
* The configuration and use of **in-memory storage** with LangChain's `InMemoryStore`.
* Using **Reciprocal Rank Fusion (RRF)** to balance and rank results in hybrid searches.
* Document splitting into **chunks** using the `RecursiveCharacterTextSplitter`.
* Creating and storing **chunk summaries** to optimize search performance and accuracy.