# **Benchmark**

# 0. Setup

In [1]:

!pip install -q langchain langchain-text-splitters langchain-community sentence-transformers faiss-cpu


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m484.9/484.9 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.0/51.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests==2.32.4, but you have requests 2.32.5 which is incompatible.[0m[3

In [63]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document

Version 1 (naive retrieval): reaches Precision 58.24% (20 min)

In [None]:
def retrieve_relevant_chunks(vector_db, target_event, k=5):
    """Naive RAG retrieval: return the ``k`` chunks most similar to the query.

    Ranking is left entirely to the vector store's own similarity search;
    no reranking step is applied.

    Args:
        vector_db: Vector store exposing ``similarity_search(query, k=...)``.
        target_event: Query text to search for.
        k: Number of chunks to request from the store.

    Returns:
        Whatever ``vector_db.similarity_search`` returns for this query.
    """
    return vector_db.similarity_search(target_event, k=k)

Version 2 (Reranking): reaches Precision 67.79% (43 min)

In [55]:
!pip install sentence-transformers



In [61]:
from sentence_transformers import CrossEncoder

# 1. Initialize the cross-encoder reranker used by the reranking retriever
RERANKER_MODEL_NAME = 'BAAI/bge-reranker-base'
RERANKER_DEVICE = 'cpu'
reranker_model = CrossEncoder(RERANKER_MODEL_NAME, device=RERANKER_DEVICE)

def retrieve_relevant_chunks_with_reranking(vector_db, query, k_initial=20, k_final=5, reranker=None):
    """Retrieve candidates by vector similarity, then rerank them with a cross-encoder.

    Args:
        vector_db: Vector store exposing ``similarity_search(query, k=...)``.
        query: Query text.
        k_initial: Number of candidates pulled from the vector store before
            reranking. If it is smaller than ``k_final`` it is raised to
            ``k_final``; otherwise the function could never return the
            requested number of documents.
        k_final: Number of documents to return after reranking. A value
            of zero or less yields an empty list.
        reranker: Optional scorer exposing ``predict(pairs)`` that returns one
            score per ``[query, text]`` pair. Defaults to the notebook-level
            ``reranker_model``.

    Returns:
        A list of at most ``k_final`` documents, highest reranker score first.
        Documents with equal scores keep the vector store's order.
    """
    # Nothing requested: skip the search and scoring entirely. This also avoids
    # the surprising "all but the last n" result of slicing with a negative k_final.
    if k_final <= 0:
        return []

    # 1: over-fetch candidates so the relevant chunks are likely among them
    candidate_docs = vector_db.similarity_search(query, k=max(k_initial, k_final))
    if not candidate_docs:
        return []

    # 2: build the [query, document text] pairs the reranker scores
    scorer = reranker if reranker is not None else reranker_model
    pairs = [[query, doc.page_content] for doc in candidate_docs]

    # 3: score every pair (reranking)
    scores = scorer.predict(pairs)

    # 4: sort by score, best first, and keep the final top-k
    ranked = sorted(zip(candidate_docs, scores), key=lambda pair: pair[1], reverse=True)
    return [doc for doc, _ in ranked[:k_final]]

## 1. Load benchmark dataset

In [46]:
from datasets import load_dataset

# 1. Load the HotpotQA (general-knowledge) subset of RAGBench, test split
RAGBENCH_REPO = "galileo-ai/ragbench"
RAGBENCH_SUBSET = "hotpotqa"
RAGBENCH_SPLIT = "test"
RAGBench_dataset = load_dataset(RAGBENCH_REPO, RAGBENCH_SUBSET, split=RAGBENCH_SPLIT)

# ----- dataset fields -----
# id
# question
# documents
# response
# documents_sentences <- documents are already split into sentences/chunks, each associated with an id (e.g. 1a, 1b, 2c, etc.)
# all_relevant_sentence_keys <- ids of the useful sentences/chunks

# 2. Download a model for Context Relevance

In [12]:
from langchain_core.documents import Document

# Initialize the sentence-embedding model used to build the per-question vector stores
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
EMBEDDING_MODEL_KWARGS = {'device': 'cpu'}
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs=EMBEDDING_MODEL_KWARGS)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# 3. Generation of vector db based on given document sentences



In [37]:
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS

def create_vector_db_from_sentences(doc_sentences, embeddings_model):
    """Build a FAISS vector store holding one entry per pre-split sentence.

    Args:
        doc_sentences: Iterable of documents, each an iterable of sentences
            indexable as ``sentence[0]`` (the sentence id, e.g. "1a") and
            ``sentence[1]`` (the sentence text).
        embeddings_model: Embedding model handed to ``FAISS.from_documents``.

    Returns:
        The store returned by ``FAISS.from_documents``. Each entry keeps its
        sentence id under ``metadata["id"]`` so retrieved chunks can be matched
        against the gold keys.
    """
    sentence_docs = [
        Document(page_content=sentence[1], metadata={"id": sentence[0]})
        for sentences in doc_sentences
        for sentence in sentences
    ]

    # A fresh, throwaway index: every question comes with its own documents.
    return FAISS.from_documents(sentence_docs, embeddings_model)

# Test

In [None]:
def test_ragbench_retrieval(dataset):
    successi = 0
    totale = 0

    for item in dataset:
        query = item['question']
        chunks = item.get('documents_sentences')
        gold_chunks= item.get('all_relevant_sentence_keys')

        # every question has different documents
        vector_db_bench = create_vector_db_from_sentences(chunks, embeddings)
        k=len(gold_chunks)
        totale += k

        ## version 1
        #retrived_chunks = retrieve_relevant_chunks(vector_db_bench, query, k=k)
        ## version 2
        retrived_chunks= retrieve_relevant_chunks_with_reranking(vector_db_bench, query, k_initial=20, k_final=k)

        # 3. Valutation: retrived are contained in gold_chunks
        for chunk in retrived_chunks:
            id=chunk.metadata['id']
            if id in gold_chunks:
                successi += 1

    # completeness Len(Ri intersect Ri)/Len(Ri)
    precision = successi / totale
    print(f"RAGBench Context Precision: {precision:.2%}")
    return precision

# Run the benchmark over the loaded RAGBench split. The notes earlier in this notebook
# list "(43 min)" for the reranking version — presumably wall-clock time; confirm before re-running.
test_ragbench_retrieval(RAGBench_dataset)