In [1]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    Settings,
)
from llama_index.llms.ollama import Ollama
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.embeddings.huggingface import HuggingFaceEmbedding


from llama_index.core.evaluation import (
    generate_question_context_pairs,
)
from llama_index.core.evaluation import RetrieverEvaluator

import pandas as pd
import time
import nest_asyncio

# Patch asyncio so that event loops can be nested. Presumably required because
# the notebook kernel already runs a loop while later cells use `await` and
# library calls that drive their own loop — TODO confirm it is still needed.
nest_asyncio.apply()

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Create the llama3.1 client (request_timeout=300.0) and register it as the
# global default LLM in one step; `llm` is reused for question generation.
Settings.llm = llm = Ollama(model="llama3.1", request_timeout=300.0)

In [9]:
# Load the single source PDF that the whole benchmark is built on.
pdf_reader = SimpleDirectoryReader(input_files=["gemma.pdf"])
documents = pdf_reader.load_data()

In [10]:
# Split the loaded documents into nodes with a parser configured for
# chunk_size=512; `nodes` feeds both question generation and index building.
node_parser = SimpleNodeParser.from_defaults(chunk_size=512)
nodes = node_parser.get_nodes_from_documents(documents=documents)

In [5]:
# Constructor keyword arguments for each embedding model under comparison,
# keyed by the short label that appears in the results table.
_EMBEDDING_KWARGS = {
    "all-MiniLM": {"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
    "gte-large": {
        "model_name": "Alibaba-NLP/gte-large-en-v1.5",
        # NOTE(review): trust_remote_code presumably allows code shipped with
        # the model repository to run locally — confirm the source is trusted.
        "trust_remote_code": True,
    },
    "bge-large": {"model_name": "BAAI/bge-large-en"},
    "bge-small": {"model_name": "BAAI/bge-small-en"},
}

# Every model is instantiated eagerly here, in the order listed above.
EMBEDDINGS = {
    label: HuggingFaceEmbedding(**kwargs)
    for label, kwargs in _EMBEDDING_KWARGS.items()
}

In [11]:
def display_results(embedding_name, eval_results):
    """Summarise retrieval evaluation results for one embedding model.

    Args:
        embedding_name: Label stored in the "Embedding" column of the summary.
        eval_results: Iterable of result objects, each exposing a
            ``metric_vals_dict`` mapping that contains "hit_rate" and "mrr".

    Returns:
        A one-row DataFrame with columns "Embedding", "hit_rate" and "mrr",
        where both metrics are averaged over all results. When
        ``eval_results`` is empty, both metrics are NaN.

    Raises:
        KeyError: If results are present but none of them carries a
            "hit_rate" or "mrr" entry.
    """
    metric_dicts = [result.metric_vals_dict for result in eval_results]

    if metric_dicts:
        full_df = pd.DataFrame(metric_dicts)
        hit_rate = full_df["hit_rate"].mean()
        mrr = full_df["mrr"].mean()
    else:
        # A frame built from no rows has no metric columns, so indexing it
        # would fail with an opaque KeyError; report "no data" as NaN instead.
        hit_rate = mrr = float("nan")

    return pd.DataFrame(
        {"Embedding": [embedding_name], "hit_rate": [hit_rate], "mrr": [mrr]}
    )

In [12]:
# Prompt template handed to the question generator. It exposes two
# placeholders, {num_questions_per_chunk} and {context_str}; the text is kept
# verbatim (including its indentation) because it is sent to the LLM as-is.
QA_GENERATE_PROMPT_TMPL = """\

    You are an expert in making questions from given text which are research level based. Your task is to setup \
    {num_questions_per_chunk} question. The questions should be diverse in nature \
    across the document. The questions should not contain options, not start with Q1/ Q2. \
    Restrict the questions to the context information provided. Respond only with questions.

    Context information is below.

    ---------------------
    {context_str}
    ---------------------

    Given the context information and not prior knowledge.
    generate only questions based on the below query.

    """

# Ask the LLM for one question per node to build the evaluation dataset.
# This is the slow step of the notebook: it issues LLM requests for the nodes.
qa_dataset = generate_question_context_pairs(
    nodes,
    llm=llm,
    qa_generate_prompt_tmpl=QA_GENERATE_PROMPT_TMPL,
    num_questions_per_chunk=1,
)

100%|██████████| 54/54 [27:56<00:00, 31.05s/it]


In [13]:
results_df = pd.DataFrame()

for embed_name, embed_model in EMBEDDINGS.items():
    Settings.embed_model = embed_model
    vector_index = VectorStoreIndex(nodes)
    vector_retriever = vector_index.as_retriever(similarity_top_k=5)
    retriever_evaluator = RetrieverEvaluator.from_metric_names(
            ["mrr", "hit_rate"], retriever=vector_retriever
        )
    start_time = time.time()
    eval_results = await retriever_evaluator.aevaluate_dataset(qa_dataset)
    elapsed_time = time.time() - start_time

    current_df = display_results(embed_name,eval_results)
    current_df["total_time"] = elapsed_time
    results_df = pd.concat([results_df, current_df], ignore_index=True)

results_df

Unnamed: 0,Embedding,hit_rate,mrr,total_time
0,all-MiniLM,0.62963,0.435185,0.403985
1,gte-large,0.740741,0.553086,1.16209
2,bge-large,0.703704,0.550617,1.073932
3,bge-small,0.740741,0.58179,0.566546
