In [7]:
import json
import logging
import pandas as pd
from sentence_transformers import CrossEncoder

from financerag.tasks import FinDER
from financerag.retrieval import DenseRetrieval, SentenceTransformerEncoder
from financerag.rerank import CrossEncoderReranker
from financerag.tasks.BaseTask import BaseTask  # optional if you prefer static evaluate

# Configure the root logger at INFO level so progress messages from the
# libraries used below are shown while the pipeline runs.
logging.basicConfig(level=logging.INFO)

# Absolute, machine-specific locations of the local FinDER inputs:
# the document corpus (JSONL), the queries (JSONL), and the relevance
# judgments (tab-separated). Adjust these when running on another machine.
CORPUS_PATH = "/Users/vikashpr/Dev/Python/FinanceRAG/icaif-24-finance-rag-challenge/finder_corpus.jsonl/corpus.jsonl"
QUERY_PATH = "/Users/vikashpr/Dev/Python/FinanceRAG/icaif-24-finance-rag-challenge/finder_queries.jsonl/queries.jsonl"
QRELS_PATH = "/Users/vikashpr/Dev/Python/FinanceRAG/icaif-24-finance-rag-challenge/FinDER_qrels.tsv"

def load_jsonl(path):
    """Lazily yield one decoded JSON value per non-blank line of a JSONL file.

    Args:
        path: Location of the JSON Lines file to read.

    Yields:
        The object produced by ``json.loads`` for each non-blank line, in
        file order.

    Raises:
        OSError: If the file cannot be opened (raised on first iteration,
            because this is a generator).
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly instead of relying on the platform default
    # encoding, which varies between machines.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # record; skip them rather than failing in json.loads.
            if not line.strip():
                continue
            yield json.loads(line)

class LocalFinDER(FinDER):
    """FinDER task whose queries and corpus are supplied by the caller."""

    def load_data(self):
        """Initialise empty query and corpus maps instead of loading data.

        Overrides ``BaseTask.load_data`` so that no Hugging Face load is
        attempted; the real data is assigned to the instance afterwards.
        """
        self.queries, self.corpus = {}, {}

# Corpus map: document id -> {"title", "text"}; a missing field becomes "".
corpus = {
    record["_id"]: {
        "title": record.get("title", ""),
        "text": record.get("text", ""),
    }
    for record in load_jsonl(CORPUS_PATH)
}

# Query map: query id -> query text.
queries = {
    record["_id"]: record["text"]
    for record in load_jsonl(QUERY_PATH)
}

# Create the task, then attach the locally loaded data to it.
finder_task = LocalFinDER()
finder_task.corpus = corpus
finder_task.queries = queries

# Relevance judgments: tab-separated rows with query_id, corpus_id and score
# columns.
df = pd.read_csv(QRELS_PATH, sep="\t")

# Nested mapping {query_id: {corpus_id: score}} expected by the evaluator.
# The value columns are selected explicitly before ``apply`` so the grouping
# column is not part of each group frame; applying to the full frame is
# deprecated in pandas and emits a warning on every run.
qrels = (
    df.groupby("query_id")[["corpus_id", "score"]]
    .apply(lambda group: dict(zip(group["corpus_id"], group["score"])))
    .to_dict()
)

# First stage: dense retrieval with the intfloat/e5-large-v2 embedding model.
# Separate prompt strings are configured for queries and for documents
# (presumably prefixed to the respective inputs before encoding - confirm
# against SentenceTransformerEncoder).
encoder = SentenceTransformerEncoder(
    model_name_or_path="intfloat/e5-large-v2",
    query_prompt="query: ",
    doc_prompt="passage: ",
)
retriever = DenseRetrieval(model=encoder)

# Retrieve candidates for the task's queries with top_k=200.
# NOTE: in the recorded run, encoding the corpus took about 24 minutes on mps.
retrieval_result = finder_task.retrieve(retriever=retriever, top_k=200)

# Second stage: rescore the retrieved candidates with an MS MARCO
# cross-encoder wrapped in the project's reranker.
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-12-v2")
reranker = CrossEncoderReranker(model=cross_encoder)

# Rerank the first-stage results with top_k=100, scoring in batches of 32.
reranking_result = finder_task.rerank(
    reranker=reranker, results=retrieval_result, top_k=100, batch_size=32
)

# Score the reranked run against the relevance judgments at cutoffs 1, 5, 10.
ndcg, map_, recall, precision = finder_task.evaluate(
    qrels=qrels, results=reranking_result, k_values=[1, 5, 10]
)

# Report each metric table on its own line, label first.
for label, table in (
    ("NDCG:", ndcg),
    ("MAP:", map_),
    ("Recall:", recall),
    ("Precision:", precision),
):
    print(label, table)

# Write the task's results under ./results.
finder_task.save_results(output_dir="./results")

  qrels = df.groupby("query_id").apply(lambda g: dict(zip(g["corpus_id"], g["score"]))).to_dict()
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: mps
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: intfloat/e5-large-v2
INFO:financerag.retrieval.dense:Encoding queries...
Batches: 100%|██████████| 4/4 [00:03<00:00,  1.29it/s]
INFO:financerag.retrieval.dense:Sorting corpus by document length...
INFO:financerag.retrieval.dense:Encoding corpus in batches... This may take a while.
INFO:financerag.retrieval.dense:Encoding batch 1/1...
Batches: 100%|██████████| 217/217 [24:07<00:00,  6.67s/it]
INFO:sentence_transformers.cross_encoder.CrossEncoder:Use pytorch device: mps
INFO:financerag.rerank.cross_encoder:Starting To Rerank Top-100....
Batches: 100%|██████████| 675/675 [10:52<00:00,  1.03it/s]
INFO:financerag.tasks.BaseTask:For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_

NDCG: {'NDCG@1': 0.25, 'NDCG@5': 0.3363, 'NDCG@10': 0.37007}
MAP: {'MAP@1': 0.21875, 'MAP@5': 0.30514, 'MAP@10': 0.3217}
Recall: {'Recall@1': 0.21875, 'Recall@5': 0.39687, 'Recall@10': 0.48646}
Precision: {'P@1': 0.25, 'P@5': 0.1125, 'P@10': 0.07188}
