In [3]:
import sys
import os
# Put the parent of the current working directory on the import path so the
# local modules imported below can be found (presumably they live one level
# above this notebook -- confirm against the repo layout).
# The membership test keeps the cell idempotent: re-running it does not stack
# duplicate ".." entries on sys.path.
if ".." not in sys.path:
    sys.path.append("..")

In [4]:
from langchain_chroma import Chroma
from ingestion import load_documents, split_documents, get_embeddings
from vector_store import build_or_update_chroma_db
from rag_query import query_rag

✅ Loading existing Chroma DB (read-only)


In [5]:
# Retrieval benchmark: one entry per question.
#   "question"        - text sent to the vector store
#   "expected_source" - set holding the file name that should be retrieved
#   "expected_page"   - page number recorded for the answer; it is not read by
#                       the evaluation functions visible in this notebook
# Each row below is (question, source file, page); every case gets its own set.
evaluation_set = [
    {"question": question, "expected_source": {source}, "expected_page": page}
    for question, source, page in (
        (
            "How do you get out of jail in Monopoly?",
            "monopoly.pdf",
            5,
        ),
        (
            "What is the total starting money for a player in Monopoly and how is it divided?",
            "monopoly.pdf",
            3,
        ),
        (
            "Under what conditions is a player declared bankrupt in Monopoly?",
            "monopoly.pdf",
            6,
        ),
        (
            "In Ticket to Ride, how many Destination Tickets must a player keep at the start of the game?",
            "ticket_to_ride.pdf",
            2,
        ),
        (
            "How many points is a route of length 6 worth in Ticket to Ride?",
            "ticket_to_ride.pdf",
            4,
        ),
        (
            "What triggers the end of the game in Ticket to Ride?",
            "ticket_to_ride.pdf",
            4,
        ),
        (
            "What are the rules for using the 'Speed Die' in Monopoly?",
            "monopoly.pdf",
            1,
        ),
        (
            "What is the bonus for having the Longest Continuous Path in Ticket to Ride?",
            "ticket_to_ride.pdf",
            4,
        ),
    )
]

In [6]:
import os

def evaluate_retrieval(db, evaluation_set, k=5):
    """Report how often an expected source file appears in the top-k results.

    For every item the question is passed to ``db.similarity_search`` and the
    file names of the returned documents are compared with the item's
    ``expected_source``. A question is a hit when at least one expected file
    name is among the retrieved ones. The score is printed as "Recall@k"; as
    computed here it is the fraction of questions with at least one hit.

    Args:
        db: Object exposing ``similarity_search(question, k=k)`` that returns
            documents carrying a ``metadata`` mapping.
        evaluation_set: Iterable of dicts with a "question" string and an
            "expected_source" entry (a collection of file names, or a single
            file name given as a string).
        k: Number of documents requested per question.

    Returns:
        Fraction of questions with a hit, or 0.0 when ``evaluation_set`` is
        empty (instead of raising ``ZeroDivisionError``).
    """
    hits = 0
    total = 0

    for item in evaluation_set:
        total += 1
        question = item["question"]
        expected = item["expected_source"]

        # A bare string would otherwise be intersected character by character.
        expected_names = {expected} if isinstance(expected, str) else set(expected)

        results = db.similarity_search(question, k=k)

        # Source normalisation: keep only the file name. Both "/" and "\\" are
        # treated as separators so paths stored on Windows still match when
        # the notebook runs elsewhere (same rule as evaluate_mrr). Documents
        # without a "source" entry cannot match and are skipped.
        retrieved_sources = set()
        for doc in results:
            source = doc.metadata.get("source")
            if source:
                retrieved_sources.add(
                    str(source).replace("\\", "/").rsplit("/", 1)[-1]
                )

        hit = not retrieved_sources.isdisjoint(expected_names)

        print(f"Question: {question}")
        print(f"Retrieved sources: {retrieved_sources}")
        print(f"Expected: {expected}")
        print(f"HIT: {hit}")
        print("-" * 50)

        if hit:
            hits += 1

    recall = hits / total if total else 0.0
    print(f"\nRecall@{k}: {recall:.2f}")
    return recall


In [7]:
from langchain_chroma import Chroma

# Open the Chroma store persisted under ../chroma_db, using the embedding
# function returned by get_embeddings().
embedding_fn = get_embeddings()
db = Chroma(persist_directory="../chroma_db", embedding_function=embedding_fn)

# Last expression of the cell, so the returned score is also displayed.
evaluate_retrieval(db, evaluation_set, k=5)

Question: How do you get out of jail in Monopoly?
Retrieved sources: {'monopoly.pdf'}
Expected: {'monopoly.pdf'}
HIT: True
--------------------------------------------------
Question: What is the total starting money for a player in Monopoly and how is it divided?
Retrieved sources: {'monopoly.pdf'}
Expected: {'monopoly.pdf'}
HIT: True
--------------------------------------------------
Question: Under what conditions is a player declared bankrupt in Monopoly?
Retrieved sources: {'monopoly.pdf'}
Expected: {'monopoly.pdf'}
HIT: True
--------------------------------------------------
Question: In Ticket to Ride, how many Destination Tickets must a player keep at the start of the game?
Retrieved sources: {'ticket_to_ride.pdf'}
Expected: {'ticket_to_ride.pdf'}
HIT: True
--------------------------------------------------
Question: How many points is a route of length 6 worth in Ticket to Ride?
Retrieved sources: {'ticket_to_ride.pdf'}
Expected: {'ticket_to_ride.pdf'}
HIT: True
--------------

1.0

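MRR (Mean Reciprocal Rank): for each question, take 1/rank, where rank is the position at which the first relevant document appears in the results (0 if none appears in the top k), then average these values over all questions.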

In [8]:
def evaluate_mrr(db, evaluation_set, k=5):
    """Compute the Mean Reciprocal Rank (MRR) of the expected source file.

    For every item the question is passed to
    ``db.similarity_search_with_score`` and the results are scanned in the
    order returned. The reciprocal rank is ``1 / position`` of the first
    document whose file name is in the item's ``expected_source``, or 0 when
    no such document is returned. The mean over all items is printed and
    returned.

    Args:
        db: Object exposing ``similarity_search_with_score(question, k=k)``
            that returns ``(document, score)`` pairs; each document carries a
            ``metadata`` mapping. The score itself is not used.
        evaluation_set: Iterable of dicts with a "question" string and an
            "expected_source" entry (a collection of file names, or a single
            file name given as a string).
        k: Number of documents requested per question.

    Returns:
        The mean reciprocal rank, or 0.0 when ``evaluation_set`` is empty
        (instead of raising ``ZeroDivisionError``).
    """
    reciprocal_ranks = []  # one reciprocal rank per question

    for item in evaluation_set:
        question = item["question"]
        expected_sources = item["expected_source"]

        # A bare string would otherwise turn the membership test below into a
        # substring match.
        if isinstance(expected_sources, str):
            expected_sources = {expected_sources}

        # Retrieve top-k documents
        results = db.similarity_search_with_score(question, k=k)

        reciprocal_rank = 0.0  # stays 0 when no relevant document is found

        for rank, (doc, _score) in enumerate(results, start=1):
            source = doc.metadata.get("source")

            # A document without a "source" entry cannot match; skip it
            # rather than failing on None.
            if not source:
                continue

            # Keep only the file name; "/" and "\\" both count as separators.
            source_name = str(source).replace("\\", "/").rsplit("/", 1)[-1]

            # First relevant document found
            if source_name in expected_sources:
                reciprocal_rank = 1 / rank
                break

        reciprocal_ranks.append(reciprocal_rank)

    # Mean Reciprocal Rank
    if reciprocal_ranks:
        mrr = sum(reciprocal_ranks) / len(reciprocal_ranks)
    else:
        mrr = 0.0

    print(f"MRR@{k}: {mrr:.2f}")
    return mrr


In [9]:
# Score the same store and question set with MRR over the top 5 results.
evaluate_mrr(db=db, evaluation_set=evaluation_set, k=5)


MRR@5: 1.00


1.0