# allganize-RAG-Evaluation - retrieval performance
## Methodology
```
1. Load DocStore, VectorStore
    1-1. Load DocStore
    1-2. Load VectorStore
2. Initialize Embedder
3. Load Evaluation Data
    3-1. Load Query & Ground Truth
    3-2. Calculate Query Embeddings
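4. Run Retrieval
5. Evaluate
    5-1. Source 'File' Level Evaluation (mAP, mRR)
    5-2. Source 'File+Page' Level Evaluation (mAP, mRR)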
```


In [117]:
import json
import os

from config import settings

# 1. Load DocStore, VectorStore

## 1-1. Load DocStore

In [118]:
from psiking.core.storage.docstore.in_memory import InMemoryDocumentStore

In [119]:
# Create an in-memory document store and populate it from the JSON file on disk.
# Later cells look up chunk metadata in it by chunk id.
doc_store = InMemoryDocumentStore()
doc_store.load('database/docstore_multimodal_v2507.json')

## 1-2. Load VectorStore

In [120]:
from qdrant_client import QdrantClient
from psiking.core.storage.vectorstore.qdrant import QdrantSingleVectorStore

# Initialize the Qdrant client.
# Alternative kept for reference: an in-memory client instead of a server.
# client = QdrantClient(":memory:")
# The active client targets a Qdrant server at localhost:6333.
client = QdrantClient(host="localhost", port=6333)
# Name of the collection that the vector store wrapper below is bound to.
collection_name = "allganize-finance-multimodal-v2507"

# Wrap the client and collection in the single-vector store used by `retrieve`.
vector_store = QdrantSingleVectorStore(
    collection_name=collection_name,
    client=client
)

# 2. Initialize Embedder

In [121]:
import asyncio
from tqdm.asyncio import tqdm

from psiking.core.embedder.vllm.online_jina_emb_v4 import VLLMOnlineJinaEmbV4Embedder
# Build the embedder used for query embeddings; the endpoint URL and model
# name are read from the project `settings` object.
embedder = VLLMOnlineJinaEmbV4Embedder(
    base_url=settings.multimodal_embedding_base_url,
    model=settings.multimodal_embedding_model
)

# 3. Load Evaluation Data

## 3-1. Load Query & Ground Truth

In [122]:
import pandas as pd
# Read the tab-separated ground truth, then keep only the finance-domain rows.
# The row count is printed before and after filtering.
answer_df = pd.read_csv('data/retrieval_ground_truth.tsv', sep='\t')
print(len(answer_df))
answer_df = answer_df.loc[answer_df['domain'] == 'finance']
print(len(answer_df))

300
60


In [123]:
# Preview the first rows of the filtered ground truth table.
answer_df.head()

Unnamed: 0,domain,question,target_file_id,target_file_name,target_page_no,context_type,target_answer
0,finance,"시중은행, 지방은행, 인터넷은행의 인가 요건 및 절차에 차이가 있는데 그 차이점은 ...",c94f675e-7d81-48bd-88f8-c5ff766190cc,[별첨] 지방은행의 시중은행 전환시 인가방식 및 절차.pdf,4,paragraph,"시중은행, 지방은행, 인터넷은행 모두 은행업을 영위하기 위해서는 '은행법' 제8조에..."
1,finance,"은행업을 신청하고자 할 때, 은행법상 소유규제에 부합하는 대주주 요건을 충족하려면 ...",c94f675e-7d81-48bd-88f8-c5ff766190cc,[별첨] 지방은행의 시중은행 전환시 인가방식 및 절차.pdf,7,table,은행업을 신청하려면 대주주 요건을 충족해야 합니다. 대주주 요건으로는 부실금융기관 ...
2,finance,본인가를 받으려는 지방은행이 시중은행 전환시 예비인가를 받을 필요가 있는지 설명하시...,c94f675e-7d81-48bd-88f8-c5ff766190cc,[별첨] 지방은행의 시중은행 전환시 인가방식 및 절차.pdf,8,paragraph,"본인가를 받으려는 지방은행이 시중은행 전환을 신청하는 경우, 예비인가를 받을 필요는..."
3,finance,"은행법에 의거 예비인가를 신청할 수 있는지와, 그 경우 금융위원회가 검토했어야 하는...",c94f675e-7d81-48bd-88f8-c5ff766190cc,[별첨] 지방은행의 시중은행 전환시 인가방식 및 절차.pdf,10,paragraph,은행법에 의하면 예비인가를 신청할 수 있습니다. 제8조에 따른 인가를 받으려는 자는...
4,finance,2019년 YTD 기준으로 브라질의 주식 시장 수익률과 베트남의 주식 시장 수익률 ...,7373884a-8255-482d-9e7c-00b919083526,★2019 제1회 증시콘서트 자료집_최종★.pdf,6,image,Refinitiv에서 제공한 자료에 따르면 2019년 YTD 브라질의 주식 시장 수...


## 3-2. Calculate Query Embeddings

In [124]:
import asyncio
from typing import List
from tqdm.asyncio import tqdm as atqdm

import numpy as np
from psiking.core.storage.vectorstore.schema import (
    MetadataFilters,
    FilterOperator,
    VectorStoreQuery,
    VectorStoreQueryMode,
    VectorStoreQueryOptions,
)   

In [125]:
# Evaluation questions as a plain Python list, in ground-truth row order.
queries = answer_df['question'].tolist()

In [126]:
async def calculate_embedding(semaphore, text: str):
    """Embed a single text with the module-level `embedder`.

    The text is wrapped in a one-message, user-role chat payload and sent
    through `embedder.arun` with pooling and normalization enabled.

    Args:
        semaphore: Async context manager held for the duration of the
            embedder call, limiting how many calls run at once.
        text: The text to embed.

    Returns:
        Whatever `embedder.arun` returns for the request.

    Raises:
        Exception: Any error raised by the embedder is printed with an
            "ERR" prefix and then re-raised.
    """
    user_message = {
        'role': 'user',
        'content': [{'type': 'text', 'text': text}],
    }
    async with semaphore:
        try:
            return await embedder.arun(
                input=[user_message],
                input_format='messages',
                pool=True,
                normalize=True,
            )
        except Exception as e:
            # Surface the failure in the notebook output, then propagate it.
            print("ERR {}".format(str(e)))
            raise e

In [127]:
semaphore = asyncio.Semaphore(16)
tasks = []
for query in queries:
    task = calculate_embedding(semaphore, query)
    tasks.append(task)

query_embeddings = await tqdm.gather(*tasks)

100%|██████████| 60/60 [00:16<00:00,  3.64it/s]


# 4. Run Retrieval

In [133]:
def retrieve(
    query_embedding: np.ndarray,
    k: int = 10
)->List[str]:
    """Return the ids of the chunks retrieved for one query embedding.

    Runs a dense-mode query against the module-level `vector_store`.

    Args:
        query_embedding: Embedding of the query, passed as the dense vector.
        k: Value passed as `top_k` in the query options.

    Returns:
        The `id` of every result, in the order the vector store returned them.
    """
    hits = vector_store.query(
        query=VectorStoreQuery(dense_embedding=query_embedding),
        options=VectorStoreQueryOptions(
            mode=VectorStoreQueryMode.DENSE,
            top_k=k,
        ),
    )
    return [hit.id for hit in hits]

In [134]:
# Retrieve k chunk ids per query; 30 leaves headroom over the largest
# evaluation cutoff used below (15).
k = 30
retrieved_chunkids = [retrieve(embedding, k=k) for embedding in query_embeddings]

# 5. Evaluate

## 5-1. Source 'File' Level Evaluation
* Chunk is considered relevant if it is from the ground truth target file

In [135]:
# Ground-truth file id for each query, aligned with `queries` by position.
answer_fileids = answer_df['target_file_id'].tolist()

In [136]:
# Translate each retrieved chunk id into the `source_id` stored in that
# chunk's doc-store metadata, keeping one list per query.
retrieved_fileids = [
    [doc_store.get(chunk_id)[0].metadata['source_id'] for chunk_id in per_query_ids]
    for per_query_ids in retrieved_chunkids
]

### 5-1-1. mean Average Precision

In [143]:
def calculate_filelevel_ap(
    retrieved_ids: List[str],
    answer_id: str,
    at_k: int = 5
) -> float:
    """Average the precision@i values for i = 1..at_k at file level.

    A retrieved item is relevant when its file id equals `answer_id`.

    NOTE: this is the mean of precision@1 .. precision@at_k over every rank,
    not the textbook Average Precision, which averages precision only at the
    ranks where a relevant item occurs.

    Args:
        retrieved_ids: File ids of the retrieved chunks, in rank order.
        answer_id: Ground-truth file id for the query.
        at_k: Number of top ranks to evaluate.

    Returns:
        The mean precision over ranks 1..at_k, or 0.0 when `at_k` is not
        positive. If fewer than `at_k` results were retrieved, the missing
        ranks count as non-relevant instead of raising IndexError.
    """
    if at_k <= 0:
        return 0.0

    available = len(retrieved_ids)
    precisions = []
    relevant_count = 0
    for rank in range(1, at_k + 1):
        # Ranks beyond the retrieved list cannot be hits.
        if rank <= available and retrieved_ids[rank - 1] == answer_id:
            relevant_count += 1
        precisions.append(relevant_count / rank)
    return sum(precisions) / at_k
        
# Report file-level mAP at each cutoff, averaged over all evaluation queries.
for at_k in (5, 10, 15):
    aps = [
        calculate_filelevel_ap(file_ids, gold_file_id, at_k=at_k)
        for file_ids, gold_file_id in zip(retrieved_fileids, answer_fileids, strict=True)
    ]
    print("mAP@{}: {:.4f}".format(at_k, sum(aps) / len(aps)))

mAP@5: 0.7758
mAP@10: 0.7376
mAP@15: 0.6966


### 5-1-2. mean Reciprocal Rank

In [144]:
def calculate_filelevel_rr(
    retrieved_ids: List[str],
    answer_id: str,
    at_k: int = 5
) -> float:
    """Compute the file-level reciprocal rank within the top `at_k` results.

    Args:
        retrieved_ids: File ids of the retrieved chunks, in rank order.
        answer_id: Ground-truth file id for the query.
        at_k: Number of top ranks to inspect.

    Returns:
        1 / rank of the first result whose file id equals `answer_id`, or
        0.0 when there is no match in the top `at_k`. A result list shorter
        than `at_k` is handled without raising IndexError.
    """
    # max() keeps a negative at_k from being read as "all but the last n".
    top_ids = retrieved_ids[:max(at_k, 0)]
    for rank, retrieved_id in enumerate(top_ids, start=1):
        if retrieved_id == answer_id:
            return 1 / rank
    return 0.0

# Report file-level mRR at each cutoff, averaged over all evaluation queries.
# (The former standalone `at_k = 5` was dead: the loop rebinds it at once.)
for at_k in [5,10,15]:
    rrs = [
        calculate_filelevel_rr(retrieved, answer, at_k=at_k)
        for retrieved, answer in zip(retrieved_fileids, answer_fileids, strict=True)
    ]

    print("mRR@{}: {:.4f}".format(at_k, sum(rrs)/len(rrs)))

mRR@5: 0.8678
mRR@10: 0.8678
mRR@15: 0.8678


## 5-2. Source 'File+Page' Level Evaluation
* Chunk is considered relevant if it is from the ground truth file+page combination

In [165]:
# Ground-truth (file id, page number) pair for each query, aligned with
# `queries` by position; page numbers are converted to plain ints.
answers = [
    (file_id, int(page_no))
    for file_id, page_no in zip(
        answer_df['target_file_id'], answer_df['target_page_no']
    )
]

In [166]:
# Show the first (file id, page number) ground-truth pair as a sanity check.
answers[0]

('c94f675e-7d81-48bd-88f8-c5ff766190cc', 4)

In [161]:
# Inspect the metadata of the first retrieved chunk for the query at index 11
# to see which fields (reader, source_id, prov, ...) are available.
x = retrieved_chunkids[11][0]
doc_store.get(x)[0].metadata

{'reader': 'DoclingPDFReader',
 'source_id': '980889bb-16cd-447f-b5eb-1384b84903cc',
 'domain': 'finance',
 'prov': '[{"page_no": 30, "bbox": {"l": 68.143, "t": 126.11800732421875, "r": 223.938, "b": 102.7500073242187, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 79]}]'}

In [149]:
# Fetch the full doc-store metadata dict for every retrieved chunk,
# keeping one list per query.
retrieved_metadata = [
    [doc_store.get(chunk_id)[0].metadata for chunk_id in per_query_ids]
    for per_query_ids in retrieved_chunkids
]

### 5-2-1. mean Average Precision

In [None]:
def determine_relevancy(metadata, answer):
    """Return 1 if a chunk comes from the ground-truth file and page, else 0.

    Args:
        metadata: Chunk metadata with `source_id`, `reader` and `prov` keys.
            For the 'DoclingPDFReader' reader, `prov` is a JSON string that
            decodes to a list of entries each carrying `page_no`; for any
            other reader, `prov` is indexed directly with the key 'page'.
        answer: Sequence whose first item is the target file id and whose
            second item is the target page number.

    Returns:
        1 when the file id matches and the target page is among the chunk's
        pages, otherwise 0.
    """
    expected_file, expected_page = answer[0], answer[1]

    # A chunk from a different file can never be relevant.
    if expected_file != metadata['source_id']:
        return 0

    if metadata['reader'] == 'DoclingPDFReader':
        # Docling provenance is serialized JSON and may span several pages.
        pages = [entry['page_no'] for entry in json.loads(metadata['prov'])]
    else:
        pages = [metadata['prov']['page']]

    return 1 if expected_page in pages else 0

def calculatel_ap(
    retrieved_metadatas: List[dict],
    answer: tuple,
    at_k: int = 5
) -> float:
    """Average the precision@i values for i = 1..at_k at file+page level.

    Relevance of each retrieved chunk is decided by `determine_relevancy`.

    NOTE: this is the mean of precision@1 .. precision@at_k over every rank,
    not the textbook Average Precision, which averages precision only at the
    ranks where a relevant item occurs.

    Args:
        retrieved_metadatas: Metadata dicts of the retrieved chunks, in rank
            order.
        answer: Ground-truth (file id, page number) pair for the query.
        at_k: Number of top ranks to evaluate.

    Returns:
        The mean precision over ranks 1..at_k, or 0.0 when `at_k` is not
        positive. If fewer than `at_k` results were retrieved, the missing
        ranks count as non-relevant instead of raising IndexError.
    """
    if at_k <= 0:
        return 0.0

    available = len(retrieved_metadatas)
    precisions = []
    relevant_count = 0
    for rank in range(1, at_k + 1):
        # Ranks beyond the retrieved list cannot be hits.
        if rank <= available and determine_relevancy(retrieved_metadatas[rank - 1], answer):
            relevant_count += 1
        precisions.append(relevant_count / rank)
    return sum(precisions) / at_k
        
# Report file+page-level mAP at each cutoff, averaged over all queries.
for at_k in (5, 10, 15):
    aps = [
        calculatel_ap(chunk_metadatas, gold, at_k=at_k)
        for chunk_metadatas, gold in zip(retrieved_metadata, answers, strict=True)
    ]
    print("mAP@{}: {:.4f}".format(at_k, sum(aps) / len(aps)))

mAP@5: 0.2231
mAP@10: 0.1685
mAP@15: 0.1387


### 5-2-2. mean Reciprocal Rank

In [170]:
def determine_relevancy(metadata, answer):
    """Return 1 if a chunk comes from the ground-truth file and page, else 0.

    Args:
        metadata: Chunk metadata with `source_id`, `reader` and `prov` keys.
            For the 'DoclingPDFReader' reader, `prov` is a JSON string that
            decodes to a list of entries each carrying `page_no`; for any
            other reader, `prov` is indexed directly with the key 'page'.
        answer: Sequence whose first item is the target file id and whose
            second item is the target page number.

    Returns:
        1 when the file id matches and the target page is among the chunk's
        pages, otherwise 0.
    """
    expected_file, expected_page = answer[0], answer[1]

    # A chunk from a different file can never be relevant.
    if expected_file != metadata['source_id']:
        return 0

    if metadata['reader'] == 'DoclingPDFReader':
        # Docling provenance is serialized JSON and may span several pages.
        pages = [entry['page_no'] for entry in json.loads(metadata['prov'])]
    else:
        pages = [metadata['prov']['page']]

    return 1 if expected_page in pages else 0

def calculate_rr(
    retrieved_metadatas: List[dict],
    answer: tuple,
    at_k: int = 5
) -> float:
    """Compute the file+page-level reciprocal rank within the top `at_k`.

    Relevance of each retrieved chunk is decided by `determine_relevancy`.

    Args:
        retrieved_metadatas: Metadata dicts of the retrieved chunks, in rank
            order.
        answer: Ground-truth (file id, page number) pair for the query.
        at_k: Number of top ranks to inspect.

    Returns:
        1 / rank of the first relevant chunk, or 0.0 when none of the top
        `at_k` chunks is relevant. A result list shorter than `at_k` is
        handled without raising IndexError.
    """
    # max() keeps a negative at_k from being read as "all but the last n".
    top_metadatas = retrieved_metadatas[:max(at_k, 0)]
    for rank, metadata in enumerate(top_metadatas, start=1):
        if determine_relevancy(metadata, answer):
            return 1 / rank
    return 0.0
        
# Report file+page-level mRR at each cutoff, averaged over all queries.
for at_k in (5, 10, 15):
    rrs = [
        calculate_rr(chunk_metadatas, gold, at_k=at_k)
        for chunk_metadatas, gold in zip(retrieved_metadata, answers, strict=True)
    ]
    print("mRR@{}: {:.4f}".format(at_k, sum(rrs) / len(rrs)))

mRR@5: 0.4381
mRR@10: 0.4477
mRR@15: 0.4517
