In [1]:
import pandas as pd
import requests
from storge.storge import load_df

In [2]:
def get_relevent_lists(dataset: str, irr: int):
    df = load_df(dataset,'qrels')
    queries = df['query_id'].tolist()
    docs = df['doc_id'].tolist()
    relevance = df['relevance'].tolist()
    relevent_lists = {}
    for i in range(0, len(relevance)):
        if relevance[i] != irr:
            if queries[i] not in relevent_lists:
                relevent_lists[queries[i]] = []
            relevent_lists[queries[i]].append(docs[i])
    return relevent_lists

In [3]:
def get_retrieved_lists(dataset: str):
    df = load_df(dataset,'queries')
    query_id = df['query_id'].tolist()
    text = df['text'].tolist()
    retrieved_lists = {}
    for i in range(0, len(query_id)):
        response = requests.get(f"http://127.0.0.1:8000/search/{dataset}", params={"query": text[i]})
        results = response.json()
        retrieved_lists[query_id[i]] = results["ID"]
    return retrieved_lists

In [4]:
def precision_at_10(retrieved_list, relevant_list):
    top_10_retrieved = retrieved_list[:10]
    num_relevant = sum([1 for doc in top_10_retrieved if doc in relevant_list])
    p_at_10 = num_relevant / 10.0
    return p_at_10

In [5]:
def recall_at_10(retrieved_list, relevant_list):
    top_10_retrieved = retrieved_list[:10]
    num_relevant_in_top_10 = sum([1 for doc in top_10_retrieved if doc in relevant_list])
    r_at_10 = num_relevant_in_top_10 / len(relevant_list) if relevant_list else 0
    return r_at_10
 

In [6]:
def calculate_map_at_k(retrieved_lists, relevant_lists, k=10):
    ap_at_k = []
    for query, retrieved_list in retrieved_lists.items():
        if query in relevant_lists:
            relevant_list = set(relevant_lists[query])
            ap = 0
            correct = 0
            
            num_elements_to_consider = min(k, len(retrieved_list))
            
            for i, doc in enumerate(retrieved_list[:num_elements_to_consider]):
                if doc in relevant_list:
                    correct += 1
                    ap += correct / (i + 1)
                    
            if correct > 0:
                ap /= min(len(relevant_list), num_elements_to_consider)
                ap_at_k.append(ap)
                
    return sum(ap_at_k) / len(ap_at_k) if ap_at_k else 0


In [7]:
def calculate_mrr_at_k(retrieved_lists, relevant_lists, k=10):
    rr_at_k = []
    for query, retrieved_list in retrieved_lists.items():
        if query in relevant_lists:
            relevant_list = set(relevant_lists[query])

            num_elements_to_consider = min(k, len(retrieved_list))
            
            for i, doc in enumerate(retrieved_list[:num_elements_to_consider]):
                if doc in relevant_list:
                    rr_at_k.append(1 / (i + 1))
                    break
            else:

                rr_at_k.append(0)
                

    return sum(rr_at_k) / len(rr_at_k) if rr_at_k else 0


In [8]:
def evaluation(dataset: str, irr: int):
    df = load_df(dataset,'queries')
    query_id = df['query_id'].tolist()


    relevent_lists = get_relevent_lists(dataset, irr)
    retrieved_lists = get_retrieved_lists(dataset)


    print(f"MAP for {dataset}:")
    print(calculate_map_at_k(retrieved_lists,relevent_lists,10))

    print(f"MRR for {dataset}:")
    print(calculate_mrr_at_k(retrieved_lists,relevent_lists,10))


    print(f"precision for the first five queries:")
    for id in query_id[0:5]:
        print(precision_at_10(retrieved_lists[id], relevent_lists[id]))
    
    sum = 0
    for id in query_id:
        sum += precision_at_10(retrieved_lists[id], relevent_lists[id])
    sum /= len(query_id)

    print(f"the avrage of precision of all queries:")
    print(sum)



    print(f"recall for the first five queries:")
    for id in query_id[0:5]:
        print(recall_at_10(retrieved_lists[id], relevent_lists[id]))

    sum = 0
    for id in query_id:
        sum += recall_at_10(retrieved_lists[id], relevent_lists[id])
    sum /= len(query_id)

    print(f"the avrage of recall of all queries:")
    print(sum)
    

In [9]:
evaluation("wikiren1ktraining", 0)

MAP for wikiren1ktraining:
0.18207468705704702
MRR for wikiren1ktraining:
0.5826276216857943
precision for the first five queries:
0.6
0.1
0.1
0.1
0.1
the avrage of precision of all queries:
0.1863573407202201
recall for the first five queries:
1.0
0.16666666666666666
0.16666666666666666
0.1111111111111111
0.07142857142857142
the avrage of recall of all queries:
0.17074972751884332


In [10]:
evaluation("antiquetrain", 1)

MAP for antiquetrain:
0.17007984556232536
MRR for antiquetrain:
0.27233165181433955
precision for the first five queries:
0.0
0.8
0.0
0.1
0.0
the avrage of precision of all queries:
0.08858202802967746
recall for the first five queries:
0.0
0.08695652173913043
0.0
0.2
0.0
the avrage of recall of all queries:
0.10733875141449607
