In [2]:
import sys
import os

# Resolve the directory two levels above the current working directory and
# expose it on sys.path so the project packages can be imported.
src_path = os.path.abspath(os.path.join(os.pardir, os.pardir))

# Guarded so that re-running this cell never adds the same entry twice.
if src_path not in sys.path:
    sys.path.append(src_path)

In [3]:
from core.base_urls import SEARCHING_URL
from core.datasets import dataset_test_1
from utils.extracting import extract_doc_ids
from evaluation.precision import get_precision_at_k
from evaluation.recall import get_recall
import requests

# Evaluate the search service on the first few queries of the test dataset:
# for each query, fetch the retrieved doc ids over HTTP, compare them with the
# dataset's relevance judgements, and report precision and recall.

# Evaluation settings (kept together so they are easy to tune).
MAX_QUERIES = 10   # stop once this many queries have been evaluated successfully
PRECISION_K = 10   # cut-off rank handed to get_precision_at_k

queries = dataset_test_1.queries_iter()
qrels = dataset_test_1.qrels_iter()

# Index the relevance judgements by query id in a single pass, before the
# query loop. Filtering `qrels` inside the loop would consume it on the first
# query if it is a one-shot iterator, leaving every later query with an empty
# relevant list; it would also rescan all judgements for every query.
# Trade-off: the grouped ids for the whole dataset are held in memory.
relevant_ids_by_query = {}
for qrel in qrels:
    # doc ids are cast to int before being handed to the metric helpers
    relevant_ids_by_query.setdefault(qrel.query_id, []).append(int(qrel.doc_id))

count = 0      # number of queries evaluated successfully so far
recalls = []   # one recall value per successfully evaluated query

for query in queries:
    if count == MAX_QUERIES:
        break

    query_id = query.query_id
    print(f'processing {query_id}')

    # get matching results
    search_text = query.text
    request_body = {
        # NOTE(review): presumably the service-side id of dataset_test_1 - confirm
        'dataset_id': 3,
        'search_text': search_text,
    }
    response = requests.post(SEARCHING_URL, json=request_body)

    if response.status_code != 200:
        # A failed request does not count towards MAX_QUERIES; report it
        # instead of skipping silently so missing results are visible.
        print(f'skipping {query_id}: search request returned {response.status_code}')
        continue

    # get retrieved docs
    retrieved_docs = response.json()['data']
    retrieved_ids = extract_doc_ids(retrieved_docs)

    print('Retrieved IDs')
    print(retrieved_ids)

    # get relevant docs (empty list when the query has no judgements)
    relevant_ids = relevant_ids_by_query.get(query_id, [])

    print('Relevant IDs')
    print(relevant_ids)

    # calc precision at the configured cut-off
    precision = get_precision_at_k(retrieved_ids, relevant_ids, PRECISION_K)
    print(f'Precision@{PRECISION_K} is: {precision}')

    # calc recall; no cut-off is passed, so this is not recall@k
    recall = get_recall(retrieved_ids, relevant_ids)
    print(f'Recall is: {recall}')
    recalls.append(recall)
    print('-------------------------------------')

    count = count + 1

# calc average recall over all evaluated queries, once, after the loop.
# Guarded so that a run with no successful request does not divide by zero.
if recalls:
    avg_recall = sum(recalls) / len(recalls)
    print(f'Average Recall is: {avg_recall}')
else:
    avg_recall = None
    print('Average Recall is undefined: no query was evaluated successfully')



processing 0
Retrieved IDs
[515489, 825728, 991137, 1065627, 1067605, 1285713, 1320137, 1469751, 1836154, 1910120, 2328272, 3203590, 3610080, 3845894, 4391121, 4423203, 4430962, 4435369, 4462419, 4465608, 4702639, 4784069, 5372773, 6077214, 6082738, 6219790, 6948886, 7399084, 7547329, 7581911, 8417211, 8891333, 9705208, 10342807, 10627801, 10906636, 10931595, 11335860, 11390393, 12086599, 12225214, 12801438, 12824568, 13036442, 13231899, 13878124, 13923069, 14103509, 14550841, 15405204, 16929739, 17123657, 17388232, 17518195, 18953920, 19651306, 20054396, 20155713, 20722510, 20888849, 21257564, 22522432, 23124332, 23244529, 23305547, 24660385, 24998637, 25298276, 25301182, 25657127, 26071782, 26445118, 26596106, 27077180, 28138927, 28193026, 28436879, 29334259, 32181055, 34034749, 34753204, 34905328, 36233757, 36480032, 36637129, 37437064, 37949139, 38180456, 39174007, 40212412, 40584205, 40667577, 42240424, 42731834, 43385013, 44420873, 45027320, 45638119, 95764370, 144555102]
Relevan