In [22]:
from pathlib import Path
import sys, os
# os.path.join('..') is just '..', so abspath() yields the parent of the current
# working directory; .parent then steps up one more level. The directory appended
# to sys.path is therefore the grandparent of the working directory.
# Presumably this is what makes `match_and_rank` importable below — verify against
# the repository layout.
libs_path = (Path(os.path.abspath(os.path.join('..'))).parent)
sys.path.append(str(libs_path))

from typing import Dict

import ir_datasets

from ir_measures import *
from match_and_rank import match_and_rank, clustering_match_and_rank

import ir_measures

In [23]:
def __get_queries_corpus(dataset_name: str) -> Dict[str, str]:
    """Load the evaluation queries for a dataset as a dictionary.

    The dictionary is built directly from the items yielded by
    ``queries_iter()``, so each item must be a two-element pair
    (presumably ``(query_id, text)`` -- confirm against ir_datasets).

    Args:
        dataset_name: ``"lifestyle"`` selects ``lotte/lifestyle/dev/search``,
            ``"quora"`` selects ``beir/quora/dev``; any other value falls
            back to ``antique/test``.

    Returns:
        The queries of the selected dataset. For ``"quora"`` only the first
        1000 queries yielded by the iterator are kept.
    """
    if dataset_name == "quora":
        # Quora is truncated to the first 1000 queries of the dev split.
        quora_queries = list(ir_datasets.load("beir/quora/dev").queries_iter())
        return dict(quora_queries[:1000])

    # Every name other than "lifestyle" (and "quora" above) means antique.
    dataset_id = "lotte/lifestyle/dev/search" if dataset_name == "lifestyle" else "antique/test"
    return dict(ir_datasets.load(dataset_id).queries_iter())


def __get_qrels_corpus(dataset_name: str):
    """Load all relevance judgments (qrels) for a dataset as a list.

    Args:
        dataset_name: ``"lifestyle"`` selects ``lotte/lifestyle/dev/search``,
            ``"quora"`` selects ``beir/quora/dev``; any other value falls
            back to ``antique/test``.

    Returns:
        A list holding every item yielded by the dataset's ``qrels_iter()``.
    """
    # Resolve the ir_datasets identifier first, then load exactly once.
    if dataset_name == "lifestyle":
        dataset_id = "lotte/lifestyle/dev/search"
    elif dataset_name == "quora":
        dataset_id = "beir/quora/dev"
    else:
        dataset_id = "antique/test"
    return list(ir_datasets.load(dataset_id).qrels_iter())

In [24]:
def _get_ground_truth(dataset_name: str):
    """Build the relevance ground truth for every query of a dataset.

    Args:
        dataset_name: Dataset selector, forwarded unchanged to
            ``__get_queries_corpus`` and ``__get_qrels_corpus``.

    Returns:
        A dict mapping each query id from the queries corpus to a
        ``{doc_id: relevance}`` dict. Queries that have no judgments map to
        an empty dict; judgments whose query id is not in the queries corpus
        are ignored.
    """
    queries_corpus = __get_queries_corpus(dataset_name)
    qrels_corpus = __get_qrels_corpus(dataset_name)

    # Seed one bucket per query, in query order, so queries without any
    # judgment still appear in the result.
    ground_truth = {query_id: {} for query_id in queries_corpus}

    # Group the judgments in a single pass instead of rescanning the whole
    # qrels list once per query.
    for item in qrels_corpus:
        relevant_docs = ground_truth.get(item.query_id)
        if relevant_docs is not None:
            # A repeated doc_id keeps its latest relevance.
            relevant_docs[item.doc_id] = item.relevance
    return ground_truth


def _get_search_results(dataset_name: str):
    """Run ``match_and_rank`` for every query of a dataset.

    Args:
        dataset_name: Dataset selector, used both to load the queries and
            as the second argument of ``match_and_rank``.

    Returns:
        A dict mapping each query id to a fresh ``{doc_id: score}`` dict
        copied from the items of the ranker's result.
    """
    return {
        query_id: dict(match_and_rank(query, dataset_name).items())
        for query_id, query in __get_queries_corpus(dataset_name).items()
    }

def _get_clustering_search_results(dataset_name: str):
    """Run ``clustering_match_and_rank`` for every query of a dataset.

    Args:
        dataset_name: Dataset selector, used both to load the queries and
            as the second argument of ``clustering_match_and_rank``.

    Returns:
        A dict mapping each query id to a fresh ``{doc_id: score}`` dict
        copied from the items of the ranker's result.
    """
    per_query_scores = {}
    for qid, text in __get_queries_corpus(dataset_name).items():
        ranked = clustering_match_and_rank(text, dataset_name)
        # Copy the ranker's items into a plain dict owned by this function.
        per_query_scores[qid] = dict(ranked.items())
    return per_query_scores

In [25]:
def evaluate(dataset_name: str, use_clustering: bool = False):
    """Compute and print aggregate retrieval metrics for a dataset.

    Args:
        dataset_name: Dataset selector, forwarded to the ground-truth and
            search-result builders.
        use_clustering: When True, rank with
            ``_get_clustering_search_results``; otherwise (the default, and
            the previous behaviour) rank with ``_get_search_results``. This
            replaces toggling the ranker by commenting lines in and out.

    Returns:
        None. The result of ``ir_measures.calc_aggregate`` for AP@10, AP,
        RR, P@10, R@5 and R@10 is printed.
    """
    ground_truth = _get_ground_truth(dataset_name)
    if use_clustering:
        search_results = _get_clustering_search_results(dataset_name)
    else:
        search_results = _get_search_results(dataset_name)

    evaluation_results = ir_measures.calc_aggregate([AP@10, AP, RR, P@10, R@5, R@10], ground_truth, search_results)
    print(evaluation_results)


# Evaluate each supported dataset in turn. Every call runs the ranker over all
# of that dataset's queries before anything is printed.
# NOTE(review): the dict comments below look like metric values recorded from an
# earlier run — confirm they still match the current code before relying on them.
evaluate("antique")
# {R@10: 0.13392374015710196, AP@10: 0.10606845337818997, AP: 0.2275471168023467, R@5: 0.08066153575893291, P@10: 0.39799999999999985, RR: 0.7656958906370507}

evaluate("lifestyle")
# {P@10: 0.07865707434052781, R@10: 0.28140366827417185, AP@10: 0.16861801215461153, R@5: 0.225449969694574, AP: 0.18797523744808736, RR: 0.30351466896690943}

evaluate("quora")
# {AP@10: 0.6308512246902128, R@5: 0.7224104784104783, P@10: 0.14700000000000057, R@10: 0.7930852406852404, RR: 0.7238663335960567, AP: 0.6498598451096447}

Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666
Loading antique dataset 403666


KeyboardInterrupt: 