In [24]:
!pip install faiss-cpu



In [25]:
import sys
import os
sys.path.append(os.path.abspath('..'))

In [26]:
from retrievers.BM25 import BM25Retriever
from retrievers.SPLADE import SPLADERetriever
from retrievers.DPR import DPRRetriever
from retrievers.UnionRetriever import UnionRetriever
# from retrievers.ColBERT import ColBERTRetriever
from Evaluation import Evaluation

In [27]:
import sys
import os
sys.path.append(os.path.abspath('..'))

In [28]:
doc_path = "../dataset/CISI.ALL"
qry_path = "../dataset/CISI.QRY"
rel_path = "../dataset/CISI.REL"

dpr_retriever = DPRRetriever(doc_path, qry_path, rel_path)
bm25_retriever = BM25Retriever(doc_path, qry_path, rel_path)
colbert_retriever = ColBERTRetriever(doc_path, qry_path, rel_path)
splade_retriever = SPLADERetriever(doc_path, qry_path, rel_path)

experiments = [
    (UnionRetriever([bm25_retriever, dpr_retriever], splade_retriever, doc_path, qry_path, rel_path, 100), '../results/union_bm25_dpr_splade_evaluation.txt'),
    (UnionRetriever([bm25_retriever, dpr_retriever], colbert_retriever, doc_path, qry_path, rel_path, 100), '../results/union_bm25_dpr_colbert_evaluation.txt'),
    (UnionRetriever([splade_retriever, dpr_retriever], bm25_retriever, doc_path, qry_path, rel_path, 100), '../results/union_dpr_splade_bm25_evaluation.txt'),
    (UnionRetriever([splade_retriever, dpr_retriever], colbert_retriever, doc_path, qry_path, rel_path, 100), '../results/union_dpr_splade_colbert_evaluation.txt'),
    (UnionRetriever([bm25_retriever, splade_retriever], dpr_retriever, doc_path, qry_path, rel_path, 100), '../results/union_bm25_splade_bpr_evaluation.txt'),
    (UnionRetriever([bm25_retriever, splade_retriever], colbert_retriever, doc_path, qry_path, rel_path, 100), '../results/union_bm25_splade_colbert_evaluation.txt'),
    (UnionRetriever([bm25_retriever, dpr_retriever, splade_retriever], colbert_retriever, doc_path, qry_path, rel_path, 100), '../results/union_bm25_dpr_splade_colbert_evaluation.txt'),
    (UnionRetriever([bm25_retriever, dpr_retriever, colbert_retriever], splade_retriever, doc_path, qry_path, rel_path, 100), '../results/union_bm25_dpr_colbert_splade_evaluation.txt'),
    (UnionRetriever([bm25_retriever, splade_retriever, colbert_retriever], dpr_retriever, doc_path, qry_path, rel_path, 100), '../results/union_bm25_splade_colbert_dpr_evaluation.txt'),
    (UnionRetriever([dpr_retriever, splade_retriever, colbert_retriever], bm25_retriever, doc_path, qry_path, rel_path, 100), '../results/union_dpr_splade_colbert_bm25_evaluation.txt'),
                ]

Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the

In [29]:
for experiment in experiments:
    model = experiment[0]
    results_file = experiment[1]
    retrieved_docs = {}
    relevant_docs = model.rel_set

    for idx in model.qry_set:
        num_relevant_docs = len(relevant_docs.get(idx, set()))
        retrieved_results = model.retrieve_score_union(idx)
        sliced_retrieved_results = retrieved_results[:num_relevant_docs]
        retrieved_docs[idx] = retrieved_results[:num_relevant_docs]


    retrieved_docs = {str(qid): [str(doc_id) for doc_id in docs] for qid, docs in retrieved_docs.items()}
    relevant_docs = {str(qid): {str(doc_id) for doc_id in docs} for qid, docs in relevant_docs.items()}

    print(retrieved_docs)

    query_ids = list(relevant_docs.keys())
    evaluator = Evaluation(retrieved_docs, relevant_docs, query_ids)

    results_file = results_file
    evaluator.save_evaluation_results(results_file)

{'1': ['429', '722', '29', '783', '1160', '1323', '24', '805', '60', '746', '784', '725', '1413', '661', '1165', '258', '596', '662', '449', '244', '1221', '489', '799', '1273', '1000', '456', '582', '1277', '315', '1210', '231', '666', '790', '412', '420', '69', '1246', '323', '492', '257', '898', '616', '503', '254', '1304', '1266'], '2': ['29', '578', '603', '35', '69', '1413', '810', '1010', '488', '806', '1096', '1284', '1076', '633', '746', '51', '784', '1338', '901', '871', '790', '1089', '139', '346', '1061', '315'], '3': ['1295', '1280', '1296', '1089', '1022', '1284', '1303', '1282', '1137', '1087', '1289', '255', '29', '574', '1220', '69', '60', '510', '1413', '462', '965', '536', '631', '1102', '856', '747', '1316', '469', '313', '463', '251', '15', '346', '855', '716', '1214', '599', '1115', '766', '683', '318', '494', '570', '1143'], '4': ['1280', '1294', '835', '1225', '332', '698', '1296', '1089'], '5': ['839', '611', '137', '1158', '122', '474', '1423', '139', '398', '