In [7]:
!pip install faiss-cpu



In [8]:
import sys
import os
sys.path.append(os.path.abspath('..'))

In [9]:
from retrievers.BM25 import BM25Retriever
from retrievers.SPLADE import SPLADERetriever
from retrievers.DPR import DPRRetriever
from retrievers.SequentialRetriever import SequentialRetriever
from retrievers.ColBERTLoadedRetriever import ColBERTRetriever
from Evaluation import Evaluation

In [10]:
import sys
import os
sys.path.append(os.path.abspath('..'))

In [11]:
doc_path = "../dataset/CISI.ALL"
qry_path = "../dataset/CISI.QRY"
rel_path = "../dataset/CISI.REL"

dpr_retriever = DPRRetriever(doc_path, qry_path, rel_path)
bm25_retriever = BM25Retriever(doc_path, qry_path, rel_path)
colbert_retriever = ColBERTRetriever(doc_path, qry_path, rel_path)
splade_retriever = SPLADERetriever(doc_path, qry_path, rel_path)

experiments = [
    (SequentialRetriever(bm25_retriever, bm25_retriever, doc_path, qry_path, rel_path), '../results/sequential_bm25_bm25_evaluation.txt'),
    (SequentialRetriever(bm25_retriever, dpr_retriever, doc_path, qry_path, rel_path), '../results/sequential_bm25_dpr_evaluation.txt'),
    (SequentialRetriever(bm25_retriever, colbert_retriever, doc_path, qry_path, rel_path), '../results/sequential_bm25_colbert_evaluation.txt'),
    (SequentialRetriever(bm25_retriever, splade_retriever, doc_path, qry_path, rel_path), '../results/sequential_bm25_splade_evaluation.txt'),
    (SequentialRetriever(dpr_retriever, bm25_retriever, doc_path, qry_path, rel_path), '../results/sequential_dpr_bm25_evaluation.txt'),
    (SequentialRetriever(dpr_retriever, dpr_retriever, doc_path, qry_path, rel_path), '../results/sequential_dpr_dpr_evaluation.txt'),
    (SequentialRetriever(dpr_retriever, colbert_retriever, doc_path, qry_path, rel_path), '../results/sequential_dpr_colbert_evaluation.txt'),
    (SequentialRetriever(dpr_retriever, splade_retriever, doc_path, qry_path, rel_path), '../results/sequential_dpr_splade_evaluation.txt'),
    (SequentialRetriever(colbert_retriever, bm25_retriever, doc_path, qry_path, rel_path), '../results/sequential_colbert_bm25_evaluation.txt'),
    (SequentialRetriever(colbert_retriever, dpr_retriever, doc_path, qry_path, rel_path), '../results/sequential_colbert_dpr_evaluation.txt'),
    (SequentialRetriever(colbert_retriever, colbert_retriever, doc_path, qry_path, rel_path), '../results/sequential_colbert_colbert_evaluation.txt'),
    (SequentialRetriever(colbert_retriever, splade_retriever, doc_path, qry_path, rel_path), '../results/sequential_colbert_splade_evaluation.txt'),
    (SequentialRetriever(splade_retriever, bm25_retriever, doc_path, qry_path, rel_path), '../results/sequential_splade_bm25_evaluation.txt'),
    (SequentialRetriever(splade_retriever, dpr_retriever, doc_path, qry_path, rel_path), '../results/sequential_splade_dpr_evaluation.txt'),
    (SequentialRetriever(splade_retriever, colbert_retriever, doc_path, qry_path, rel_path), '../results/sequential_splade_colbert_evaluation.txt'),
    (SequentialRetriever(splade_retriever, splade_retriever, doc_path, qry_path, rel_path), '../results/sequential_splade_splade_evaluation.txt'),
                ]

Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the

In [12]:
for experiment in experiments:
    model = experiment[0]
    results_file = experiment[1]
    retrieved_docs = {}
    relevant_docs = model.rel_set

    for idx in model.qry_set:
        num_relevant_docs = len(relevant_docs.get(idx, set()))
        retrieved_results = model.retrieve_score_sequential(idx)
        sliced_retrieved_results = retrieved_results[:num_relevant_docs]
        retrieved_docs[idx] = retrieved_results[:num_relevant_docs]

    retrieved_docs = {str(qid): [str(doc_id) for doc_id in docs] for qid, docs in retrieved_docs.items()}
    relevant_docs = {str(qid): {str(doc_id) for doc_id in docs} for qid, docs in relevant_docs.items()}

    print(retrieved_docs)

    query_ids = list(relevant_docs.keys())
    evaluator = Evaluation(retrieved_docs, relevant_docs, query_ids)

    results_file = results_file
    evaluator.save_evaluation_results(results_file)

{'1': ['60', '24', '364', '1051', '1246', '251', '398', '790', '335', '168', '958', '447', '1186', '596', '17', '489', '938', '1245', '236', '65', '725', '34', '746', '1443', '1276', '934', '652', '1383', '1299', '716', '616', '121', '1387', '198', '1124', '227', '16', '1416', '244', '1044', '1079', '359', '888', '544', '412', '643'], '2': ['1399', '790', '810', '768', '166', '778', '1276', '597', '1338', '198', '1035', '603', '421', '65', '806', '1309', '381', '1061', '237', '756', '746', '16', '477', '544', '1197', '364'], '3': ['60', '1077', '469', '364', '24', '1051', '1160', '236', '251', '21', '1246', '168', '160', '449', '1444', '1235', '244', '462', '706', '716', '454', '1450', '1371', '1373', '1137', '398', '1214', '803', '335', '1329', '158', '570', '42', '631', '1420', '90', '463', '369', '766', '371', '547', '938', '778', '92'], '4': ['746', '663', '790', '320', '608', '1213', '825', '421'], '5': ['364', '1105', '60', '236', '24', '323', '1246', '587', '1051', '483', '896',