DENSE RETRIEVAL

In [1]:
# Use the %pip magic (not !pip) so the package is installed into the
# environment of the running kernel rather than whichever pip is on PATH.
%pip install -U beir



In [2]:
import pathlib, os
import time
import pandas as pd
import random
import requests
import json
import torch
import torch.multiprocessing as mp
from tqdm.notebook import tqdm
from tqdm.autonotebook import trange
from beir import util, LoggingHandler
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval import models
from beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES
from beir.datasets.data_loader import GenericDataLoader

# Run configuration.
hostname = 'localhost'
dataset = 'scifact'
index_name = dataset  # the index shares the dataset's name

# Download the dataset archive into ./datasets (relative to the current
# working directory) and unzip it.
out_dir = os.path.join(os.getcwd(), 'datasets')
url = 'https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip'.format(dataset)
data_path = util.download_and_unzip(url, out_dir)

# Load the 'test' split; 'train' and 'dev' are the other split names.
corpus, queries, qrels = GenericDataLoader(data_path).load(split='test')

# Document ids and their documents as two lists in matching order.
corpus_ids = list(corpus)
corpus_list = list(corpus.values())

  0%|          | 0/5183 [00:00<?, ?it/s]

In [3]:
def eval_metrics(model_name, ndcg, _map, recall, precision, k_values=(10, 100, 1000)):
  """Collect retrieval metrics for one model into a single-row DataFrame.

  Args:
    model_name: Label used as the index of the returned row.
    ndcg: Mapping holding an 'NDCG@k' entry for every k in k_values.
    _map: Mapping holding a 'MAP@k' entry for every k in k_values.
    recall: Mapping holding a 'Recall@k' entry for every k in k_values.
    precision: Mapping holding a 'P@k' entry for every k in k_values.
    k_values: Cut-offs to report, in column order. Defaults to
      (10, 100, 1000).

  Returns:
    A pandas DataFrame with one row indexed by model_name. Columns are
    grouped as MAP, NDCG, P, Recall, and within each group follow the
    order of k_values.

  Raises:
    KeyError: If one of the mappings lacks an entry for a requested cut-off.
  """
  # Group order fixes the column order of the resulting table.
  metric_groups = (
      ('MAP', _map),
      ('NDCG', ndcg),
      ('P', precision),
      ('Recall', recall),
  )
  row = {}
  for prefix, scores in metric_groups:
    for k in k_values:
      column = '{}@{}'.format(prefix, k)
      # Each value is wrapped in a list so every column has exactly one row.
      row[column] = [scores[column]]
  return pd.DataFrame(row, index=[model_name])

In [4]:
# "all-*" checkpoints, each wrapped in an exact-search dense retriever that
# scores with the dot product. Every stage below (build, retrieve, evaluate)
# is run for all three models, in this order, before the next stage starts.
dr_all_mpnet_dot, dr_all_drob_dot, dr_all_minilm_dot = (
    EvaluateRetrieval(
        DRES(models.SentenceBERT(checkpoint), batch_size=128), score_function='dot')
    for checkpoint in ('all-mpnet-base-v2', 'all-distilroberta-v1', 'all-MiniLM-L12-v2')
)

# Run retrieval for the loaded corpus and queries with each retriever.
res_all_mpnet_dot, res_all_drob_dot, res_all_minilm_dot = (
    retriever.retrieve(corpus, queries)
    for retriever in (dr_all_mpnet_dot, dr_all_drob_dot, dr_all_minilm_dot)
)

# Evaluate each result set against qrels at the retriever's own k_values;
# each call yields (ndcg, map, recall, precision).
(
    (ndcg_d_amd, _map_d_amd, recall_d_amd, precision_d_amd),
    (ndcg_d_add, _map_d_add, recall_d_add, precision_d_add),
    (ndcg_d_amld, _map_d_amld, recall_d_amld, precision_d_amld),
) = (
    retriever.evaluate(qrels, run, retriever.k_values)
    for retriever, run in (
        (dr_all_mpnet_dot, res_all_mpnet_dot),
        (dr_all_drob_dot, res_all_drob_dot),
        (dr_all_minilm_dot, res_all_minilm_dot),
    )
)

Downloading:   0%|          | 0.00/737 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/9.86k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/653 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/15.7k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/329M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/333 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/41 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/41 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/41 [00:00<?, ?it/s]

In [5]:
# One-row metrics table for all-mpnet-base-v2.
eval_metrics(
    model_name='all-mpnet-base-v2',
    ndcg=ndcg_d_amd,
    _map=_map_d_amd,
    recall=recall_d_amd,
    precision=precision_d_amd,
)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
all-mpnet-base-v2,0.60827,0.61595,0.6163,0.6557,0.68911,0.69653,0.08933,0.0107,0.00113,0.79011,0.94167,0.99667


In [6]:
# One-row metrics table for all-distilroberta-v1.
eval_metrics(
    model_name='all-distilroberta-v1',
    ndcg=ndcg_d_add,
    _map=_map_d_add,
    recall=recall_d_add,
    precision=precision_d_add,
)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
all-distilroberta-v1,0.5758,0.58185,0.58226,0.63146,0.65978,0.66977,0.089,0.01047,0.00113,0.78789,0.92,0.99667


In [7]:
# One-row metrics table for all-MiniLM-L12-v2.
eval_metrics(
    model_name='all-MiniLM-L12-v2',
    ndcg=ndcg_d_amld,
    _map=_map_d_amld,
    recall=recall_d_amld,
    precision=precision_d_amld,
)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
all-MiniLM-L12-v2,0.57186,0.57863,0.57901,0.62639,0.65549,0.66497,0.08833,0.01037,0.00111,0.78233,0.91167,0.98333


In [8]:
# "multi-qa-*" checkpoints, each paired with the score function used for it
# ('dot' or 'cos_sim'). Every stage below (build, retrieve, evaluate) is run
# for all four models, in this order, before the next stage starts.
dr_mqa_mpnet_dot, dr_mqa_minilm_cos, dr_mqa_dist_dot, dr_mqa_dist_cos = (
    EvaluateRetrieval(
        DRES(models.SentenceBERT(checkpoint), batch_size=128), score_function=scoring)
    for checkpoint, scoring in (
        ('multi-qa-mpnet-base-dot-v1', 'dot'),
        ('multi-qa-MiniLM-L6-cos-v1', 'cos_sim'),
        ('multi-qa-distilbert-dot-v1', 'dot'),
        ('multi-qa-distilbert-cos-v1', 'cos_sim'),
    )
)

# Run retrieval for the loaded corpus and queries with each retriever.
res_mqa_mpnet_dot, res_mqa_minilm_cos, res_mqa_dist_dot, res_mqa_dist_cos = (
    retriever.retrieve(corpus, queries)
    for retriever in (
        dr_mqa_mpnet_dot, dr_mqa_minilm_cos, dr_mqa_dist_dot, dr_mqa_dist_cos)
)

# Evaluate each result set against qrels at the retriever's own k_values;
# each call yields (ndcg, map, recall, precision).
(
    (ndcg_d_mmd, _map_d_mmd, recall_d_mmd, precision_d_mmd),
    (ndcg_d_mlc, _map_d_mlc, recall_d_mlc, precision_d_mlc),
    (ndcg_d_mdd, _map_d_mdd, recall_d_mdd, precision_d_mdd),
    (ndcg_d_mdc, _map_d_mdc, recall_d_mdc, precision_d_mdc),
) = (
    retriever.evaluate(qrels, run, retriever.k_values)
    for retriever, run in (
        (dr_mqa_mpnet_dot, res_mqa_mpnet_dot),
        (dr_mqa_minilm_cos, res_mqa_minilm_cos),
        (dr_mqa_dist_dot, res_mqa_dist_dot),
        (dr_mqa_dist_cos, res_mqa_dist_cos),
    )
)

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/41 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/41 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/41 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/41 [00:00<?, ?it/s]

In [9]:
# One-row metrics table for multi-qa-mpnet-base-dot-v1.
eval_metrics(
    model_name='multi-qa-mpnet-base-dot-v1',
    ndcg=ndcg_d_mmd,
    _map=_map_d_mmd,
    recall=recall_d_mmd,
    precision=precision_d_mmd,
)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
multi-qa-mpnet-base-dot-v1,0.54151,0.54938,0.55,0.58907,0.62253,0.63794,0.082,0.00993,0.00112,0.71756,0.86833,0.98667


In [10]:
# One-row metrics table for multi-qa-MiniLM-L6-cos-v1.
eval_metrics(
    model_name='multi-qa-MiniLM-L6-cos-v1',
    ndcg=ndcg_d_mlc,
    _map=_map_d_mlc,
    recall=recall_d_mlc,
    precision=precision_d_mlc,
)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
multi-qa-MiniLM-L6-cos-v1,0.49919,0.5091,0.50955,0.54029,0.58909,0.60131,0.07433,0.01,0.00111,0.65011,0.88033,0.97667


In [11]:
# One-row metrics table for multi-qa-distilbert-dot-v1.
eval_metrics(
    model_name='multi-qa-distilbert-dot-v1',
    ndcg=ndcg_d_mdd,
    _map=_map_d_mdd,
    recall=recall_d_mdd,
    precision=precision_d_mdd,
)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
multi-qa-distilbert-dot-v1,0.49725,0.50593,0.50636,0.54839,0.59058,0.60085,0.07733,0.01007,0.0011,0.68761,0.88822,0.96667


In [12]:
# One-row metrics table for multi-qa-distilbert-cos-v1.
eval_metrics(
    model_name='multi-qa-distilbert-cos-v1',
    ndcg=ndcg_d_mdc,
    _map=_map_d_mdc,
    recall=recall_d_mdc,
    precision=precision_d_mdc,
)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
multi-qa-distilbert-cos-v1,0.54929,0.55748,0.55783,0.5957,0.63221,0.64253,0.08167,0.01007,0.0011,0.72161,0.88533,0.96933


In [13]:
# "msmarco-*" checkpoints, all scored with the dot product. batch_size=128 is
# passed explicitly for every retriever, including ANCE, so the setting matches
# the other model groups in this notebook and does not rely on a library default.
# Every stage below (build, retrieve, evaluate) is run for all four models, in
# this order, before the next stage starts.
dr_msm_dist_cls, dr_msm_dist_dot, dr_msm_con_dot, dr_msm_ance_dot = (
    EvaluateRetrieval(
        DRES(models.SentenceBERT(checkpoint), batch_size=128), score_function='dot')
    for checkpoint in (
        'msmarco-distilbert-base-tas-b',
        'msmarco-distilbert-dot-v5',
        'msmarco-bert-co-condensor',
        'msmarco-roberta-base-ance-firstp',
    )
)

# Run retrieval for the loaded corpus and queries with each retriever.
res_msm_dist_cls, res_msm_dist_dot, res_msm_con_dot, res_msm_ance_dot = (
    retriever.retrieve(corpus, queries)
    for retriever in (
        dr_msm_dist_cls, dr_msm_dist_dot, dr_msm_con_dot, dr_msm_ance_dot)
)

# Evaluate each result set against qrels at the retriever's own k_values;
# each call yields (ndcg, map, recall, precision).
(
    (ndcg_d_sdc, _map_d_sdc, recall_d_sdc, precision_d_sdc),
    (ndcg_d_sdd, _map_d_sdd, recall_d_sdd, precision_d_sdd),
    (ndcg_d_scd, _map_d_scd, recall_d_scd, precision_d_scd),
    (ndcg_d_sad, _map_d_sad, recall_d_sad, precision_d_sad),
) = (
    retriever.evaluate(qrels, run, retriever.k_values)
    for retriever, run in (
        (dr_msm_dist_cls, res_msm_dist_cls),
        (dr_msm_dist_dot, res_msm_dist_dot),
        (dr_msm_con_dot, res_msm_con_dot),
        (dr_msm_ance_dot, res_msm_ance_dot),
    )
)

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/41 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/41 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/41 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/41 [00:00<?, ?it/s]

In [14]:
# One-row metrics table for msmarco-distilbert-base-tas-b.
eval_metrics(
    model_name='msmarco-distilbert-base-tas-b',
    ndcg=ndcg_d_sdc,
    _map=_map_d_sdc,
    recall=recall_d_sdc,
    precision=precision_d_sdc,
)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
msmarco-distilbert-base-tas-b,0.59916,0.60459,0.60493,0.64276,0.66983,0.68106,0.08633,0.01013,0.00111,0.7615,0.891,0.98333


In [15]:
# One-row metrics table for msmarco-distilbert-dot-v5.
eval_metrics(
    model_name='msmarco-distilbert-dot-v5',
    ndcg=ndcg_d_sdd,
    _map=_map_d_sdd,
    recall=recall_d_sdd,
    precision=precision_d_sdd,
)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
msmarco-distilbert-dot-v5,0.55076,0.55811,0.55847,0.5949,0.62866,0.63907,0.081,0.00993,0.00108,0.71483,0.87033,0.95333


In [16]:
# One-row metrics table for msmarco-bert-co-condensor.
eval_metrics(
    model_name='msmarco-bert-co-condensor',
    ndcg=ndcg_d_scd,
    _map=_map_d_scd,
    recall=recall_d_scd,
    precision=precision_d_scd,
)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
msmarco-bert-co-condensor,0.54942,0.55659,0.55693,0.60019,0.63829,0.6465,0.083,0.0105,0.00112,0.74056,0.92767,0.99


In [17]:
# One-row metrics table for msmarco-roberta-base-ance-firstp.
eval_metrics(
    model_name='msmarco-roberta-base-ance-firstp',
    ndcg=ndcg_d_sad,
    _map=_map_d_sad,
    recall=recall_d_sad,
    precision=precision_d_sad,
)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
msmarco-roberta-base-ance-firstp,0.46612,0.47323,0.47384,0.5114,0.54931,0.56652,0.072,0.00933,0.00109,0.63333,0.81867,0.95667


end of fun.