SPARSE RETRIEVAL

In [1]:
!pip install -U beir

Collecting beir
  Downloading beir-0.2.3.tar.gz (52 kB)
[K     |████████████████████████████████| 52 kB 1.2 MB/s 
[?25hCollecting sentence-transformers
  Downloading sentence-transformers-2.1.0.tar.gz (78 kB)
[K     |████████████████████████████████| 78 kB 6.7 MB/s 
[?25hCollecting pytrec_eval
  Downloading pytrec_eval-0.5.tar.gz (15 kB)
Collecting faiss_cpu
  Downloading faiss_cpu-1.7.1.post2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.4 MB)
[K     |████████████████████████████████| 8.4 MB 43.1 MB/s 
[?25hCollecting elasticsearch
  Downloading elasticsearch-7.15.1-py2.py3-none-any.whl (378 kB)
[K     |████████████████████████████████| 378 kB 47.3 MB/s 
Collecting tensorflow-text
  Downloading tensorflow_text-2.6.0-cp37-cp37m-manylinux1_x86_64.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 35.8 MB/s 
Collecting transformers<5.0.0,>=4.6.0
  Downloading transformers-4.11.3-py3-none-any.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 

In [2]:
# Standard library
import pathlib
import os
import time

# Third-party
import pandas as pd
from tqdm.notebook import tqdm
from tqdm.autonotebook import trange

# BEIR: dataset download/loading, SPARTA model wrapper, sparse search, evaluation
from beir import util, LoggingHandler
from beir.retrieval import models
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval.search.sparse import SparseSearch


In [3]:
def load(dataset, split='test'):
  """Download a BEIR dataset archive and load one of its splits.

  The archive ``<dataset>.zip`` is requested from the public BEIR mirror and
  handed to ``util.download_and_unzip`` with ``./datasets`` (relative to the
  current working directory) as the output directory.

  Args:
    dataset: Dataset name as it appears in the archive file name,
      e.g. ``'arguana'`` or ``'fiqa'``.
    split: Split to load: ``'test'`` (default), ``'train'`` or ``'dev'``.

  Returns:
    The result of ``GenericDataLoader(data_path).load(split=split)``; callers
    in this notebook unpack it as ``(corpus, queries, qrels)``.
  """
  url = 'https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip'.format(dataset)
  out_dir = os.path.join(os.getcwd(), 'datasets')
  data_path = util.download_and_unzip(url, out_dir)
  return GenericDataLoader(data_path).load(split=split)

In [4]:
def retrieve(corpus, queries, qrels, model_name, batch_size):
  """Run SPARTA sparse retrieval over ``corpus`` and score it against ``qrels``.

  Args:
    corpus: Documents to search, as returned by ``load``.
    queries: Queries to run, as returned by ``load``.
    qrels: Relevance judgements used for scoring, as returned by ``load``.
    model_name: Identifier passed to ``models.SPARTA``.
    batch_size: Forwarded to ``SparseSearch`` as ``batch_size``.

  Returns:
    The result of ``EvaluateRetrieval.evaluate`` at the evaluator's own
    ``k_values``; ``evaluate`` in this notebook unpacks it as
    ``(ndcg, _map, recall, precision)``.

  NOTE(review): the recorded runs with ``batch_size=128`` show far fewer query
  steps than one would expect for a full test split -- confirm how
  ``SparseSearch`` uses ``batch_size`` on the query side.
  """
  searcher = SparseSearch(models.SPARTA(model_name), batch_size=batch_size)
  evaluator = EvaluateRetrieval(searcher)
  hits = evaluator.retrieve(corpus, queries)
  return evaluator.evaluate(qrels, hits, evaluator.k_values)

In [5]:
def evaluate(corpus, queries, qrels, model_name, batch_size):
  """Retrieve, score, and tabulate the metrics at cutoffs 10, 100 and 1000.

  Args:
    corpus, queries, qrels: Dataset parts, passed straight to ``retrieve``.
    model_name: Model identifier; also used as the label of the single row.
    batch_size: Passed straight to ``retrieve``.

  Returns:
    A one-row ``pandas.DataFrame`` indexed by ``model_name`` whose columns are,
    in order, MAP, NDCG, P and Recall, each at @10, @100 and @1000.
  """
  ndcg, _map, recall, precision = retrieve(corpus, queries, qrels, model_name, batch_size)

  # (column prefix, score dict) pairs, listed in the order the columns appear.
  metric_groups = (
      ('MAP', _map),
      ('NDCG', ndcg),
      ('P', precision),
      ('Recall', recall),
  )
  cutoffs = (10, 100, 1000)

  row = {}
  for prefix, scores in metric_groups:
    for k in cutoffs:
      column = '{}@{}'.format(prefix, k)
      # Each value is wrapped in a list so the frame gets exactly one row.
      row[column] = [scores[column]]

  summary = pd.DataFrame(data=row)
  summary.index = [model_name]
  return summary

In [6]:
# Model identifier handed to models.SPARTA (through evaluate -> retrieve) by the evaluation cells.
model_db = 'BeIR/sparta-msmarco-distilbert-base-v1'
# Fetch each dataset archive into ./datasets and load its 'test' split up front,
# so the evaluation cells only do retrieval and scoring.
ag_corpus, ag_queries, ag_qrels = load('arguana')
fq_corpus, fq_queries, fq_qrels = load('fiqa')

/content/datasets/arguana.zip:   0%|          | 0.00/3.60M [00:00<?, ?iB/s]

  0%|          | 0/8674 [00:00<?, ?it/s]

/content/datasets/fiqa.zip:   0%|          | 0.00/17.1M [00:00<?, ?iB/s]

  0%|          | 0/57638 [00:00<?, ?it/s]

**TOUCHÉ** - [Homepage](https://webis.de/events/touche-20/) - [Paper](https://www.researchgate.net/publication/344371868_Overview_of_Touche_2020_Argument_Retrieval)

In [7]:
# NOTE(review): the Touché run is disabled and the reason is not recorded in this
# notebook -- either re-enable it or delete the cell.
# tc_corpus, tc_queries, tc_qrels = load('webis-touche2020')
# evaluate(tc_corpus, tc_queries, tc_qrels, model_db, 128)

**ARGUANA** - [Homepage](http://argumentation.bplaced.net/arguana/data) - [Paper](https://aclanthology.org/P18-1023.pdf)

In [8]:
# SPARTA sparse retrieval on the ArguAna test split, batch size 128; the returned
# one-row frame is displayed as the cell output.
# NOTE(review): the recorded output shows only 11 query steps and P@10 = 0.07273 (= 8/110),
# which suggests one query per batch of 128 is scored rather than every query --
# confirm against SparseSearch in the installed beir version before quoting these numbers.
evaluate(ag_corpus, ag_queries, ag_qrels, model_db, 128)

Downloading:   0%|          | 0.00/258 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/517 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/253M [00:00<?, ?B/s]

docs:   0%|          | 0/68 [00:00<?, ?it/s]

query:   0%|          | 0/11 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (826 > 512). Running this sequence through the model will result in indexing errors


Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
BeIR/sparta-msmarco-distilbert-base-v1,0.23409,0.23918,0.23918,0.35291,0.38786,0.38786,0.07273,0.00909,0.00091,0.72727,0.90909,0.90909


**FIQA** - [Homepage](https://sites.google.com/view/fiqa/home) - [Paper](https://www.researchgate.net/publication/324629350_WWW'18_Open_Challenge_Financial_Opinion_Mining_and_Question_Answering)

In [9]:
# SPARTA sparse retrieval on the FiQA test split, batch size 128; the returned
# one-row frame is displayed as the cell output.
# NOTE(review): the recorded output shows only 6 query steps and P@10 = 0.11667 (= 7/60),
# which suggests one query per batch of 128 is scored rather than every query --
# confirm against SparseSearch in the installed beir version before quoting these numbers.
evaluate(fq_corpus, fq_queries, fq_qrels, model_db, 128)

docs:   0%|          | 0/451 [00:00<?, ?it/s]

query:   0%|          | 0/6 [00:00<?, ?it/s]

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
BeIR/sparta-msmarco-distilbert-base-v1,0.28877,0.2971,0.29835,0.35508,0.37253,0.39958,0.11667,0.015,0.002,0.33333,0.375,0.58333


end of fun.