RE-RANKING

In [None]:
!pip install -U beir
!pip install 'elasticsearch<7.14.0'

In [2]:
import os
import pathlib
import time
import urllib.request

import pandas as pd
from beir import util
from beir.datasets.data_loader import GenericDataLoader
from beir.reranking import Rerank
from beir.reranking.models import CrossEncoder
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval.search.lexical import BM25Search as BM25

  from tqdm.autonotebook import tqdm


In [3]:
if not os.path.exists('elasticsearch-oss-7.9.2-linux-x86_64.tar.gz'):
  !wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-oss-7.9.2-linux-x86_64.tar.gz
  !wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-oss-7.9.2-linux-x86_64.tar.gz.sha512
  !tar -xzf elasticsearch-oss-7.9.2-linux-x86_64.tar.gz
  !sudo chown -R daemon:daemon elasticsearch-7.9.2/
  !shasum -a 512 -c elasticsearch-oss-7.9.2-linux-x86_64.tar.gz.sha512 

--2021-12-07 05:43:31--  https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-oss-7.9.2-linux-x86_64.tar.gz
Resolving artifacts.elastic.co (artifacts.elastic.co)... 34.120.127.130, 2600:1901:0:1d7::
Connecting to artifacts.elastic.co (artifacts.elastic.co)|34.120.127.130|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 229941304 (219M) [application/x-gzip]
Saving to: ‘elasticsearch-oss-7.9.2-linux-x86_64.tar.gz’


2021-12-07 05:44:09 (5.80 MB/s) - ‘elasticsearch-oss-7.9.2-linux-x86_64.tar.gz’ saved [229941304/229941304]

--2021-12-07 05:44:09--  https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-oss-7.9.2-linux-x86_64.tar.gz.sha512
Resolving artifacts.elastic.co (artifacts.elastic.co)... 34.120.127.130, 2600:1901:0:1d7::
Connecting to artifacts.elastic.co (artifacts.elastic.co)|34.120.127.130|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 173 [application/octet-stream]
Saving to: ‘elasticsearch-oss-7.9.2-

In [4]:
%%bash --bg

# Start the Elasticsearch server as the `daemon` user; `--bg` on the cell magic
# runs this script in the background so the notebook can continue.
sudo -H -u daemon elasticsearch-7.9.2/bin/elasticsearch

Starting job # 0 in a separate thread.


In [5]:
# Block until Elasticsearch answers on its HTTP port rather than sleeping a fixed
# 10 s: start-up time varies, and a too-short pause leaves the following cells
# talking to a server that is not listening yet.
ES_URL = 'http://localhost:9200/'
ES_STARTUP_TIMEOUT = 120  # seconds to wait before giving up
ES_POLL_INTERVAL = 2      # seconds between probes

es_deadline = time.monotonic() + ES_STARTUP_TIMEOUT
while True:
  try:
    with urllib.request.urlopen(ES_URL, timeout=5):
      break  # any successful HTTP response means the node is accepting requests
  except OSError as err:  # URLError, connection refused and socket timeouts are all OSError
    if time.monotonic() >= es_deadline:
      raise TimeoutError(
          'Elasticsearch did not respond at {} within {} s'.format(ES_URL, ES_STARTUP_TIMEOUT)
      ) from err
    time.sleep(ES_POLL_INTERVAL)

In [6]:
# List running processes whose command line mentions elasticsearch, to confirm the server was launched.
!ps -ef | grep elasticsearch

root         418     416  0 05:44 ?        00:00:00 sudo -H -u daemon elasticsearch-7.9.2/bin/elasticsearch
daemon       419     418 99 05:44 ?        00:00:18 /content/elasticsearch-7.9.2/jdk/bin/java -Xshare:auto -Des.networkaddress.cache.ttl=60 -Des.networkaddress.cache.negative.ttl=10 -XX:+AlwaysPreTouch -Xss1m -Djava.awt.headless=true -Dfile.encoding=UTF-8 -Djna.nosys=true -XX:-OmitStackTraceInFastThrow -XX:+ShowCodeDetailsInExceptionMessages -Dio.netty.noUnsafe=true -Dio.netty.noKeySetOptimization=true -Dio.netty.recycler.maxCapacityPerThread=0 -Dio.netty.allocator.numDirectArenas=0 -Dlog4j.shutdownHookEnabled=false -Dlog4j2.disable.jmx=true -Djava.locale.providers=SPI,COMPAT -Xms1g -Xmx1g -XX:+UseG1GC -XX:G1ReservePercent=25 -XX:InitiatingHeapOccupancyPercent=30 -Djava.io.tmpdir=/tmp/elasticsearch-11391299929225202314 -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=data -XX:ErrorFile=logs/hs_err_pid%p.log -Xlog:gc*,gc+age=trace,safepoint:file=logs/gc.log:utctime,pid,tags:fileco

In [7]:
# Send a GET to the Elasticsearch HTTP endpoint on port 9200. `-s` silences curl's
# progress and error messages, so an empty output means no response was received.
!curl -sX GET "localhost:9200/"

In [8]:
def retrieve_lexical(dataset, split='test', hostname='localhost'):
  """Download a BEIR dataset and run BM25 retrieval over it through Elasticsearch.

  Args:
    dataset: BEIR dataset name; used both in the download URL and as the
      Elasticsearch index name.
    split: Which split to load ('test', 'train' or 'dev'). Defaults to 'test'.
    hostname: Elasticsearch host passed to the BM25 searcher. Defaults to
      'localhost'.

  Returns:
    Tuple ``(corpus, queries, qrels, retriever, results)`` where ``retriever``
    is the ``EvaluateRetrieval`` wrapper around the BM25 model and ``results``
    is what ``retriever.retrieve(corpus, queries)`` returned.
  """
  url = 'https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip'.format(dataset)
  out_dir = os.path.join(os.getcwd(), 'datasets')
  data_path = util.download_and_unzip(url, out_dir)
  corpus, queries, qrels = GenericDataLoader(data_path).load(split=split)
  # initialize=True : reindex
  model = BM25(index_name=dataset, hostname=hostname, initialize=True)
  retriever = EvaluateRetrieval(model)
  results = retriever.retrieve(corpus, queries)
  return corpus, queries, qrels, retriever, results

In [9]:
def rerank_model(reranker, corpus, queries, qrels, retriever, results, top_k):
  """Re-rank first-stage results and score the new ranking.

  Calls ``reranker.rerank`` on ``results`` with the given ``top_k`` and returns
  whatever ``EvaluateRetrieval.evaluate`` produces for the re-ranked results.

  NOTE(review): the evaluation cutoffs are taken from ``retriever.k_values`` and
  may exceed ``top_k`` -- confirm metrics above ``top_k`` are meaningful.
  """
  reranked = reranker.rerank(corpus, queries, results, top_k=top_k)
  cutoffs = retriever.k_values
  return EvaluateRetrieval.evaluate(qrels, reranked, cutoffs)

In [10]:
def evaluate(name, ndcg, _map, recall, precision, k_values=(10, 100, 1000)):
  """Collect selected retrieval metrics into a one-row DataFrame.

  Args:
    name: Label used as the row index (e.g. a dataset or model name).
    ndcg: Mapping with keys of the form 'NDCG@k'.
    _map: Mapping with keys of the form 'MAP@k'.
    recall: Mapping with keys of the form 'Recall@k'.
    precision: Mapping with keys of the form 'P@k'.
    k_values: Cutoffs to report. Defaults to (10, 100, 1000), the cutoffs this
      function reported before the parameter existed.

  Returns:
    A DataFrame with a single row indexed by ``name`` and columns ordered
    MAP@k..., NDCG@k..., P@k..., Recall@k... (each family in ``k_values`` order).

  Raises:
    KeyError: If a requested 'metric@k' key is missing from its mapping.
  """
  # Column order is part of the displayed tables: MAP, NDCG, P, Recall.
  metric_families = (
      ('MAP', _map),
      ('NDCG', ndcg),
      ('P', precision),
      ('Recall', recall),
  )
  columns = {}
  for prefix, scores in metric_families:
    for k in k_values:
      key = '{}@{}'.format(prefix, k)
      columns[key] = [scores[key]]
  return pd.DataFrame(data=columns, index=[name])

In [11]:
def evaluate_rerank(rerank, corpus, queries, qrels, retriever, results, top_k):
  """Run one re-ranker entry and tabulate its metrics.

  ``rerank`` is a dict with the re-ranker under 'rr' and its display label
  under 'name'. The re-ranker is scored via ``rerank_model`` and the four
  metric results are handed to ``evaluate`` together with the label.
  """
  metrics = rerank_model(rerank['rr'], corpus, queries, qrels, retriever, results, top_k)
  ndcg_scores, map_scores, recall_scores, precision_scores = metrics
  return evaluate(rerank['name'], ndcg_scores, map_scores, recall_scores, precision_scores)

In [12]:
def run_all_models(models, corpus, queries, qrels, retriever, results, top_k):
  """Evaluate every re-ranker entry in ``models`` on the same retrieval results.

  Returns a list with one ``evaluate_rerank`` result per entry, in the order
  the entries appear in ``models``.
  """
  return [
      evaluate_rerank(entry, corpus, queries, qrels, retriever, results, top_k)
      for entry in models
  ]

In [13]:
# BEIR dataset identifiers used throughout the notebook.
fq_d, ag_d, tu_d, qa_d = 'fiqa', 'arguana', 'webis-touche2020', 'quora'

# FiQA: BM25 retrieval, then metrics at the retriever's cutoffs.
fq_corpus, fq_queries, fq_qrels, fq_retriever, fq_results = retrieve_lexical(fq_d)
fq_ndcg, fq_map, fq_recall, fq_precision = fq_retriever.evaluate(
    fq_qrels, fq_results, fq_retriever.k_values)

# ArguAna
ag_corpus, ag_queries, ag_qrels, ag_retriever, ag_results = retrieve_lexical(ag_d)
ag_ndcg, ag_map, ag_recall, ag_precision = ag_retriever.evaluate(
    ag_qrels, ag_results, ag_retriever.k_values)

# Touche 2020
tu_corpus, tu_queries, tu_qrels, tu_retriever, tu_results = retrieve_lexical(tu_d)
tu_ndcg, tu_map, tu_recall, tu_precision = tu_retriever.evaluate(
    tu_qrels, tu_results, tu_retriever.k_values)

# Quora
qa_corpus, qa_queries, qa_qrels, qa_retriever, qa_results = retrieve_lexical(qa_d)
qa_ndcg, qa_map, qa_recall, qa_precision = qa_retriever.evaluate(
    qa_qrels, qa_results, qa_retriever.k_values)

/content/datasets/fiqa.zip:   0%|          | 0.00/17.1M [00:00<?, ?iB/s]

  0%|          | 0/57638 [00:00<?, ?it/s]

  0%|          | 0/57638 [00:00<?, ?docs/s]
que: 100%|██████████| 6/6 [00:16<00:00,  2.67s/it]


/content/datasets/arguana.zip:   0%|          | 0.00/3.60M [00:00<?, ?iB/s]

  0%|          | 0/8674 [00:00<?, ?it/s]

  0%|          | 0/8674 [00:00<?, ?docs/s]
que: 100%|██████████| 11/11 [00:32<00:00,  2.98s/it]


/content/datasets/webis-touche2020.zip:   0%|          | 0.00/217M [00:00<?, ?iB/s]

  0%|          | 0/382545 [00:00<?, ?it/s]

  0%|          | 0/382545 [00:00<?, ?docs/s]
que: 100%|██████████| 1/1 [00:05<00:00,  5.94s/it]


/content/datasets/quora.zip:   0%|          | 0.00/15.1M [00:00<?, ?iB/s]

  0%|          | 0/522931 [00:00<?, ?it/s]

  0%|          | 0/522931 [00:00<?, ?docs/s]
que: 100%|██████████| 79/79 [03:13<00:00,  2.45s/it]


In [14]:
# One single-row metrics frame per dataset for the BM25 baseline, in display order.
lexical_eval = [
    evaluate(fq_d, fq_ndcg, fq_map, fq_recall, fq_precision),
    evaluate(ag_d, ag_ndcg, ag_map, ag_recall, ag_precision),
    evaluate(tu_d, tu_ndcg, tu_map, tu_recall, tu_precision),
    evaluate(qa_d, qa_ndcg, qa_map, qa_recall, qa_precision),
]

In [None]:
# Re-ranking configuration.
batch_size = 128
top_k = 100

# Cross-encoder checkpoints to compare.
cmtb = 'cross-encoder/ms-marco-TinyBERT-L-2-v2'
cmlm = 'cross-encoder/ms-marco-MiniLM-L-6-v2'
cmeb = 'cross-encoder/ms-marco-electra-base'

# Wrap each checkpoint in a Rerank object, keeping its name alongside for labelling.
rr_cmtb, rr_cmlm, rr_cmeb = (
    {'name': checkpoint, 'rr': Rerank(CrossEncoder(checkpoint), batch_size=batch_size)}
    for checkpoint in (cmtb, cmlm, cmeb)
)
models = [rr_cmtb, rr_cmlm, rr_cmeb]

# Re-rank the BM25 results of every dataset with every model.
fq_res = run_all_models(models, fq_corpus, fq_queries, fq_qrels, fq_retriever, fq_results, top_k)
ag_res = run_all_models(models, ag_corpus, ag_queries, ag_qrels, ag_retriever, ag_results, top_k)
tu_res = run_all_models(models, tu_corpus, tu_queries, tu_qrels, tu_retriever, tu_results, top_k)
qa_res = run_all_models(models, qa_corpus, qa_queries, qa_qrels, qa_retriever, qa_results, top_k)



---



LEXICAL SEARCH (BM25)

In [16]:
# Stack the per-dataset one-row frames into a single BM25 results table.
pd.concat(lexical_eval)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
fiqa,0.19108,0.20437,0.20607,0.2537,0.31299,0.34747,0.07022,0.01306,0.00192,0.3244,0.54886,0.75883
arguana,0.38402,0.39444,0.39465,0.47184,0.51672,0.52189,0.07539,0.00952,0.00099,0.75391,0.95164,0.99147
webis-touche2020,0.13872,0.2211,0.23848,0.34707,0.45483,0.55796,0.33061,0.09531,0.01635,0.2122,0.56093,0.882
quora,0.75962,0.76689,0.76721,0.80771,0.82773,0.83123,0.12175,0.01447,0.00154,0.90219,0.97698,0.99573




---



**FIQA** - [Homepage](https://sites.google.com/view/fiqa/home) - [Paper](https://www.researchgate.net/publication/324629350_WWW'18_Open_Challenge_Financial_Opinion_Mining_and_Question_Answering)

In [17]:
# One row per re-ranking model, evaluated on the FiQA results.
pd.concat(fq_res)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
cross-encoder/ms-marco-TinyBERT-L-2-v2,0.21365,0.22741,0.22741,0.28076,0.3354,0.3354,0.07639,0.01306,0.00131,0.3504,0.54886,0.54886
cross-encoder/ms-marco-MiniLM-L-6-v2,0.27601,0.28581,0.28581,0.34879,0.38578,0.38578,0.09444,0.01306,0.00131,0.41484,0.54886,0.54886
cross-encoder/ms-marco-electra-base,0.27795,0.28748,0.28748,0.34969,0.38706,0.38706,0.09506,0.01306,0.00131,0.41044,0.54886,0.54886


**ARGUANA** - [Homepage](http://argumentation.bplaced.net/arguana/data) - [Paper](https://aclanthology.org/P18-1023.pdf)

In [18]:
# One row per re-ranking model, evaluated on the ArguAna results.
pd.concat(ag_res)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
cross-encoder/ms-marco-TinyBERT-L-2-v2,0.26694,0.28279,0.28279,0.34016,0.41913,0.41913,0.05782,0.00952,0.00095,0.57824,0.95164,0.95164
cross-encoder/ms-marco-MiniLM-L-6-v2,0.33404,0.34773,0.34773,0.41645,0.4768,0.4768,0.06842,0.00952,0.00095,0.68421,0.95164,0.95164
cross-encoder/ms-marco-electra-base,0.24336,0.26152,0.26152,0.31069,0.40055,0.40055,0.05306,0.00952,0.00095,0.53058,0.95164,0.95164


**TOUCHÉ** - [Homepage](https://webis.de/events/touche-20/) - [Paper](https://www.researchgate.net/publication/344371868_Overview_of_Touche_2020_Argument_Retrieval)

In [19]:
# One row per re-ranking model, evaluated on the Touche 2020 results.
pd.concat(tu_res)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
cross-encoder/ms-marco-TinyBERT-L-2-v2,0.09835,0.17556,0.17556,0.25125,0.40985,0.40985,0.24286,0.09531,0.00953,0.17087,0.56093,0.56093
cross-encoder/ms-marco-MiniLM-L-6-v2,0.11157,0.19242,0.19242,0.27027,0.42348,0.42348,0.24898,0.09531,0.00953,0.17656,0.56093,0.56093
cross-encoder/ms-marco-electra-base,0.10467,0.18296,0.18296,0.27678,0.41752,0.41752,0.26939,0.09531,0.00953,0.18802,0.56093,0.56093


**QUORA** - [Homepage](https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs) - [Paper](https://arxiv.org/abs/1907.01041)

In [20]:
# One row per re-ranking model, evaluated on the Quora results.
pd.concat(qa_res)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
cross-encoder/ms-marco-TinyBERT-L-2-v2,0.75451,0.76225,0.76225,0.80274,0.82344,0.82344,0.12233,0.01448,0.00145,0.90084,0.97739,0.97739
cross-encoder/ms-marco-MiniLM-L-6-v2,0.78348,0.78974,0.78974,0.83098,0.84449,0.84449,0.12776,0.01448,0.00145,0.93167,0.97739,0.97739
cross-encoder/ms-marco-electra-base,0.72572,0.73271,0.73271,0.78385,0.80026,0.80026,0.12607,0.01448,0.00145,0.92003,0.97739,0.97739


end of fun.