RE-RANKING

In [64]:
!pip install -U beir
!pip install 'elasticsearch<7.14.0'



In [65]:
import pathlib, os
import time
import pandas as pd
from beir import util
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.search.lexical import BM25Search as BM25
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.reranking.models import CrossEncoder
from beir.reranking import Rerank

In [66]:
if not os.path.exists('elasticsearch-oss-7.9.2-linux-x86_64.tar.gz'):
  !wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-oss-7.9.2-linux-x86_64.tar.gz
  !wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-oss-7.9.2-linux-x86_64.tar.gz.sha512
  !tar -xzf elasticsearch-oss-7.9.2-linux-x86_64.tar.gz
  !sudo chown -R daemon:daemon elasticsearch-7.9.2/
  !shasum -a 512 -c elasticsearch-oss-7.9.2-linux-x86_64.tar.gz.sha512 

In [67]:
%%bash --bg

# Start the Elasticsearch server from the unpacked 7.9.2 directory, running it
# as the `daemon` user. `--bg` on the cell magic runs this script in the
# background so the notebook can continue while the server boots.
sudo -H -u daemon elasticsearch-7.9.2/bin/elasticsearch

Starting job # 2 in a separate thread.


In [68]:
# Wait until Elasticsearch actually answers on its HTTP port instead of
# sleeping a fixed 20 seconds: a fixed sleep is too short on a slow machine and
# wasted time on a fast one. Give up with a clear error after two minutes.
import urllib.request

es_url = 'http://localhost:9200/'
es_deadline = time.monotonic() + 120  # seconds
while True:
  try:
    urllib.request.urlopen(es_url, timeout=2).close()
    break
  except OSError:
    # URLError, connection-refused and reset errors are all OSError subclasses:
    # the server is not up yet.
    if time.monotonic() >= es_deadline:
      raise TimeoutError('Elasticsearch did not respond at {} within 120 seconds'.format(es_url))
    time.sleep(1)

In [69]:
# Sanity check: list the running processes whose command line mentions
# "elasticsearch" to confirm the background server process is alive.
!ps -ef | grep elasticsearch

root        1888    1886  0 09:54 ?        00:00:00 sudo -H -u daemon elasticsearch-7.9.2/bin/elasticsearch
daemon      1889    1888 88 09:54 ?        00:00:18 /content/elasticsearch-7.9.2/jdk/bin/java -Xshare:auto -Des.networkaddress.cache.ttl=60 -Des.networkaddress.cache.negative.ttl=10 -XX:+AlwaysPreTouch -Xss1m -Djava.awt.headless=true -Dfile.encoding=UTF-8 -Djna.nosys=true -XX:-OmitStackTraceInFastThrow -XX:+ShowCodeDetailsInExceptionMessages -Dio.netty.noUnsafe=true -Dio.netty.noKeySetOptimization=true -Dio.netty.recycler.maxCapacityPerThread=0 -Dio.netty.allocator.numDirectArenas=0 -Dlog4j.shutdownHookEnabled=false -Dlog4j2.disable.jmx=true -Djava.locale.providers=SPI,COMPAT -Xms1g -Xmx1g -XX:+UseG1GC -XX:G1ReservePercent=25 -XX:InitiatingHeapOccupancyPercent=30 -Djava.io.tmpdir=/tmp/elasticsearch-11878928863626721576 -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=data -XX:ErrorFile=logs/hs_err_pid%p.log -Xlog:gc*,gc+age=trace,safepoint:file=logs/gc.log:utctime,pid,tags:fileco

In [70]:
# Sanity check: query the server's root endpoint on port 9200; a JSON document
# with the node name and version means the HTTP API is reachable.
!curl -sX GET "localhost:9200/"

{
  "name" : "73126603a928",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "hekVLOg_QciKqRB3PCCQug",
  "version" : {
    "number" : "7.9.2",
    "build_flavor" : "oss",
    "build_type" : "tar",
    "build_hash" : "d34da0ea4a966c4e49417f2da2f244e3e97b4e6e",
    "build_date" : "2020-09-23T00:45:33.626720Z",
    "build_snapshot" : false,
    "lucene_version" : "8.6.2",
    "minimum_wire_compatibility_version" : "6.8.0",
    "minimum_index_compatibility_version" : "6.0.0-beta1"
  },
  "tagline" : "You Know, for Search"
}


In [71]:
def retrieve_lexical(dataset, hostname='localhost', split='test'):
  """Download a BEIR dataset, index it in Elasticsearch and run BM25 retrieval.

  Args:
    dataset: BEIR dataset name. It is substituted into the download URL and is
      also used as the Elasticsearch index name.
    hostname: Elasticsearch host passed to the BM25 search wrapper.
    split: dataset split handed to `GenericDataLoader.load`
      ('test', 'train' or 'dev').

  Returns:
    A tuple `(corpus, queries, qrels, retriever, results)` where `retriever`
    is the `EvaluateRetrieval` wrapper around the BM25 model and `results` is
    what `retriever.retrieve(corpus, queries)` returned.
  """
  url = 'https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip'
  out_dir = os.path.join(os.getcwd(), 'datasets')
  data_path = util.download_and_unzip(url.format(dataset), out_dir)
  corpus, queries, qrels = GenericDataLoader(data_path).load(split=split)
  # initialize=True : reindex
  model = BM25(index_name=dataset, hostname=hostname, initialize=True)
  retriever = EvaluateRetrieval(model)
  results = retriever.retrieve(corpus, queries)
  return corpus, queries, qrels, retriever, results

In [72]:
def rerank_model(reranker, corpus, queries, qrels, retriever, results, top_k):
  """Re-score first-stage results with `reranker` and evaluate the new ranking.

  Args:
    reranker: object exposing `rerank(corpus, queries, results, top_k=...)`.
    corpus, queries, results: forwarded unchanged to `reranker.rerank`.
    qrels: relevance judgements passed to `EvaluateRetrieval.evaluate`.
    retriever: only its `k_values` attribute is read, to choose the cutoffs.
    top_k: forwarded to `reranker.rerank` as `top_k`.

  Returns:
    Whatever `EvaluateRetrieval.evaluate` returns; callers in this notebook
    unpack it as (ndcg, map, recall, precision).
  """
  reranked = reranker.rerank(corpus, queries, results, top_k=top_k)
  cutoffs = retriever.k_values
  return EvaluateRetrieval.evaluate(qrels, reranked, cutoffs)

In [73]:
def evaluate(name, ndcg, _map, recall, precision, k_values=(10, 100, 1000)):
  """Collect selected metric cutoffs into a one-row DataFrame labelled `name`.

  Args:
    name: label used as the index of the single row.
    ndcg: mapping with keys of the form 'NDCG@<k>'.
    _map: mapping with keys of the form 'MAP@<k>'.
    recall: mapping with keys of the form 'Recall@<k>'.
    precision: mapping with keys of the form 'P@<k>'.
    k_values: cutoffs to report. Defaults to (10, 100, 1000), which
      reproduces the original fixed set of twelve columns.

  Returns:
    A DataFrame with one row and, for every metric, one column per cutoff.
    Columns are grouped MAP, NDCG, P, Recall, each in `k_values` order.

  Raises:
    KeyError: if a requested '<metric>@<k>' key is missing from its mapping.
  """
  metric_groups = (('MAP', _map), ('NDCG', ndcg), ('P', precision), ('Recall', recall))
  eval_dict = {}
  for prefix, scores in metric_groups:
    for k in k_values:
      column = '{}@{}'.format(prefix, k)
      eval_dict[column] = [scores[column]]
  # Passing the index at construction keeps a single row even when
  # `k_values` is empty (assigning it afterwards would fail on a 0-row frame).
  return pd.DataFrame(data=eval_dict, index=[name])

In [74]:
def evaluate_rerank(rerank, corpus, queries, qrels, retriever, results, top_k):
  """Run one reranker over the first-stage results and tabulate its metrics.

  Args:
    rerank: dict with the reranker object under 'rr' and its display label
      under 'name'.
    corpus, queries, qrels, retriever, results, top_k: forwarded unchanged to
      `rerank_model`.

  Returns:
    The value produced by `evaluate` for this reranker, labelled with
    `rerank['name']`.
  """
  metrics = rerank_model(rerank['rr'], corpus, queries, qrels, retriever, results, top_k)
  ndcg_scores, map_scores, recall_scores, precision_scores = metrics
  return evaluate(rerank['name'], ndcg_scores, map_scores, recall_scores, precision_scores)

In [75]:
def run_all_models(models, corpus, queries, qrels, retriever, results, top_k):
  """Evaluate every reranker in `models` on the same retrieval results.

  Args:
    models: iterable of reranker descriptors accepted by `evaluate_rerank`.
    corpus, queries, qrels, retriever, results, top_k: forwarded unchanged to
      `evaluate_rerank` for each entry.

  Returns:
    A list with one `evaluate_rerank` result per entry, in input order.
  """
  return [
      evaluate_rerank(entry, corpus, queries, qrels, retriever, results, top_k)
      for entry in models
  ]

In [76]:
# BEIR dataset identifiers. `nf_d` is defined here and consumed by the next cell.
cv_d, sf_d, sd_d, nf_d = 'trec-covid', 'scifact', 'scidocs', 'nfcorpus'

# TREC-COVID: BM25 retrieval, then metrics at the retriever's own cutoffs.
(cv_corpus, cv_queries, cv_qrels,
 cv_retriever, cv_results) = retrieve_lexical(cv_d)
(cv_ndcg, cv_map,
 cv_recall, cv_precision) = cv_retriever.evaluate(cv_qrels, cv_results, cv_retriever.k_values)

# SciFact
(sf_corpus, sf_queries, sf_qrels,
 sf_retriever, sf_results) = retrieve_lexical(sf_d)
(sf_ndcg, sf_map,
 sf_recall, sf_precision) = sf_retriever.evaluate(sf_qrels, sf_results, sf_retriever.k_values)

# SCIDOCS
(sd_corpus, sd_queries, sd_qrels,
 sd_retriever, sd_results) = retrieve_lexical(sd_d)
(sd_ndcg, sd_map,
 sd_recall, sd_precision) = sd_retriever.evaluate(sd_qrels, sd_results, sd_retriever.k_values)

  0%|          | 0/171332 [00:00<?, ?it/s]

  0%|          | 0/171332 [00:00<?, ?docs/s]
que: 100%|██████████| 1/1 [00:08<00:00,  8.29s/it]


  0%|          | 0/5183 [00:00<?, ?it/s]

  0%|          | 0/5183 [00:00<?, ?docs/s]
que: 100%|██████████| 3/3 [00:12<00:00,  4.09s/it]


  0%|          | 0/25657 [00:00<?, ?it/s]

  0%|          | 0/25657 [00:00<?, ?docs/s]
que: 100%|██████████| 8/8 [00:50<00:00,  6.37s/it]


In [77]:

# NFCorpus: BM25 retrieval, then metrics at the retriever's own cutoffs.
(nf_corpus, nf_queries, nf_qrels,
 nf_retriever, nf_results) = retrieve_lexical(nf_d)
(nf_ndcg, nf_map,
 nf_recall, nf_precision) = nf_retriever.evaluate(nf_qrels, nf_results, nf_retriever.k_values)

  0%|          | 0/3633 [00:00<?, ?it/s]

  0%|          | 0/3633 [00:00<?, ?docs/s]
que: 100%|██████████| 3/3 [00:06<00:00,  2.11s/it]


In [78]:
# One single-row metrics table per dataset for the BM25 baseline, in the order
# TREC-COVID, SciFact, SCIDOCS, NFCorpus.
lexical_eval = [
    evaluate(cv_d, cv_ndcg, cv_map, cv_recall, cv_precision),
    evaluate(sf_d, sf_ndcg, sf_map, sf_recall, sf_precision),
    evaluate(sd_d, sd_ndcg, sd_map, sd_recall, sd_precision),
    evaluate(nf_d, nf_ndcg, nf_map, nf_recall, nf_precision),
]

In [79]:
# Reranking configuration shared by every model and dataset.
batch_size = 128
# NOTE(review): in the reranked tables of this notebook the @100 and @1000
# values for Recall and MAP are identical, which suggests only `top_k`
# documents per query are re-scored — read the @1000 columns accordingly.
top_k = 100

# Cross-encoder checkpoints to compare.
cmtb = 'cross-encoder/ms-marco-TinyBERT-L-2-v2'
cmlm = 'cross-encoder/ms-marco-MiniLM-L-6-v2'
cmeb = 'cross-encoder/ms-marco-electra-base'

# Each descriptor pairs a display name with its ready-to-use reranker.
rr_cmtb = dict(name=cmtb, rr=Rerank(CrossEncoder(cmtb), batch_size=batch_size))
rr_cmlm = dict(name=cmlm, rr=Rerank(CrossEncoder(cmlm), batch_size=batch_size))
rr_cmeb = dict(name=cmeb, rr=Rerank(CrossEncoder(cmeb), batch_size=batch_size))
models = [rr_cmtb, rr_cmlm, rr_cmeb]

# Rerank the BM25 results of every dataset with every model.
cv_res = run_all_models(
    models, cv_corpus, cv_queries, cv_qrels, cv_retriever, cv_results, top_k)
sf_res = run_all_models(
    models, sf_corpus, sf_queries, sf_qrels, sf_retriever, sf_results, top_k)
sd_res = run_all_models(
    models, sd_corpus, sd_queries, sd_qrels, sd_retriever, sd_results, top_k)
nf_res = run_all_models(
    models, nf_corpus, nf_queries, nf_qrels, nf_retriever, nf_results, top_k)

Batches:   0%|          | 0/40 [00:00<?, ?it/s]

Batches:   0%|          | 0/40 [00:00<?, ?it/s]

Batches:   0%|          | 0/40 [00:00<?, ?it/s]

Batches:   0%|          | 0/235 [00:00<?, ?it/s]

Batches:   0%|          | 0/235 [00:00<?, ?it/s]

Batches:   0%|          | 0/235 [00:00<?, ?it/s]

Batches:   0%|          | 0/781 [00:00<?, ?it/s]

Batches:   0%|          | 0/781 [00:00<?, ?it/s]

Batches:   0%|          | 0/781 [00:00<?, ?it/s]

Batches:   0%|          | 0/173 [00:00<?, ?it/s]

Batches:   0%|          | 0/173 [00:00<?, ?it/s]

Batches:   0%|          | 0/173 [00:00<?, ?it/s]



---



LEXICAL SEARCH

In [80]:
# Stack the per-dataset BM25 rows into one table (one row per dataset).
pd.concat(lexical_eval)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
trec-covid,0.01698,0.08517,0.2084,0.68803,0.48454,0.433,0.734,0.5084,0.20194,0.01907,0.11731,0.41281
scifact,0.63897,0.64449,0.64478,0.68543,0.70876,0.7161,0.09,0.01037,0.0011,0.81311,0.91589,0.97333
scidocs,0.09637,0.11192,0.11417,0.16468,0.23043,0.2738,0.0857,0.0181,0.00285,0.17372,0.36757,0.57872
nfcorpus,0.12979,0.15365,0.15899,0.34355,0.28807,0.31493,0.2474,0.06701,0.01118,0.16615,0.25746,0.37525




---



**TREC-COVID** - [Homepage](https://ir.nist.gov/covidSubmit/index.html) - [Paper](https://arxiv.org/abs/2005.04474)

In [81]:
# Reranking results on TREC-COVID, one row per cross-encoder.
pd.concat(cv_res)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
cross-encoder/ms-marco-TinyBERT-L-2-v2,0.01742,0.08865,0.08865,0.72834,0.49542,0.19315,0.78,0.5084,0.05084,0.01954,0.11731,0.11731
cross-encoder/ms-marco-MiniLM-L-6-v2,0.01862,0.09172,0.09172,0.75764,0.50038,0.19481,0.834,0.5084,0.05084,0.02116,0.11731,0.11731
cross-encoder/ms-marco-electra-base,0.01662,0.08839,0.08839,0.69748,0.49225,0.19198,0.752,0.5084,0.05084,0.01935,0.11731,0.11731


**SCIFACT** - [Homepage](https://allenai.org/data/scifact) - [Paper](https://arxiv.org/abs/2004.14974)

In [82]:
# Reranking results on SciFact, one row per cross-encoder.
pd.concat(sf_res)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
cross-encoder/ms-marco-TinyBERT-L-2-v2,0.62005,0.62817,0.62817,0.66318,0.69514,0.69514,0.08767,0.01037,0.00104,0.77761,0.91589,0.91589
cross-encoder/ms-marco-MiniLM-L-6-v2,0.64036,0.6467,0.6467,0.68631,0.71097,0.71097,0.091,0.01037,0.00104,0.81111,0.91589,0.91589
cross-encoder/ms-marco-electra-base,0.63333,0.63979,0.63979,0.67353,0.70299,0.70299,0.08867,0.01037,0.00104,0.78178,0.91589,0.91589


**SCIDOCS** - [Homepage](https://allenai.org/data/scidocs) - [Paper](https://arxiv.org/abs/2004.07180)

In [83]:
# Reranking results on SCIDOCS, one row per cross-encoder.
pd.concat(sd_res)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
cross-encoder/ms-marco-TinyBERT-L-2-v2,0.09001,0.10564,0.10564,0.15293,0.22387,0.22387,0.0774,0.01811,0.00181,0.15692,0.36777,0.36777
cross-encoder/ms-marco-MiniLM-L-6-v2,0.09717,0.11372,0.11372,0.16578,0.23334,0.23334,0.0849,0.01811,0.00181,0.17212,0.36777,0.36777
cross-encoder/ms-marco-electra-base,0.08788,0.10452,0.10452,0.15338,0.22449,0.22449,0.0784,0.01811,0.00181,0.15898,0.36777,0.36777


**NFCORPUS** - [Homepage](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/) - [Paper](https://www.cl.uni-heidelberg.de/~riezler/publications/papers/ECIR2016.pdf)

In [84]:
# Reranking results on NFCorpus, one row per cross-encoder.
pd.concat(nf_res)

Unnamed: 0,MAP@10,MAP@100,MAP@1000,NDCG@10,NDCG@100,NDCG@1000,P@10,P@100,P@1000,Recall@10,Recall@100,Recall@1000
cross-encoder/ms-marco-TinyBERT-L-2-v2,0.13547,0.15963,0.15963,0.35284,0.29551,0.28214,0.24968,0.06695,0.00669,0.16776,0.25732,0.25732
cross-encoder/ms-marco-MiniLM-L-6-v2,0.14381,0.1685,0.1685,0.36572,0.3046,0.29108,0.25422,0.06695,0.00669,0.16965,0.25732,0.25732
cross-encoder/ms-marco-electra-base,0.13133,0.15502,0.15502,0.34408,0.29067,0.27741,0.24838,0.06695,0.00669,0.16728,0.25732,0.25732


end of fun.