# Set up BEIR

In [None]:
# Print the NVIDIA driver / GPU status for this runtime to confirm which accelerator is attached.
!nvidia-smi

Fri Mar 29 13:54:57 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
!pip install beir

Collecting beir
  Downloading beir-2.0.0.tar.gz (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.6/53.6 kB[0m [31m722.4 kB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sentence-transformers (from beir)
  Downloading sentence_transformers-2.6.1-py3-none-any.whl (163 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.3/163.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pytrec_eval (from beir)
  Downloading pytrec_eval-0.5.tar.gz (15 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting faiss_cpu (from beir)
  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting elasticsearch==7.9.1 (from beir)
  Downloading elasticsearch-7.9.1-py2.py3-none-any.whl (219 kB)
[2K     [90m━━━━━━━━

In [None]:
from beir import util, LoggingHandler

import logging
import pathlib, os

# Logging setup: send INFO-and-above records through BEIR's LoggingHandler,
# each prefixed with a timestamp, so debug information is printed in the cell output.
logging.basicConfig(
    handlers=[LoggingHandler()],
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s - %(message)s',
)
# End of logging setup.

  from tqdm.autonotebook import tqdm


# Set up FinBERT

In [None]:
!pip install transformers numpy torch



In [None]:
from beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES
from transformers import AutoModel, AutoTokenizer
import numpy as np
import torch
from tqdm import trange
import os
from typing import List, Dict

class FinBERT:
    """Dense-retrieval encoder that wraps a Hugging Face checkpoint for BEIR.

    Provides the ``encode_queries`` / ``encode_corpus`` pair that is handed to
    ``DRES`` in this notebook. Every text is represented by the final-layer
    hidden state of its first token (``last_hidden_state[:, 0, :]``).
    """

    def __init__(self, model_path: str, device, **kwargs):
        """Load the query encoder, the document encoder and the tokenizer.

        Args:
            model_path: Checkpoint name or path passed to ``from_pretrained``.
            device: Device the encoders are moved to and batches are sent to.
            **kwargs: Accepted for call compatibility and ignored.
        """
        self.device = device
        # Two independent copies of the same checkpoint are kept so the query
        # and document sides can be swapped or modified separately.
        self.bert_q = self._load_encoder(model_path)
        self.bert_d = self._load_encoder(model_path)

        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

    def _load_encoder(self, model_path: str):
        """Load one encoder, switch it to eval mode and move it to ``self.device``."""
        model = AutoModel.from_pretrained(model_path)
        model.eval()
        model.to(self.device)
        return model

    def _embed_batch(self, model, *text_columns, truncation) -> torch.Tensor:
        """Tokenize one batch and return its first-token embeddings on the CPU."""
        encoded = self.tokenizer(*text_columns, truncation=truncation, padding=True,
                                 return_tensors='pt', max_length=512)
        # The return value is intentionally unused: the tokenizer output is
        # expected to move its tensors to the device in place.
        encoded.to(self.device)
        model_out = model(**encoded)
        return model_out.last_hidden_state[:, 0, :].detach().cpu()

    @staticmethod
    def _concat(batches: List[torch.Tensor], model) -> torch.Tensor:
        """Join per-batch embeddings; return a ``(0, hidden)`` tensor when there are none."""
        if batches:
            return torch.cat(batches, dim=0)
        # Nothing was encoded: torch.stack / torch.cat would raise on an empty
        # list, so return an empty matrix instead. The width is read from
        # ``model.config.hidden_size`` when available (assumed to exist on
        # Hugging Face models -- falls back to 0 otherwise).
        hidden_size = getattr(getattr(model, "config", None), "hidden_size", 0)
        return torch.empty((0, hidden_size))

    def encode_queries(self, queries: List[str], batch_size: int = 16, **kwargs) -> torch.Tensor:
        """Embed queries with the query encoder.

        Args:
            queries: Query strings.
            batch_size: Number of queries tokenized and encoded per step.
            **kwargs: Extra arguments supplied by the caller; ignored.

        Returns:
            A CPU tensor with one row per query (empty input gives zero rows).
        """
        batches = []

        with torch.no_grad():
            for start_idx in trange(0, len(queries), batch_size):
                batch = queries[start_idx:start_idx + batch_size]
                batches.append(self._embed_batch(self.bert_q, batch, truncation=True))

        return self._concat(batches, self.bert_q)

    def encode_corpus(self, corpus: List[Dict[str, str]], batch_size: int = 16, **kwargs) -> torch.Tensor:
        """Embed documents with the document encoder.

        Each document is tokenized as a (title, text) pair; a document without
        a ``'title'`` key is treated as having an empty title.

        Args:
            corpus: Documents as dicts with a ``'text'`` key and an optional
                ``'title'`` key.
            batch_size: Number of documents tokenized and encoded per step.
            **kwargs: Extra arguments supplied by the caller; ignored.

        Returns:
            A CPU tensor with one row per document (empty input gives zero rows).
        """
        batches = []

        with torch.no_grad():
            for start_idx in trange(0, len(corpus), batch_size):
                rows = corpus[start_idx:start_idx + batch_size]
                titles = [row.get('title', '') for row in rows]
                texts = [row['text'] for row in rows]
                batches.append(
                    self._embed_batch(self.bert_d, titles, texts, truncation='longest_first')
                )

        return self._concat(batches, self.bert_d)

# Set up datasets

In [None]:
import pathlib, os
from beir import util

def download_dataset(dataset_name: str):
    """Download and unzip a BEIR dataset into ``<cwd>/datasets``.

    Args:
        dataset_name: BEIR dataset identifier (e.g. "scifact", "fiqa",
            "trec-covid"); it is substituted into the download URL.

    Returns:
        Whatever ``util.download_and_unzip`` returns for the dataset
        (printed as the location of the downloaded data).
    """
    # Build the URL from the argument itself, not from a notebook-level
    # variable, so the function works regardless of what the caller has defined.
    url = "https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip".format(dataset_name)
    out_dir = os.path.join(os.getcwd(), "datasets")
    data_path = util.download_and_unzip(url, out_dir)
    print("Dataset downloaded here: {}".format(data_path))
    return data_path

In [None]:
#!ls datasets/scifact/

In [None]:
from beir.datasets.data_loader import GenericDataLoader


# Dataset to evaluate on; "scifact" and "trec-covid" are drop-in alternatives.
dataset = "fiqa"
data_path = download_dataset(dataset)

# Load the test split; split="train" or split="dev" can be used instead.
corpus, queries, qrels = GenericDataLoader(data_path).load(split="test")

/content/datasets/trec-covid.zip:   0%|          | 0.00/70.5M [00:00<?, ?iB/s]

Dataset downloaded here: /content/datasets/trec-covid


  0%|          | 0/171332 [00:00<?, ?it/s]

# Evaluate

In [None]:
from beir.retrieval.evaluation import EvaluateRetrieval

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Wrap the FinBERT encoder in BEIR's dense exact-search retriever and score with dot product.
finbert = DRES(
    FinBERT(model_path="yiyanghkust/finbert-tone", device=device),
    batch_size=16,
)
retriever = EvaluateRetrieval(finbert, score_function="dot")

# Retrieve results for every query against the loaded corpus.
results = retriever.retrieve(corpus, queries)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

100%|██████████| 4/4 [00:02<00:00,  1.83it/s]
100%|██████████| 3125/3125 [29:00<00:00,  1.80it/s]
100%|██████████| 3125/3125 [19:53<00:00,  2.62it/s]
100%|██████████| 3125/3125 [06:57<00:00,  7.48it/s]
100%|██████████| 1334/1334 [00:28<00:00, 46.45it/s]


In [None]:
# Score the retrieved results against the relevance judgements at each cutoff in k_values.
ndcg, _map, recall, precision = EvaluateRetrieval.evaluate(
    qrels,
    results,
    retriever.k_values,
)
(ndcg, _map, recall, precision)

({'NDCG@1': 0.0,
  'NDCG@3': 0.01061,
  'NDCG@5': 0.01468,
  'NDCG@10': 0.01359,
  'NDCG@100': 0.00852,
  'NDCG@1000': 0.01057},
 {'MAP@1': 0.0,
  'MAP@3': 3e-05,
  'MAP@5': 8e-05,
  'MAP@10': 0.00011,
  'MAP@100': 0.00017,
  'MAP@1000': 0.00029},
 {'Recall@1': 0.0,
  'Recall@3': 6e-05,
  'Recall@5': 0.00031,
  'Recall@10': 0.00055,
  'Recall@100': 0.00194,
  'Recall@1000': 0.01097},
 {'P@1': 0.0,
  'P@3': 0.01333,
  'P@5': 0.02,
  'P@10': 0.018,
  'P@100': 0.0094,
  'P@1000': 0.00572})

In [None]:
# Compute the additional metrics one after another, in the same order as the names listed.
mrr, recall_cap, hole, top_k_accuracy = (
    retriever.evaluate_custom(qrels, results, retriever.k_values, metric=metric_name)
    for metric_name in ("mrr", "recall_cap", "hole", "top_k_accuracy")
)
(mrr, recall_cap, hole, top_k_accuracy)