# Tuning the Top-k Query parameter

First, we set up the environment: imports, environment variables, the database connection, and the compute device.

In [2]:
from dotenv import load_dotenv
import os
import psycopg
import pandas as pd
import numpy as np
from time import time
import torch
from TextEnrichers import get_enricher, TextEnricher
from database.database import Database
from Embedders import Embedder, get_embedder
from tqdm import tqdm

# Load settings from the local .env file; override=True lets values from the file
# replace variables that are already set in the process environment.
load_dotenv('.env', override=True)
# Print DB_PORT as a quick check that the environment was loaded.
print(os.getenv('DB_PORT'))


5432


In [3]:
# Database setup
# `db` is the module-level handle that `get_ranks` uses for vector queries.
# presumably the connection settings come from the environment loaded earlier — TODO confirm
db = Database()
# Check the connection up front, before any queries are issued.
db.test_connection()

# Pick the torch device: CUDA first, then Apple MPS, otherwise CPU.
# The MPS check uses `torch.backends.mps.is_available()`, the documented availability
# check, instead of `torch.mps.is_available()`, which older torch releases do not provide.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f"Using device: {device}")

Database         User             Host                             Port            
citeline_db      bbasseri         localhost                        5432            
Database version: ('PostgreSQL 17.3 (Homebrew) on x86_64-apple-darwin23.6.0, compiled by Apple clang version 16.0.0 (clang-1600.0.26.6), 64-bit',)
Using device: mps


## Investigating precision over `k`

For our various embedding models and enrichment strategies, we want to know the smallest `top_k` value that will still retrieve the target reference for a given sentence. 

To investigate this, we'll sample 100 examples from the non-trivial training data. Each example typically has 1-2 target DOIs. For each example, we'll first query the database with a large `top_k`, so we can be reasonably sure the results include the target references. We can then ask at what index in the query results each target DOI first appears. Ideally, the targets will all be ranked very highly, i.e. appear at *low* indices in the query results. We also expect enriched examples to have their target DOIs ranked higher (at lower indices).

In [None]:
# Load the training split and draw a reproducible 100-example sample from it.
data = pd.read_json('data/dataset/split/train.jsonl', lines=True)
examples = data.sample(n=100, random_state=42)
# Preview of the sample; this is not the last expression of the cell.
examples.head()


def lowest_index_matching_doi(target_doi: str, query_results: list) -> int:
    """
    Find the position of the earliest query result whose `doi` equals `target_doi`.

    Returns that zero-based index, or -1 when no result carries the target DOI.
    """
    return next(
        (position for position, hit in enumerate(query_results)
         if target_doi == hit.doi),
        -1,
    )


def get_ranks(
    example: pd.Series,
    embedding,
    table_name: str,
    top_k: int,
    ef_search: int,
    metric: str = 'vector_cosine_ops',
    target_column: str = 'bge_norm',
) -> list[int]:
    """
    Rank each of an example's target DOIs within one vector-search result list.

    Issues a single `db.query_vector_table` call for `embedding` and, for every DOI
    in `example['citation_dois']`, reports the first index at which a result with
    that DOI appears.

    Args:
        example: Dataset row; only `example['citation_dois']` is read.
        embedding: Query vector, passed to the database as `query_vector`.
        table_name: Vector table to search (forwarded to the query).
        top_k: Number of results requested from the database.
        ef_search: Forwarded to the query as `ef_search`.
        metric: Currently unused; kept so existing callers keep working.
        target_column: Vector column to search. Defaults to 'bge_norm', the column
            this function previously hard-coded.

    Returns:
        One index per target DOI, in the order of `example['citation_dois']`.
        A value of -1 marks a DOI that is absent from the query results.
    """
    query_results = db.query_vector_table(
        query_vector=embedding,
        target_column=target_column,
        table_name=table_name,
        top_k=top_k,
        use_index=True,
        ef_search=ef_search
    )

    return [
        lowest_index_matching_doi(target_doi=doi, query_results=query_results)
        for doi in example['citation_dois']
    ]

In [9]:
from itertools import product


# bge-small is always evaluated; the BERT-based models are only added on CUDA.
embedding_models = ['BAAI/bge-small-en']
if device == 'cuda':
    embedding_models.extend(['bert-base-uncased', 'adsabs/astroBERT'])

# Every (embedding model, enrichment strategy) pair to evaluate.
combos = [*product(embedding_models, TextEnricher.ENRICHMENT_FN.keys())]
print(f"Combos: {combos}")

Combos: [('BAAI/bge-small-en', 'identity'), ('BAAI/bge-small-en', 'add_abstract'), ('BAAI/bge-small-en', 'add_title'), ('BAAI/bge-small-en', 'add_title_and_abstract')]


In [13]:
# Dict mapping "model name": pd.DataFrame() of results (enrichment functions used -> columns)
results = {model: pd.DataFrame() for model in embedding_models}
top_k = 5000

# The (sentence, source DOI) pairs do not depend on the combo, so build them once.
texts_with_dois = list(
    examples[['sent_no_cit', 'source_doi']].itertuples(index=False, name=None))

# Only reload the embedder when the model name changes, rather than once per combo.
loaded_model = None

for embedding_model, enricher_name in combos:
    print(f"Embedding model: {embedding_model}, Enricher: {enricher_name}")
    if embedding_model != loaded_model:
        embedder = get_embedder(embedding_model, device=device)
        loaded_model = embedding_model

    # Enrich
    enricher = get_enricher(enricher_name)
    enriched_texts = enricher.enrich_batch(texts_with_dois)

    embeddings = embedder(enriched_texts)

    # `ranks` is flattened: each example contributes one entry per target DOI,
    # so rows of the result column do not map one-to-one onto examples.
    ranks = []
    for i in tqdm(range(len(examples))):
        embedding = embeddings[i]
        example = examples.iloc[i]
        # NOTE(review): ef_search is set equal to top_k (5000). pgvector's
        # hnsw.ef_search setting appears to be capped at 1000 — confirm how
        # db.query_vector_table applies this value.
        ranks += get_ranks(
            example=example,
            embedding=embedding,
            table_name='library',
            top_k=top_k,
            ef_search=top_k)
    series = pd.Series(ranks)
    results[embedding_model][enricher_name] = series

# Short file-name stem for each embedding model.
model_names_to_filenames = {
    'BAAI/bge-small-en': 'bge',
    'bert-base-uncased': 'bert',
    'adsabs/astroBERT': 'astrobert'
}

# Save the results to CSV files
# Create the output directory first so a missing `tests/` folder does not make
# the save fail after the (slow) queries have already run.
output_dir = 'tests'
os.makedirs(output_dir, exist_ok=True)
for model_name, df in results.items():
    filename = f"{output_dir}/{model_names_to_filenames[model_name]}_ranks_topk{top_k}.csv"
    df.to_csv(filename, index=False)
    print(f"Saved results for {model_name} to {filename}")

Embedding model: BAAI/bge-small-en, Enricher: identity


  0%|          | 0/100 [00:00<?, ?it/s]



  1%|          | 1/100 [00:00<01:35,  1.04it/s]

  Query execution time: 0.78 seconds


  2%|▏         | 2/100 [00:01<01:06,  1.47it/s]

  Query execution time: 0.42 seconds


  3%|▎         | 3/100 [00:01<00:57,  1.67it/s]

  Query execution time: 0.42 seconds


  4%|▍         | 4/100 [00:02<00:43,  2.21it/s]

  Query execution time: 0.20 seconds


  5%|▌         | 5/100 [00:02<00:36,  2.60it/s]

  Query execution time: 0.23 seconds


  6%|▌         | 6/100 [00:02<00:34,  2.71it/s]

  Query execution time: 0.30 seconds


  7%|▋         | 7/100 [00:03<00:31,  2.93it/s]

  Query execution time: 0.26 seconds


  8%|▊         | 8/100 [00:03<00:34,  2.66it/s]

  Query execution time: 0.41 seconds


  9%|▉         | 9/100 [00:04<00:39,  2.32it/s]

  Query execution time: 0.51 seconds


 10%|█         | 10/100 [00:04<00:34,  2.59it/s]

  Query execution time: 0.26 seconds


 11%|█         | 11/100 [00:04<00:30,  2.89it/s]

  Query execution time: 0.23 seconds


 12%|█▏        | 12/100 [00:05<00:33,  2.63it/s]

  Query execution time: 0.43 seconds


 13%|█▎        | 13/100 [00:05<00:35,  2.48it/s]

  Query execution time: 0.44 seconds


 14%|█▍        | 14/100 [00:05<00:34,  2.48it/s]

  Query execution time: 0.37 seconds


 15%|█▌        | 15/100 [00:06<00:31,  2.70it/s]

  Query execution time: 0.26 seconds


 16%|█▌        | 16/100 [00:06<00:33,  2.48it/s]

  Query execution time: 0.44 seconds


 17%|█▋        | 17/100 [00:07<00:32,  2.55it/s]

  Query execution time: 0.34 seconds


 18%|█▊        | 18/100 [00:07<00:30,  2.70it/s]

  Query execution time: 0.29 seconds


 19%|█▉        | 19/100 [00:07<00:27,  3.00it/s]

  Query execution time: 0.20 seconds


 20%|██        | 20/100 [00:07<00:26,  3.00it/s]

  Query execution time: 0.30 seconds


 21%|██        | 21/100 [00:08<00:28,  2.80it/s]

  Query execution time: 0.36 seconds


 22%|██▏       | 22/100 [00:08<00:29,  2.61it/s]

  Query execution time: 0.39 seconds


 23%|██▎       | 23/100 [00:09<00:28,  2.72it/s]

  Query execution time: 0.28 seconds


 24%|██▍       | 24/100 [00:09<00:25,  2.95it/s]

  Query execution time: 0.25 seconds


 25%|██▌       | 25/100 [00:09<00:26,  2.78it/s]

  Query execution time: 0.38 seconds


 26%|██▌       | 26/100 [00:10<00:24,  3.02it/s]

  Query execution time: 0.24 seconds


 27%|██▋       | 27/100 [00:10<00:23,  3.08it/s]

  Query execution time: 0.28 seconds


 28%|██▊       | 28/100 [00:10<00:21,  3.40it/s]

  Query execution time: 0.20 seconds


 29%|██▉       | 29/100 [00:10<00:22,  3.19it/s]

  Query execution time: 0.33 seconds


 30%|███       | 30/100 [00:11<00:21,  3.29it/s]

  Query execution time: 0.25 seconds


 31%|███       | 31/100 [00:11<00:19,  3.47it/s]

  Query execution time: 0.23 seconds
  Query execution time: 0.25 seconds


 32%|███▏      | 32/100 [00:12<00:24,  2.82it/s]



 33%|███▎      | 33/100 [00:12<00:21,  3.11it/s]

  Query execution time: 0.22 seconds
  Query execution time: 0.17 seconds


 34%|███▍      | 34/100 [00:12<00:18,  3.52it/s]

  Query execution time: 0.20 seconds


 35%|███▌      | 35/100 [00:12<00:17,  3.74it/s]



 36%|███▌      | 36/100 [00:12<00:17,  3.62it/s]

  Query execution time: 0.26 seconds


 37%|███▋      | 37/100 [00:13<00:18,  3.37it/s]

  Query execution time: 0.31 seconds


 38%|███▊      | 38/100 [00:13<00:18,  3.33it/s]

  Query execution time: 0.27 seconds


 39%|███▉      | 39/100 [00:13<00:18,  3.31it/s]

  Query execution time: 0.28 seconds


 40%|████      | 40/100 [00:14<00:17,  3.42it/s]

  Query execution time: 0.24 seconds


 41%|████      | 41/100 [00:14<00:17,  3.38it/s]

  Query execution time: 0.28 seconds


 42%|████▏     | 42/100 [00:14<00:18,  3.14it/s]

  Query execution time: 0.34 seconds


 43%|████▎     | 43/100 [00:15<00:17,  3.24it/s]

  Query execution time: 0.26 seconds


 44%|████▍     | 44/100 [00:15<00:15,  3.53it/s]

  Query execution time: 0.19 seconds


 45%|████▌     | 45/100 [00:15<00:15,  3.61it/s]

  Query execution time: 0.23 seconds


 46%|████▌     | 46/100 [00:16<00:16,  3.22it/s]

  Query execution time: 0.36 seconds


 47%|████▋     | 47/100 [00:16<00:16,  3.30it/s]

  Query execution time: 0.26 seconds


 48%|████▊     | 48/100 [00:16<00:15,  3.43it/s]

  Query execution time: 0.23 seconds


 49%|████▉     | 49/100 [00:16<00:14,  3.43it/s]

  Query execution time: 0.26 seconds


 50%|█████     | 50/100 [00:17<00:13,  3.60it/s]

  Query execution time: 0.21 seconds


 51%|█████     | 51/100 [00:17<00:14,  3.35it/s]

  Query execution time: 0.31 seconds


 52%|█████▏    | 52/100 [00:18<00:18,  2.65it/s]

  Query execution time: 0.52 seconds


 53%|█████▎    | 53/100 [00:18<00:17,  2.66it/s]

  Query execution time: 0.34 seconds


 54%|█████▍    | 54/100 [00:18<00:15,  2.89it/s]

  Query execution time: 0.24 seconds


 55%|█████▌    | 55/100 [00:19<00:16,  2.76it/s]

  Query execution time: 0.37 seconds


 56%|█████▌    | 56/100 [00:19<00:15,  2.87it/s]

  Query execution time: 0.29 seconds


 57%|█████▋    | 57/100 [00:19<00:17,  2.40it/s]

  Query execution time: 0.54 seconds


 58%|█████▊    | 58/100 [00:20<00:16,  2.62it/s]

  Query execution time: 0.26 seconds


 59%|█████▉    | 59/100 [00:20<00:14,  2.78it/s]

  Query execution time: 0.28 seconds


 60%|██████    | 60/100 [00:20<00:14,  2.78it/s]

  Query execution time: 0.30 seconds


 61%|██████    | 61/100 [00:21<00:15,  2.44it/s]

  Query execution time: 0.49 seconds


 62%|██████▏   | 62/100 [00:21<00:14,  2.64it/s]

  Query execution time: 0.28 seconds


 63%|██████▎   | 63/100 [00:22<00:13,  2.65it/s]

  Query execution time: 0.33 seconds


 64%|██████▍   | 64/100 [00:22<00:13,  2.66it/s]

  Query execution time: 0.35 seconds


 65%|██████▌   | 65/100 [00:22<00:12,  2.88it/s]

  Query execution time: 0.25 seconds


 66%|██████▌   | 66/100 [00:23<00:13,  2.54it/s]

  Query execution time: 0.47 seconds


 67%|██████▋   | 67/100 [00:23<00:11,  2.87it/s]

  Query execution time: 0.21 seconds


 68%|██████▊   | 68/100 [00:23<00:10,  3.08it/s]

  Query execution time: 0.22 seconds


 69%|██████▉   | 69/100 [00:24<00:10,  2.94it/s]

  Query execution time: 0.35 seconds


 70%|███████   | 70/100 [00:24<00:11,  2.73it/s]

  Query execution time: 0.39 seconds


 71%|███████   | 71/100 [00:25<00:11,  2.48it/s]

  Query execution time: 0.46 seconds


 72%|███████▏  | 72/100 [00:25<00:11,  2.40it/s]

  Query execution time: 0.41 seconds


 73%|███████▎  | 73/100 [00:25<00:10,  2.46it/s]

  Query execution time: 0.35 seconds


 74%|███████▍  | 74/100 [00:26<00:12,  2.07it/s]

  Query execution time: 0.62 seconds


 75%|███████▌  | 75/100 [00:27<00:12,  2.07it/s]

  Query execution time: 0.44 seconds


 76%|███████▌  | 76/100 [00:27<00:10,  2.24it/s]

  Query execution time: 0.34 seconds


 77%|███████▋  | 77/100 [00:27<00:08,  2.57it/s]

  Query execution time: 0.22 seconds


 78%|███████▊  | 78/100 [00:28<00:10,  2.10it/s]

  Query execution time: 0.49 seconds


 79%|███████▉  | 79/100 [00:28<00:08,  2.38it/s]

  Query execution time: 0.24 seconds


 80%|████████  | 80/100 [00:28<00:07,  2.65it/s]

  Query execution time: 0.25 seconds


 81%|████████  | 81/100 [00:29<00:08,  2.34it/s]

  Query execution time: 0.51 seconds


 82%|████████▏ | 82/100 [00:29<00:06,  2.60it/s]

  Query execution time: 0.27 seconds


 83%|████████▎ | 83/100 [00:30<00:06,  2.61it/s]

  Query execution time: 0.35 seconds


 84%|████████▍ | 84/100 [00:30<00:05,  2.81it/s]

  Query execution time: 0.26 seconds


 85%|████████▌ | 85/100 [00:31<00:06,  2.31it/s]

  Query execution time: 0.58 seconds


 86%|████████▌ | 86/100 [00:31<00:06,  2.31it/s]

  Query execution time: 0.40 seconds


 87%|████████▋ | 87/100 [00:31<00:05,  2.49it/s]

  Query execution time: 0.30 seconds


 88%|████████▊ | 88/100 [00:32<00:04,  2.58it/s]

  Query execution time: 0.33 seconds


 89%|████████▉ | 89/100 [00:32<00:03,  2.89it/s]

  Query execution time: 0.23 seconds


 90%|█████████ | 90/100 [00:32<00:03,  3.03it/s]

  Query execution time: 0.27 seconds


 91%|█████████ | 91/100 [00:33<00:02,  3.02it/s]

  Query execution time: 0.29 seconds


 92%|█████████▏| 92/100 [00:33<00:02,  3.01it/s]

  Query execution time: 0.30 seconds


 93%|█████████▎| 93/100 [00:33<00:02,  3.11it/s]

  Query execution time: 0.28 seconds


 94%|█████████▍| 94/100 [00:34<00:02,  2.93it/s]

  Query execution time: 0.37 seconds


 95%|█████████▌| 95/100 [00:34<00:01,  3.04it/s]

  Query execution time: 0.26 seconds


 96%|█████████▌| 96/100 [00:34<00:01,  2.89it/s]

  Query execution time: 0.36 seconds


 97%|█████████▋| 97/100 [00:35<00:01,  2.99it/s]

  Query execution time: 0.27 seconds


 98%|█████████▊| 98/100 [00:35<00:00,  2.91it/s]

  Query execution time: 0.32 seconds


 99%|█████████▉| 99/100 [00:36<00:00,  2.27it/s]

  Query execution time: 0.62 seconds


100%|██████████| 100/100 [00:36<00:00,  2.74it/s]

  Query execution time: 0.34 seconds
Embedding model: BAAI/bge-small-en, Enricher: add_abstract



  0%|          | 0/100 [00:00<?, ?it/s]

  Query execution time: 0.20 seconds


  1%|          | 1/100 [00:00<00:23,  4.29it/s]



  2%|▏         | 2/100 [00:00<00:28,  3.38it/s]

  Query execution time: 0.30 seconds


  3%|▎         | 3/100 [00:00<00:32,  3.02it/s]

  Query execution time: 0.29 seconds


  4%|▍         | 4/100 [00:01<00:40,  2.37it/s]

  Query execution time: 0.48 seconds


  5%|▌         | 5/100 [00:01<00:38,  2.44it/s]

  Query execution time: 0.37 seconds


  6%|▌         | 6/100 [00:02<00:33,  2.84it/s]

  Query execution time: 0.22 seconds


  7%|▋         | 7/100 [00:02<00:34,  2.67it/s]

  Query execution time: 0.40 seconds


  8%|▊         | 8/100 [00:02<00:34,  2.64it/s]

  Query execution time: 0.35 seconds


  9%|▉         | 9/100 [00:03<00:35,  2.57it/s]

  Query execution time: 0.38 seconds


 10%|█         | 10/100 [00:03<00:36,  2.49it/s]

  Query execution time: 0.40 seconds


 11%|█         | 11/100 [00:04<00:32,  2.77it/s]

  Query execution time: 0.23 seconds


 12%|█▏        | 12/100 [00:04<00:28,  3.06it/s]

  Query execution time: 0.22 seconds


 13%|█▎        | 13/100 [00:04<00:30,  2.83it/s]

  Query execution time: 0.23 seconds


 14%|█▍        | 14/100 [00:05<00:29,  2.88it/s]

  Query execution time: 0.31 seconds


 15%|█▌        | 15/100 [00:05<00:27,  3.10it/s]

  Query execution time: 0.24 seconds


 16%|█▌        | 16/100 [00:05<00:32,  2.60it/s]

  Query execution time: 0.50 seconds


 17%|█▋        | 17/100 [00:06<00:29,  2.81it/s]

  Query execution time: 0.27 seconds


 18%|█▊        | 18/100 [00:06<00:30,  2.71it/s]

  Query execution time: 0.37 seconds


 19%|█▉        | 19/100 [00:06<00:27,  2.92it/s]

  Query execution time: 0.26 seconds


 20%|██        | 20/100 [00:07<00:28,  2.79it/s]

  Query execution time: 0.37 seconds


 21%|██        | 21/100 [00:07<00:31,  2.53it/s]

  Query execution time: 0.45 seconds


 22%|██▏       | 22/100 [00:07<00:28,  2.75it/s]

  Query execution time: 0.26 seconds


 23%|██▎       | 23/100 [00:08<00:29,  2.58it/s]

  Query execution time: 0.42 seconds


 24%|██▍       | 24/100 [00:08<00:27,  2.76it/s]

  Query execution time: 0.28 seconds


 25%|██▌       | 25/100 [00:09<00:27,  2.72it/s]

  Query execution time: 0.33 seconds


 26%|██▌       | 26/100 [00:09<00:26,  2.78it/s]

  Query execution time: 0.31 seconds


 27%|██▋       | 27/100 [00:09<00:25,  2.84it/s]

  Query execution time: 0.31 seconds


 28%|██▊       | 28/100 [00:10<00:26,  2.77it/s]

  Query execution time: 0.34 seconds


 29%|██▉       | 29/100 [00:10<00:28,  2.50it/s]

  Query execution time: 0.46 seconds


 30%|███       | 30/100 [00:10<00:26,  2.62it/s]

  Query execution time: 0.31 seconds


 31%|███       | 31/100 [00:11<00:25,  2.66it/s]

  Query execution time: 0.32 seconds


 32%|███▏      | 32/100 [00:11<00:25,  2.71it/s]

  Query execution time: 0.32 seconds


 33%|███▎      | 33/100 [00:12<00:28,  2.32it/s]

  Query execution time: 0.55 seconds


 34%|███▍      | 34/100 [00:12<00:31,  2.11it/s]

  Query execution time: 0.55 seconds


 35%|███▌      | 35/100 [00:13<00:27,  2.37it/s]

  Query execution time: 0.27 seconds


 36%|███▌      | 36/100 [00:13<00:28,  2.25it/s]

  Query execution time: 0.47 seconds


 37%|███▋      | 37/100 [00:14<00:27,  2.31it/s]

  Query execution time: 0.34 seconds


 38%|███▊      | 38/100 [00:14<00:26,  2.34it/s]

  Query execution time: 0.38 seconds


 39%|███▉      | 39/100 [00:14<00:25,  2.35it/s]

  Query execution time: 0.39 seconds


 40%|████      | 40/100 [00:15<00:28,  2.08it/s]

  Query execution time: 0.57 seconds


 41%|████      | 41/100 [00:16<00:30,  1.94it/s]

  Query execution time: 0.55 seconds


 42%|████▏     | 42/100 [00:16<00:35,  1.61it/s]

  Query execution time: 0.83 seconds


 43%|████▎     | 43/100 [00:17<00:31,  1.81it/s]

  Query execution time: 0.36 seconds


 44%|████▍     | 44/100 [00:17<00:26,  2.08it/s]

  Query execution time: 0.27 seconds


 45%|████▌     | 45/100 [00:18<00:28,  1.91it/s]

  Query execution time: 0.58 seconds


 46%|████▌     | 46/100 [00:18<00:27,  1.96it/s]

  Query execution time: 0.39 seconds


 47%|████▋     | 47/100 [00:19<00:24,  2.21it/s]

  Query execution time: 0.30 seconds


 48%|████▊     | 48/100 [00:19<00:22,  2.35it/s]

  Query execution time: 0.34 seconds
  Query execution time: 0.18 seconds


 49%|████▉     | 49/100 [00:19<00:18,  2.79it/s]



 50%|█████     | 50/100 [00:19<00:16,  2.96it/s]

  Query execution time: 0.25 seconds


 51%|█████     | 51/100 [00:20<00:15,  3.09it/s]

  Query execution time: 0.26 seconds


 52%|█████▏    | 52/100 [00:20<00:16,  2.92it/s]

  Query execution time: 0.35 seconds


 53%|█████▎    | 53/100 [00:21<00:16,  2.80it/s]

  Query execution time: 0.37 seconds


 55%|█████▌    | 55/100 [00:21<00:12,  3.65it/s]

  Query execution time: 0.23 seconds
  Query execution time: 0.14 seconds


 57%|█████▋    | 57/100 [00:21<00:08,  4.82it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.13 seconds


 59%|█████▉    | 59/100 [00:22<00:07,  5.35it/s]

  Query execution time: 0.21 seconds
  Query execution time: 0.11 seconds


 61%|██████    | 61/100 [00:22<00:08,  4.86it/s]

  Query execution time: 0.14 seconds
  Query execution time: 0.13 seconds


 63%|██████▎   | 63/100 [00:22<00:06,  5.99it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.11 seconds


 65%|██████▌   | 65/100 [00:23<00:05,  6.62it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.13 seconds


 67%|██████▋   | 67/100 [00:23<00:04,  6.99it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 69%|██████▉   | 69/100 [00:23<00:04,  7.13it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.13 seconds


 71%|███████   | 71/100 [00:23<00:04,  7.12it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.12 seconds


 73%|███████▎  | 73/100 [00:24<00:03,  7.43it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 75%|███████▌  | 75/100 [00:24<00:03,  7.41it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.14 seconds


 77%|███████▋  | 77/100 [00:24<00:03,  7.41it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 79%|███████▉  | 79/100 [00:24<00:02,  7.67it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.11 seconds


 81%|████████  | 81/100 [00:25<00:02,  7.46it/s]

  Query execution time: 0.14 seconds
  Query execution time: 0.12 seconds


 83%|████████▎ | 83/100 [00:25<00:02,  7.53it/s]

  Query execution time: 0.14 seconds
  Query execution time: 0.10 seconds


 85%|████████▌ | 85/100 [00:25<00:01,  7.50it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.13 seconds


 87%|████████▋ | 87/100 [00:25<00:01,  8.17it/s]

  Query execution time: 0.10 seconds
  Query execution time: 0.10 seconds


 89%|████████▉ | 89/100 [00:26<00:01,  7.07it/s]

  Query execution time: 0.10 seconds
  Query execution time: 0.18 seconds


 91%|█████████ | 91/100 [00:26<00:01,  7.03it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.14 seconds


 93%|█████████▎| 93/100 [00:26<00:01,  6.76it/s]

  Query execution time: 0.18 seconds
  Query execution time: 0.11 seconds


 94%|█████████▍| 94/100 [00:27<00:00,  7.00it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.20 seconds


 96%|█████████▌| 96/100 [00:27<00:00,  6.67it/s]

  Query execution time: 0.11 seconds


 98%|█████████▊| 98/100 [00:27<00:00,  7.39it/s]

  Query execution time: 0.10 seconds
  Query execution time: 0.11 seconds


100%|██████████| 100/100 [00:27<00:00,  3.59it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.10 seconds
Embedding model: BAAI/bge-small-en, Enricher: add_title



  0%|          | 0/100 [00:00<?, ?it/s]



  2%|▏         | 2/100 [00:00<00:19,  5.13it/s]

  Query execution time: 0.27 seconds
  Query execution time: 0.12 seconds


  4%|▍         | 4/100 [00:00<00:14,  6.44it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


  6%|▌         | 6/100 [00:00<00:14,  6.57it/s]

  Query execution time: 0.17 seconds
  Query execution time: 0.12 seconds


  8%|▊         | 8/100 [00:01<00:13,  6.85it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.13 seconds


 10%|█         | 10/100 [00:01<00:12,  7.29it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.11 seconds


 12%|█▏        | 12/100 [00:01<00:11,  7.49it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 14%|█▍        | 14/100 [00:02<00:11,  7.20it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.13 seconds


 16%|█▌        | 16/100 [00:02<00:11,  7.52it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.10 seconds


 18%|█▊        | 18/100 [00:02<00:11,  7.26it/s]

  Query execution time: 0.14 seconds
  Query execution time: 0.12 seconds


 20%|██        | 20/100 [00:02<00:11,  7.15it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.13 seconds


 22%|██▏       | 22/100 [00:03<00:13,  5.93it/s]

  Query execution time: 0.21 seconds
  Query execution time: 0.16 seconds


 24%|██▍       | 24/100 [00:03<00:11,  6.39it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.14 seconds


 26%|██▌       | 26/100 [00:03<00:10,  7.02it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.11 seconds


 27%|██▋       | 27/100 [00:04<00:14,  5.09it/s]

  Query execution time: 0.31 seconds
  Query execution time: 0.19 seconds


 29%|██▉       | 29/100 [00:04<00:13,  5.40it/s]

  Query execution time: 0.14 seconds


 31%|███       | 31/100 [00:04<00:10,  6.29it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 33%|███▎      | 33/100 [00:05<00:09,  6.70it/s]

  Query execution time: 0.14 seconds
  Query execution time: 0.12 seconds


 35%|███▌      | 35/100 [00:05<00:09,  7.11it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 37%|███▋      | 37/100 [00:05<00:08,  7.20it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.13 seconds


 39%|███▉      | 39/100 [00:05<00:08,  7.14it/s]

  Query execution time: 0.14 seconds
  Query execution time: 0.12 seconds


 41%|████      | 41/100 [00:06<00:08,  7.30it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 43%|████▎     | 43/100 [00:06<00:07,  7.13it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.13 seconds


 45%|████▌     | 45/100 [00:06<00:09,  5.65it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.15 seconds


 47%|████▋     | 47/100 [00:07<00:08,  6.10it/s]

  Query execution time: 0.15 seconds
  Query execution time: 0.13 seconds


 49%|████▉     | 49/100 [00:07<00:07,  6.75it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 51%|█████     | 51/100 [00:07<00:06,  7.07it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.13 seconds


 53%|█████▎    | 53/100 [00:08<00:06,  7.41it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.12 seconds


 55%|█████▌    | 55/100 [00:08<00:06,  6.77it/s]

  Query execution time: 0.19 seconds
  Query execution time: 0.12 seconds


 57%|█████▋    | 57/100 [00:08<00:06,  7.07it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 59%|█████▉    | 59/100 [00:08<00:05,  7.22it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.12 seconds


 61%|██████    | 61/100 [00:09<00:05,  7.17it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.13 seconds


 63%|██████▎   | 63/100 [00:09<00:05,  7.33it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 65%|██████▌   | 65/100 [00:09<00:04,  7.53it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.12 seconds


 67%|██████▋   | 67/100 [00:09<00:04,  7.49it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.12 seconds


 69%|██████▉   | 69/100 [00:10<00:04,  7.62it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.11 seconds


 71%|███████   | 71/100 [00:10<00:03,  7.48it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.12 seconds


 73%|███████▎  | 73/100 [00:10<00:03,  7.83it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.11 seconds


 75%|███████▌  | 75/100 [00:11<00:03,  7.30it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.13 seconds


 77%|███████▋  | 77/100 [00:11<00:03,  7.43it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 78%|███████▊  | 78/100 [00:11<00:02,  7.65it/s]

  Query execution time: 0.11 seconds


 80%|████████  | 80/100 [00:11<00:03,  6.50it/s]

  Query execution time: 0.23 seconds
  Query execution time: 0.11 seconds


 82%|████████▏ | 82/100 [00:12<00:02,  6.96it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 84%|████████▍ | 84/100 [00:12<00:02,  7.07it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.14 seconds


 85%|████████▌ | 85/100 [00:12<00:02,  7.12it/s]

  Query execution time: 0.12 seconds


 87%|████████▋ | 87/100 [00:12<00:02,  6.24it/s]

  Query execution time: 0.21 seconds
  Query execution time: 0.13 seconds


 89%|████████▉ | 89/100 [00:13<00:01,  6.71it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.13 seconds


 90%|█████████ | 90/100 [00:13<00:01,  6.93it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.11 seconds


 92%|█████████▏| 92/100 [00:13<00:01,  6.09it/s]

  Query execution time: 0.12 seconds


 94%|█████████▍| 94/100 [00:13<00:00,  6.80it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.12 seconds


 96%|█████████▌| 96/100 [00:14<00:00,  7.00it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.13 seconds


 98%|█████████▊| 98/100 [00:14<00:00,  7.56it/s]

  Query execution time: 0.10 seconds
  Query execution time: 0.12 seconds


100%|██████████| 100/100 [00:14<00:00,  6.81it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.12 seconds
Embedding model: BAAI/bge-small-en, Enricher: add_title_and_abstract



  1%|          | 1/100 [00:00<00:15,  6.49it/s]

  Query execution time: 0.13 seconds


  3%|▎         | 3/100 [00:00<00:13,  7.03it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.13 seconds


  5%|▌         | 5/100 [00:00<00:12,  7.35it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


  7%|▋         | 7/100 [00:00<00:12,  7.54it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.11 seconds


  9%|▉         | 9/100 [00:01<00:12,  7.30it/s]

  Query execution time: 0.14 seconds
  Query execution time: 0.12 seconds


 11%|█         | 11/100 [00:01<00:11,  7.58it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.10 seconds


 13%|█▎        | 13/100 [00:01<00:11,  7.32it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.13 seconds


 15%|█▌        | 15/100 [00:02<00:12,  6.64it/s]

  Query execution time: 0.20 seconds
  Query execution time: 0.11 seconds


 17%|█▋        | 17/100 [00:02<00:11,  6.99it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 19%|█▉        | 19/100 [00:02<00:11,  7.24it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.12 seconds


 21%|██        | 21/100 [00:02<00:10,  7.61it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.11 seconds


 23%|██▎       | 23/100 [00:03<00:10,  7.27it/s]

  Query execution time: 0.14 seconds
  Query execution time: 0.13 seconds


 24%|██▍       | 24/100 [00:03<00:10,  7.38it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.11 seconds


 26%|██▌       | 26/100 [00:03<00:12,  6.05it/s]

  Query execution time: 0.12 seconds


 28%|██▊       | 28/100 [00:04<00:11,  6.21it/s]

  Query execution time: 0.15 seconds
  Query execution time: 0.14 seconds


 30%|███       | 30/100 [00:04<00:10,  6.85it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 32%|███▏      | 32/100 [00:04<00:10,  6.75it/s]

  Query execution time: 0.14 seconds
  Query execution time: 0.14 seconds


 34%|███▍      | 34/100 [00:04<00:08,  7.37it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.11 seconds


 36%|███▌      | 36/100 [00:05<00:08,  7.37it/s]

  Query execution time: 0.10 seconds
  Query execution time: 0.14 seconds


 38%|███▊      | 38/100 [00:05<00:08,  7.52it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.11 seconds


 40%|████      | 40/100 [00:05<00:07,  7.92it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.11 seconds


 42%|████▏     | 42/100 [00:05<00:07,  7.78it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 44%|████▍     | 44/100 [00:06<00:06,  8.02it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.11 seconds


 46%|████▌     | 46/100 [00:06<00:06,  7.86it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.13 seconds


 48%|████▊     | 48/100 [00:06<00:06,  7.96it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.11 seconds


 50%|█████     | 50/100 [00:06<00:07,  6.97it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.14 seconds


 52%|█████▏    | 52/100 [00:07<00:06,  7.04it/s]

  Query execution time: 0.14 seconds
  Query execution time: 0.12 seconds


 54%|█████▍    | 54/100 [00:07<00:06,  7.59it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.11 seconds


 56%|█████▌    | 56/100 [00:07<00:05,  7.46it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 58%|█████▊    | 58/100 [00:08<00:05,  7.71it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.11 seconds


 60%|██████    | 60/100 [00:08<00:04,  8.21it/s]

  Query execution time: 0.10 seconds
  Query execution time: 0.10 seconds


 61%|██████    | 61/100 [00:08<00:10,  3.58it/s]

  Query execution time: 0.63 seconds


 62%|██████▏   | 62/100 [00:09<00:13,  2.87it/s]

  Query execution time: 0.49 seconds


 63%|██████▎   | 63/100 [00:09<00:14,  2.52it/s]

  Query execution time: 0.49 seconds


 64%|██████▍   | 64/100 [00:10<00:14,  2.49it/s]

  Query execution time: 0.40 seconds


 65%|██████▌   | 65/100 [00:10<00:13,  2.61it/s]

  Query execution time: 0.33 seconds


 66%|██████▌   | 66/100 [00:11<00:12,  2.65it/s]

  Query execution time: 0.35 seconds


 68%|██████▊   | 68/100 [00:11<00:09,  3.53it/s]

  Query execution time: 0.27 seconds
  Query execution time: 0.11 seconds


 70%|███████   | 70/100 [00:11<00:06,  4.75it/s]

  Query execution time: 0.15 seconds
  Query execution time: 0.11 seconds


 72%|███████▏  | 72/100 [00:12<00:06,  4.17it/s]

  Query execution time: 0.27 seconds
  Query execution time: 0.12 seconds


 74%|███████▍  | 74/100 [00:12<00:04,  5.43it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.12 seconds


 76%|███████▌  | 76/100 [00:12<00:03,  6.48it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.11 seconds


 78%|███████▊  | 78/100 [00:13<00:02,  7.35it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.10 seconds


 80%|████████  | 80/100 [00:13<00:02,  7.98it/s]

  Query execution time: 0.10 seconds
  Query execution time: 0.10 seconds


 82%|████████▏ | 82/100 [00:13<00:02,  8.13it/s]

  Query execution time: 0.10 seconds
  Query execution time: 0.12 seconds


 84%|████████▍ | 84/100 [00:13<00:01,  8.49it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.10 seconds


 86%|████████▌ | 86/100 [00:14<00:01,  8.18it/s]

  Query execution time: 0.12 seconds
  Query execution time: 0.11 seconds


 88%|████████▊ | 88/100 [00:14<00:01,  8.55it/s]

  Query execution time: 0.10 seconds
  Query execution time: 0.10 seconds


 90%|█████████ | 90/100 [00:14<00:01,  8.44it/s]

  Query execution time: 0.10 seconds
  Query execution time: 0.11 seconds


 92%|█████████▏| 92/100 [00:14<00:01,  7.32it/s]

  Query execution time: 0.13 seconds
  Query execution time: 0.12 seconds


 94%|█████████▍| 94/100 [00:15<00:00,  7.79it/s]

  Query execution time: 0.10 seconds
  Query execution time: 0.11 seconds


 96%|█████████▌| 96/100 [00:15<00:00,  8.25it/s]

  Query execution time: 0.11 seconds
  Query execution time: 0.09 seconds


 98%|█████████▊| 98/100 [00:15<00:00,  8.79it/s]

  Query execution time: 0.09 seconds
  Query execution time: 0.10 seconds


100%|██████████| 100/100 [00:15<00:00,  6.35it/s]

  Query execution time: 0.10 seconds
  Query execution time: 0.11 seconds
Saved results for BAAI/bge-small-en to tests/bge_ranks_topk5000.csv





Saved results for BAAI/bge-small-en to tests/bge_ranks_topk500.csv
