# Tuning the Top-k Query parameter

First, we set up the environment.

In [8]:
from dotenv import load_dotenv
import os
import pandas as pd
from time import time
import torch
from TextEnrichers import get_enricher, TextEnricher
from database.database import Database
from Embedders import Embedder, get_embedder
from tqdm import tqdm

# Load settings from the local '.env' file. override=True asks python-dotenv to let
# values from the file replace variables that are already set in the environment.
load_dotenv('.env', override=True)
# Sanity check: show the DB_PORT value that is now visible to this process.
print(os.getenv('DB_PORT'))


5432


In [9]:
# Database setup: open the project database handle and verify the connection
db = Database()
db.test_connection()

# Compute device: prefer CUDA, then Apple MPS, and fall back to the CPU otherwise
if torch.cuda.is_available():
    device = 'cuda'
elif torch.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f"Using device: {device}")

Database         User             Host                             Port            
citeline_db      bbasseri         localhost                        5432            
Database version: ('PostgreSQL 17.3 (Homebrew) on x86_64-apple-darwin23.6.0, compiled by Apple clang version 16.0.0 (clang-1600.0.26.6), 64-bit',)
Using device: mps


## Investigating precision over `k`

For our various embedding models and enrichment strategies, we want to know the smallest `top_k` value that will still retrieve the target reference for a given sentence. 

To investigate this, we'll sample 100 examples from the non-trivial training data. Each example typically has 1-2 target DOIs. For each example, we'll query the database with a large `top_k` parameter to start, so we can be sure the database returns the target references. Then we can ask at what index in the query results a target DOI first appears. Ideally, the ranks will all be very high, indicated by having *low* indices in the query results. We also expect enriched examples to have their target DOIs ranked higher (lower indices).

In [10]:
# Load the evaluation examples from a JSON Lines file (lines=True: one JSON record per line).
examples = pd.read_json('data/dataset/100/nontrivial.jsonl', lines=True)


def lowest_index_matching_doi(target_doi: str, query_results: list) -> int:
    """
    Locate the earliest query result whose `doi` attribute equals `target_doi`.

    Returns the zero-based position of that result, or -1 when no result matches
    (including when `query_results` is empty).
    """
    matching_positions = (
        position
        for position, hit in enumerate(query_results)
        if target_doi == hit.doi
    )
    # next() with a default stops at the first match and yields -1 if there is none
    return next(matching_positions, -1)


def get_ranks(
    example: pd.Series,
    embedding,
    top_k: int,
    probes: int,
    table_name: str = 'lib',
    target_column: str = 'bge',
) -> list[int]:
    """
    Query the database for one example and rank each of its target DOIs.

    The query vector `embedding` is searched against `target_column` of
    `table_name`, passing the example's 'pubdate' along with `top_k` and `probes`.
    The query duration and the number of results are printed.

    Returns one entry per DOI in example['citation_dois']: the index of the first
    query result carrying that DOI, or -1 when it is absent from the results.
    """
    wanted_dois = example['citation_dois']

    # Timed vector search
    t0 = time()
    hits = db.query_vector_column(
        query_vector=embedding,
        target_column=target_column,
        table_name=table_name,
        pubdate=example['pubdate'],
        top_k=top_k,
        use_index=True,
        probes=probes,
    )
    elapsed = time() - t0
    print(f"Query time: {elapsed:.2f}s")
    print(f"Found {len(hits)} results")

    # First-match position for every target DOI, in the order they are listed
    doi_ranks = []
    for wanted in wanted_dois:
        doi_ranks.append(
            lowest_index_matching_doi(target_doi=wanted, query_results=hits))
    return doi_ranks


def ranks_at_k(
    examples: pd.DataFrame,
    embedder_name: str,
    enricher_name: str,
    target_column: str,
    top_k: int,
    probes: int = 256,
) -> list[int]:
    """
    Collect target-DOI ranks for one embedding model / enrichment combination.

    Every row of `examples` is enriched, embedded, and used to query the 'lib'
    table through `get_ranks`. For each target DOI of each row, the index of the
    first query result with that DOI is recorded, i.e. the lowest k that would
    retrieve a chunk with the target DOI (-1 when no result matches).

    The returned flat list therefore holds one entry per target DOI, which can be
    more than one entry per example.
    """
    print(f"Embedding model: {embedder_name}, Enricher: {enricher_name}")

    # Build the embedder and enricher for this configuration
    embedder = get_embedder(embedder_name, device=device)
    enricher = get_enricher(
        enricher_name,
        path_to_data='data/preprocessed/reviews.jsonl',
    )

    # Enrich (sent_no_cit, source_doi) pairs, then embed the enriched texts in one batch
    sentence_doi_frame = examples[['sent_no_cit', 'source_doi']]
    texts_with_dois = list(sentence_doi_frame.itertuples(index=False, name=None))
    embeddings = embedder(enricher.enrich_batch(texts_with_dois))

    # Query once per example and accumulate the ranks of all its target DOIs
    all_ranks = []
    for row_idx in tqdm(range(len(examples))):
        all_ranks.extend(
            get_ranks(
                embedding=embeddings[row_idx],
                example=examples.iloc[row_idx],
                table_name='lib',
                target_column=target_column,
                top_k=top_k,
                probes=probes,
            )
        )
    return all_ranks

In [36]:
# Run with the BAAI/bge-small-en embedder and the 'identity' enricher
# (presumably no enrichment -- confirm against TextEnrichers).
# top_k is deliberately large so the target references are likely to be among the
# returned results; probes=80 overrides the ranks_at_k default of 256.
results = ranks_at_k(
    examples=examples,
    embedder_name='BAAI/bge-small-en',
    enricher_name='identity',
    target_column='bge',
    top_k=7000,
    probes=80
)
# Number of ranks collected: one per target DOI across all examples.
print(len(results))

Embedding model: BAAI/bge-small-en, Enricher: identity


  0%|          | 0/100 [00:00<?, ?it/s]

Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


  1%|          | 1/100 [00:00<00:28,  3.50it/s]

  Query execution time: 0.27 seconds
  Found 7000 results
top_k: 7000
Query time: 0.28s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


  2%|▏         | 2/100 [00:00<00:27,  3.53it/s]

  Query execution time: 0.27 seconds
  Found 7000 results
top_k: 7000
Query time: 0.28s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


  3%|▎         | 3/100 [00:00<00:26,  3.62it/s]

  Query execution time: 0.25 seconds
  Found 7000 results
top_k: 7000
Query time: 0.26s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


  4%|▍         | 4/100 [00:01<00:29,  3.23it/s]

  Query execution time: 0.34 seconds
  Found 7000 results
top_k: 7000
Query time: 0.36s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


  5%|▌         | 5/100 [00:01<00:29,  3.18it/s]

  Query execution time: 0.31 seconds
  Found 7000 results
top_k: 7000
Query time: 0.32s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


  6%|▌         | 6/100 [00:01<00:28,  3.31it/s]

  Query execution time: 0.26 seconds
  Found 7000 results
top_k: 7000
Query time: 0.27s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


  7%|▋         | 7/100 [00:02<00:28,  3.30it/s]

  Query execution time: 0.29 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


  8%|▊         | 8/100 [00:02<00:28,  3.19it/s]

  Query execution time: 0.32 seconds
  Found 7000 results
top_k: 7000
Query time: 0.33s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


  9%|▉         | 9/100 [00:02<00:27,  3.35it/s]

  Query execution time: 0.25 seconds
  Found 7000 results
top_k: 7000
Query time: 0.26s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 10%|█         | 10/100 [00:02<00:26,  3.37it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 11%|█         | 11/100 [00:03<00:26,  3.40it/s]

  Query execution time: 0.27 seconds
  Found 7000 results
top_k: 7000
Query time: 0.28s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 12%|█▏        | 12/100 [00:03<00:29,  2.96it/s]

  Query execution time: 0.27 seconds
  Found 7000 results
top_k: 7000
Query time: 0.43s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 13%|█▎        | 13/100 [00:04<00:30,  2.82it/s]

  Query execution time: 0.38 seconds
  Found 7000 results
top_k: 7000
Query time: 0.39s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 14%|█▍        | 14/100 [00:04<00:29,  2.94it/s]

  Query execution time: 0.29 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 15%|█▌        | 15/100 [00:04<00:27,  3.07it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 16%|█▌        | 16/100 [00:04<00:26,  3.18it/s]

  Query execution time: 0.27 seconds
  Found 7000 results
top_k: 7000
Query time: 0.28s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 17%|█▋        | 17/100 [00:05<00:24,  3.33it/s]

  Query execution time: 0.25 seconds
  Found 7000 results
top_k: 7000
Query time: 0.27s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 18%|█▊        | 18/100 [00:05<00:27,  3.03it/s]

  Query execution time: 0.38 seconds
  Found 7000 results
top_k: 7000
Query time: 0.39s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 19%|█▉        | 19/100 [00:05<00:25,  3.19it/s]

  Query execution time: 0.26 seconds
  Found 7000 results
top_k: 7000
Query time: 0.27s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 20%|██        | 20/100 [00:06<00:24,  3.25it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 21%|██        | 21/100 [00:06<00:24,  3.26it/s]

  Query execution time: 0.29 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 22%|██▏       | 22/100 [00:06<00:24,  3.23it/s]

  Query execution time: 0.30 seconds
  Found 7000 results
top_k: 7000
Query time: 0.31s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 23%|██▎       | 23/100 [00:07<00:25,  2.97it/s]

  Query execution time: 0.38 seconds
  Found 7000 results
top_k: 7000
Query time: 0.40s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 24%|██▍       | 24/100 [00:07<00:24,  3.16it/s]

  Query execution time: 0.25 seconds
  Found 7000 results
top_k: 7000
Query time: 0.27s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 25%|██▌       | 25/100 [00:07<00:22,  3.29it/s]

  Query execution time: 0.26 seconds
  Found 7000 results
top_k: 7000
Query time: 0.27s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 26%|██▌       | 26/100 [00:08<00:23,  3.18it/s]

  Query execution time: 0.32 seconds
  Found 7000 results
top_k: 7000
Query time: 0.34s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 27%|██▋       | 27/100 [00:08<00:21,  3.36it/s]

  Query execution time: 0.24 seconds
  Found 7000 results
top_k: 7000
Query time: 0.26s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 28%|██▊       | 28/100 [00:08<00:21,  3.29it/s]

  Query execution time: 0.30 seconds
  Found 7000 results
top_k: 7000
Query time: 0.31s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 29%|██▉       | 29/100 [00:09<00:23,  3.02it/s]

  Query execution time: 0.38 seconds
  Found 7000 results
top_k: 7000
Query time: 0.39s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 30%|███       | 30/100 [00:09<00:23,  3.01it/s]

  Query execution time: 0.32 seconds
  Found 7000 results
top_k: 7000
Query time: 0.33s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 31%|███       | 31/100 [00:09<00:22,  3.05it/s]

  Query execution time: 0.30 seconds
  Found 7000 results
top_k: 7000
Query time: 0.32s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 32%|███▏      | 32/100 [00:10<00:21,  3.11it/s]

  Query execution time: 0.29 seconds
  Found 7000 results
top_k: 7000
Query time: 0.31s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 33%|███▎      | 33/100 [00:10<00:24,  2.76it/s]

  Query execution time: 0.44 seconds
  Found 7000 results
top_k: 7000
Query time: 0.45s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 34%|███▍      | 34/100 [00:10<00:25,  2.59it/s]

  Query execution time: 0.27 seconds
  Found 7000 results
top_k: 7000
Query time: 0.44s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 35%|███▌      | 35/100 [00:11<00:24,  2.61it/s]

  Query execution time: 0.35 seconds
  Found 7000 results
top_k: 7000
Query time: 0.37s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 36%|███▌      | 36/100 [00:11<00:24,  2.58it/s]

  Query execution time: 0.38 seconds
  Found 7000 results
top_k: 7000
Query time: 0.40s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 37%|███▋      | 37/100 [00:11<00:21,  2.88it/s]

  Query execution time: 0.24 seconds
  Found 7000 results
top_k: 7000
Query time: 0.25s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 38%|███▊      | 38/100 [00:12<00:20,  3.03it/s]

  Query execution time: 0.27 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 39%|███▉      | 39/100 [00:12<00:21,  2.88it/s]

  Query execution time: 0.37 seconds
  Found 7000 results
top_k: 7000
Query time: 0.38s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 40%|████      | 40/100 [00:12<00:20,  2.99it/s]

  Query execution time: 0.29 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 41%|████      | 41/100 [00:13<00:19,  3.10it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 42%|████▏     | 42/100 [00:13<00:20,  2.78it/s]

  Query execution time: 0.41 seconds
  Found 7000 results
top_k: 7000
Query time: 0.44s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 43%|████▎     | 43/100 [00:14<00:19,  2.94it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 44%|████▍     | 44/100 [00:14<00:18,  3.02it/s]

  Query execution time: 0.30 seconds
  Found 7000 results
top_k: 7000
Query time: 0.31s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 45%|████▌     | 45/100 [00:14<00:16,  3.25it/s]

  Query execution time: 0.24 seconds
  Found 7000 results
top_k: 7000
Query time: 0.25s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 46%|████▌     | 46/100 [00:14<00:16,  3.29it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 47%|████▋     | 47/100 [00:15<00:16,  3.23it/s]

  Query execution time: 0.30 seconds
  Found 7000 results
top_k: 7000
Query time: 0.32s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 48%|████▊     | 48/100 [00:15<00:15,  3.30it/s]

  Query execution time: 0.27 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 49%|████▉     | 49/100 [00:15<00:17,  2.94it/s]

  Query execution time: 0.41 seconds
  Found 7000 results
top_k: 7000
Query time: 0.42s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 50%|█████     | 50/100 [00:16<00:16,  3.06it/s]

  Query execution time: 0.27 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 51%|█████     | 51/100 [00:16<00:16,  2.89it/s]

  Query execution time: 0.33 seconds
  Found 7000 results
top_k: 7000
Query time: 0.37s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 52%|█████▏    | 52/100 [00:16<00:16,  2.95it/s]

  Query execution time: 0.31 seconds
  Found 7000 results
top_k: 7000
Query time: 0.32s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 53%|█████▎    | 53/100 [00:17<00:15,  3.04it/s]

  Query execution time: 0.29 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 54%|█████▍    | 54/100 [00:17<00:14,  3.12it/s]

  Query execution time: 0.29 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 55%|█████▌    | 55/100 [00:17<00:14,  3.17it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 56%|█████▌    | 56/100 [00:18<00:15,  2.82it/s]

  Query execution time: 0.29 seconds
  Found 7000 results
top_k: 7000
Query time: 0.44s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 57%|█████▋    | 57/100 [00:18<00:14,  3.00it/s]

  Query execution time: 0.27 seconds
  Found 7000 results
top_k: 7000
Query time: 0.28s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 58%|█████▊    | 58/100 [00:18<00:13,  3.10it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 59%|█████▉    | 59/100 [00:19<00:12,  3.23it/s]

  Query execution time: 0.26 seconds
  Found 7000 results
top_k: 7000
Query time: 0.28s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 60%|██████    | 60/100 [00:19<00:11,  3.34it/s]

  Query execution time: 0.26 seconds
  Found 7000 results
top_k: 7000
Query time: 0.27s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 61%|██████    | 61/100 [00:19<00:12,  3.07it/s]

  Query execution time: 0.37 seconds
  Found 7000 results
top_k: 7000
Query time: 0.38s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 62%|██████▏   | 62/100 [00:20<00:13,  2.83it/s]

  Query execution time: 0.40 seconds
  Found 7000 results
top_k: 7000
Query time: 0.41s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 63%|██████▎   | 63/100 [00:20<00:12,  2.90it/s]

  Query execution time: 0.31 seconds
  Found 7000 results
top_k: 7000
Query time: 0.32s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 64%|██████▍   | 64/100 [00:20<00:13,  2.71it/s]

  Query execution time: 0.41 seconds
  Found 7000 results
top_k: 7000
Query time: 0.42s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 65%|██████▌   | 65/100 [00:21<00:12,  2.86it/s]

  Query execution time: 0.29 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 66%|██████▌   | 66/100 [00:21<00:10,  3.09it/s]

  Query execution time: 0.24 seconds
  Found 7000 results
top_k: 7000
Query time: 0.26s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 67%|██████▋   | 67/100 [00:21<00:11,  2.79it/s]

  Query execution time: 0.42 seconds
  Found 7000 results
top_k: 7000
Query time: 0.44s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 68%|██████▊   | 68/100 [00:22<00:12,  2.64it/s]

  Query execution time: 0.41 seconds
  Found 7000 results
top_k: 7000
Query time: 0.42s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 69%|██████▉   | 69/100 [00:22<00:10,  2.83it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 70%|███████   | 70/100 [00:23<00:10,  2.88it/s]

  Query execution time: 0.31 seconds
  Found 7000 results
top_k: 7000
Query time: 0.33s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 71%|███████   | 71/100 [00:23<00:10,  2.82it/s]

  Query execution time: 0.32 seconds
  Found 7000 results
top_k: 7000
Query time: 0.37s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 72%|███████▏  | 72/100 [00:23<00:09,  2.96it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 73%|███████▎  | 73/100 [00:23<00:08,  3.13it/s]

  Query execution time: 0.26 seconds
  Found 7000 results
top_k: 7000
Query time: 0.27s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 74%|███████▍  | 74/100 [00:24<00:09,  2.83it/s]

  Query execution time: 0.42 seconds
  Found 7000 results
top_k: 7000
Query time: 0.43s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 75%|███████▌  | 75/100 [00:24<00:08,  2.97it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 76%|███████▌  | 76/100 [00:25<00:07,  3.02it/s]

  Query execution time: 0.30 seconds
  Found 7000 results
top_k: 7000
Query time: 0.32s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 77%|███████▋  | 77/100 [00:25<00:08,  2.71it/s]

  Query execution time: 0.44 seconds
  Found 7000 results
top_k: 7000
Query time: 0.45s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 78%|███████▊  | 78/100 [00:25<00:07,  3.06it/s]

  Query execution time: 0.21 seconds
  Found 7000 results
top_k: 7000
Query time: 0.23s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 79%|███████▉  | 79/100 [00:26<00:07,  2.89it/s]

  Query execution time: 0.21 seconds
  Found 7000 results
top_k: 7000
Query time: 0.39s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 80%|████████  | 80/100 [00:26<00:06,  2.93it/s]

  Query execution time: 0.31 seconds
  Found 7000 results
top_k: 7000
Query time: 0.33s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 81%|████████  | 81/100 [00:26<00:06,  2.95it/s]

  Query execution time: 0.32 seconds
  Found 7000 results
top_k: 7000
Query time: 0.33s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 82%|████████▏ | 82/100 [00:27<00:05,  3.19it/s]

  Query execution time: 0.24 seconds
  Found 7000 results
top_k: 7000
Query time: 0.25s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 83%|████████▎ | 83/100 [00:27<00:06,  2.83it/s]

  Query execution time: 0.43 seconds
  Found 7000 results
top_k: 7000
Query time: 0.44s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 84%|████████▍ | 84/100 [00:27<00:06,  2.64it/s]

  Query execution time: 0.42 seconds
  Found 7000 results
top_k: 7000
Query time: 0.43s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 85%|████████▌ | 85/100 [00:28<00:05,  2.84it/s]

  Query execution time: 0.27 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 86%|████████▌ | 86/100 [00:28<00:05,  2.74it/s]

  Query execution time: 0.37 seconds
  Found 7000 results
top_k: 7000
Query time: 0.39s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 87%|████████▋ | 87/100 [00:29<00:05,  2.58it/s]

  Query execution time: 0.42 seconds
  Found 7000 results
top_k: 7000
Query time: 0.44s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 88%|████████▊ | 88/100 [00:29<00:04,  2.86it/s]

  Query execution time: 0.24 seconds
  Found 7000 results
top_k: 7000
Query time: 0.26s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 89%|████████▉ | 89/100 [00:29<00:03,  3.01it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 90%|█████████ | 90/100 [00:29<00:03,  3.09it/s]

  Query execution time: 0.28 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 91%|█████████ | 91/100 [00:30<00:02,  3.22it/s]

  Query execution time: 0.26 seconds
  Found 7000 results
top_k: 7000
Query time: 0.28s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 92%|█████████▏| 92/100 [00:30<00:02,  3.07it/s]

  Query execution time: 0.34 seconds
  Found 7000 results
top_k: 7000
Query time: 0.36s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 93%|█████████▎| 93/100 [00:30<00:02,  2.90it/s]

  Query execution time: 0.36 seconds
  Found 7000 results
top_k: 7000
Query time: 0.38s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 94%|█████████▍| 94/100 [00:31<00:02,  2.92it/s]

  Query execution time: 0.26 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 95%|█████████▌| 95/100 [00:31<00:01,  2.77it/s]

  Query execution time: 0.39 seconds
  Found 7000 results
top_k: 7000
Query time: 0.40s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 96%|█████████▌| 96/100 [00:31<00:01,  2.90it/s]

  Query execution time: 0.29 seconds
  Found 7000 results
top_k: 7000
Query time: 0.30s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 97%|█████████▋| 97/100 [00:32<00:01,  2.85it/s]

  Query execution time: 0.35 seconds
  Found 7000 results
top_k: 7000
Query time: 0.36s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 98%|█████████▊| 98/100 [00:32<00:00,  2.89it/s]

  Query execution time: 0.32 seconds
  Found 7000 results
top_k: 7000
Query time: 0.33s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


 99%|█████████▉| 99/100 [00:32<00:00,  3.10it/s]

  Query execution time: 0.25 seconds
  Found 7000 results
top_k: 7000
Query time: 0.26s
Found 7000 results
Session resources set for query optimization:

                SET synchronous_commit = 'on';
                -- SET wal_level = 'replica';
                -- SET max_wal_size = 'DEFAULT';
                SET maintenance_work_mem = '1GB';
                -- SET random_page_cost = '1.1';
                -- SET parallel_tuple_cost = '0.1';
                -- SET parallel_setup_cost = '1000';
                SET max_parallel_workers = '60';
                SET work_mem = '1GB';
                SET max_parallel_workers_per_gather = '60';
                -- SET shared_buffers = '28GB';
                SET effective_cache_size = '86GB';
                -- SET effective_io_concurrency = '200';
            


100%|██████████| 100/100 [00:33<00:00,  3.01it/s]

  Query execution time: 0.27 seconds
  Found 7000 results
top_k: 7000
Query time: 0.29s
Found 7000 results
111





In [37]:
# Tally the lookups that produced a real index; -1 is the "no match" sentinel
# documented on lowest_index_matching_doi (assumes `results` holds its return
# values — confirm against the cell that builds `results`).
found = sum(1 for r in results if r != -1)
print(f"Found {found} out of {len(results)}")
print(f"Found {found/len(results)*100:.2f}%")

Found 92 out of 111
Found 82.88%


In [35]:
# Print one "<position> <value>" line per entry of `results` so individual
# lookups can be inspected by position.
for i, result in enumerate(results):
    print(f"{i} {result}")

0 -1
1 596
2 7281
3 1972
4 0
5 24
6 0
7 503
8 -1
9 468
10 -1
11 -1
12 5
13 1
14 -1
15 20
16 1272
17 164
18 8045
19 3530
20 571
21 365
22 0
23 1
24 0
25 -1
26 5
27 177
28 3
29 -1
30 1
31 32
32 660
33 287
34 77
35 1
36 3891
37 95
38 340
39 0
40 104
41 3
42 1
43 -1
44 2904
45 3100
46 -1
47 0
48 26
49 47
50 -1
51 1
52 5571
53 452
54 250
55 1
56 2
57 0
58 383
59 -1
60 0
61 -1
62 0
63 329
64 -1
65 97
66 319
67 339
68 46
69 6
70 1746
71 0
72 70
73 6
74 -1
75 -1
76 3615
77 -1
78 9
79 2704
80 4
81 -1
82 1
83 22
84 565
85 531
86 1150
87 4
88 -1
89 2098
90 -1
91 30
92 16
93 6665
94 1
95 7
96 750
97 540
98 204
99 25
100 221
101 0
102 1344
103 607
104 -1
105 1
106 54
107 1583
108 688
109 2
110 27


In [24]:
# Display the `sent_no_cit` text of the example at row position 93.
# NOTE(review): `results` has 111 entries for 100 examples, so a position in
# `results` need not correspond to the same row of `examples` — confirm before
# reading this as "the sentence behind results[93]".
examples['sent_no_cit'].iloc[93]

'This is clearly shown by the lateral duplicate sampling performed by , SI fig. S4) .'

In [4]:
from itertools import product

# Enrichment strategies that are paired with every embedding model below.
enrichment_methods = [
    'identity',
    'add_abstract',
    'add_title',
    'add_title_and_abstract',
]

# The small BGE model is always included; the two BERT-based models are only
# added when the detected device is CUDA.
embedding_models = ['BAAI/bge-small-en']
if device == 'cuda':
    embedding_models.extend(['bert-base-uncased', 'adsabs/astroBERT'])

# Cartesian product: one (embedding model, enrichment method) pair per run.
combos = [*product(embedding_models, enrichment_methods)]
print(f"Combos: {combos}")

Combos: [('BAAI/bge-small-en', 'identity'), ('BAAI/bge-small-en', 'add_abstract'), ('BAAI/bge-small-en', 'add_title'), ('BAAI/bge-small-en', 'add_title_and_abstract')]


In [None]:
# Run ranks_at_k once per enrichment strategy with the bge-small embedder and
# keep each return value under the strategy's name.
# NOTE(review): the output recorded for this cell reports 2,261,334 results,
# which does not match top_k=300_000 — the output may predate this code; re-run
# to confirm.
rank_data = {}
for enricher_name in ('identity', 'add_abstract', 'add_title', 'add_title_and_abstract'):
    rank_data[enricher_name] = ranks_at_k(
        examples=examples, embedder_name='BAAI/bge-small-en',
        enricher_name=enricher_name, target_column='bge_norm',
        top_k=300_000, probes=20,
    )

Embedding model: BAAI/bge-small-en, Enricher: identity


  0%|          | 0/1 [00:00<?, ?it/s]

  Query execution time: 37.84 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


100%|██████████| 1/1 [00:48<00:00, 48.30s/it]


Embedding model: BAAI/bge-small-en, Enricher: add_abstract


  0%|          | 0/1 [00:00<?, ?it/s]

  Query execution time: 35.50 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


100%|██████████| 1/1 [00:45<00:00, 45.27s/it]


Embedding model: BAAI/bge-small-en, Enricher: add_title


  0%|          | 0/1 [00:00<?, ?it/s]

  Query execution time: 35.98 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


100%|██████████| 1/1 [00:46<00:00, 46.11s/it]


Embedding model: BAAI/bge-small-en, Enricher: add_title_and_abstract


  0%|          | 0/1 [00:00<?, ?it/s]

  Query execution time: 34.77 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


100%|██████████| 1/1 [00:44<00:00, 44.96s/it]


In [6]:
# Show the first example's original sentence, followed by the first entry of
# each enricher's ranks_at_k output (presumably the rank for that same
# example — confirm against ranks_at_k).
print(examples['sent_original'].iloc[0])
for enricher_name in rank_data:
    ranks = rank_data[enricher_name]
    print(f"Enricher: {enricher_name}. Rank: {ranks[0]}")

CGRO observations of X-ray binaries detected non-thermal power-law tails extending well beyond 100 keV with a photon index Γ LE ≈2.5–3 (Grove et al. 1998 ). 
Enricher: identity. Rank: 21
Enricher: add_abstract. Rank: 348
Enricher: add_title. Rank: 1
Enricher: add_title_and_abstract. Rank: 285


In [None]:
# Draw two examples with a fixed seed so the sample is repeatable.
# NOTE(review): `data` is not defined in the visible part of this notebook
# (the earlier cells use `examples`) — confirm it exists on a fresh kernel.
new_examples = data.sample(2, random_state=29)

# Repeat the per-enricher sweep on the new sample. Unlike the earlier sweep,
# this one passes ef_search rather than probes, and top_k=2_261_334 equals the
# "Found 2261334 results" count shown in the recorded output.
new_rank_data = {}
for enricher_name in ('identity', 'add_abstract', 'add_title', 'add_title_and_abstract'):
    new_rank_data[enricher_name] = ranks_at_k(
        examples=new_examples, embedder_name='BAAI/bge-small-en',
        enricher_name=enricher_name, target_column='bge_norm',
        top_k=2_261_334, ef_search=20,
    )

Embedding model: BAAI/bge-small-en, Enricher: identity


  0%|          | 0/2 [00:00<?, ?it/s]

  Query execution time: 34.42 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 50%|█████     | 1/2 [00:44<00:44, 44.23s/it]

  Query execution time: 34.71 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


100%|██████████| 2/2 [01:28<00:00, 44.34s/it]


Embedding model: BAAI/bge-small-en, Enricher: add_abstract


  0%|          | 0/2 [00:00<?, ?it/s]

  Query execution time: 34.46 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 50%|█████     | 1/2 [00:44<00:44, 44.64s/it]

  Query execution time: 34.72 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


100%|██████████| 2/2 [01:29<00:00, 44.53s/it]


Embedding model: BAAI/bge-small-en, Enricher: add_title


  0%|          | 0/2 [00:00<?, ?it/s]

  Query execution time: 34.76 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 50%|█████     | 1/2 [00:45<00:45, 45.00s/it]

  Query execution time: 34.70 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


100%|██████████| 2/2 [01:29<00:00, 44.90s/it]


Embedding model: BAAI/bge-small-en, Enricher: add_title_and_abstract


  0%|          | 0/2 [00:00<?, ?it/s]

  Query execution time: 34.30 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 50%|█████     | 1/2 [00:43<00:43, 43.56s/it]

  Query execution time: 35.42 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


100%|██████████| 2/2 [01:28<00:00, 44.34s/it]

Enricher: identity. Rank: 1159
Enricher: add_abstract. Rank: 164
Enricher: add_title. Rank: 138
Enricher: add_title_and_abstract. Rank: 229





In [8]:
# List every rank collected for the two-example sample, one line per enricher.
for enricher_name in new_rank_data:
    ranks = new_rank_data[enricher_name]
    print(f"Enricher: {enricher_name}. Rank: {ranks}")

Enricher: identity. Rank: [1159, 288, 288]
Enricher: add_abstract. Rank: [164, 2903, 2903]
Enricher: add_title. Rank: [138, 13693, 13693]
Enricher: add_title_and_abstract. Rank: [229, 4420, 4420]


In [9]:
# Inspect the second sampled example: its original sentence and its citation DOIs.
second_example = new_examples.iloc[1]
print(second_example.sent_original)
print(second_example.citation_dois)

Both Hood et al. ( 2009 ) and MacTaggart and Hood ( 2009 ) reported that a second MFR formed via magnetic reconnection (see below for a detailed description of this process) underneath the bodily emerged MFR. 
['10.1051/0004-6361/200912189', '10.1051/0004-6361/200912189']


In [10]:
# Draw four examples with a fixed seed and collect their ranks under every enricher.
more_examples = data.sample(4, random_state=1)

# Settings shared by all runs; only the enricher name changes between calls.
more_query_settings = {
    'examples': more_examples,
    'embedder_name': 'BAAI/bge-small-en',
    'target_column': 'bge_norm',
    'top_k': 2_261_334,
    'ef_search': 20,
}

# Filled one enricher at a time so finished runs are kept if a later one is interrupted.
more_rank_data = {}
for enricher_name in ('identity', 'add_abstract', 'add_title', 'add_title_and_abstract'):
    more_rank_data[enricher_name] = ranks_at_k(
        enricher_name=enricher_name,
        **more_query_settings,
    )

Embedding model: BAAI/bge-small-en, Enricher: identity


  0%|          | 0/4 [00:00<?, ?it/s]

  Query execution time: 34.51 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 25%|██▌       | 1/4 [00:44<02:14, 44.79s/it]

  Query execution time: 35.54 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 50%|█████     | 2/4 [01:30<01:30, 45.33s/it]

  Query execution time: 36.06 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 75%|███████▌  | 3/4 [02:16<00:45, 45.69s/it]

  Query execution time: 35.24 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


100%|██████████| 4/4 [03:01<00:00, 45.49s/it]


Embedding model: BAAI/bge-small-en, Enricher: add_abstract


  0%|          | 0/4 [00:00<?, ?it/s]

  Query execution time: 35.42 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 25%|██▌       | 1/4 [00:46<02:18, 46.04s/it]

  Query execution time: 35.21 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 50%|█████     | 2/4 [01:31<01:31, 45.57s/it]

  Query execution time: 35.37 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 75%|███████▌  | 3/4 [02:17<00:45, 45.66s/it]

  Query execution time: 35.10 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


100%|██████████| 4/4 [03:02<00:00, 45.60s/it]


Embedding model: BAAI/bge-small-en, Enricher: add_title


  0%|          | 0/4 [00:00<?, ?it/s]

  Query execution time: 35.64 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 25%|██▌       | 1/4 [00:45<02:16, 45.36s/it]

  Query execution time: 35.47 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 50%|█████     | 2/4 [01:30<01:31, 45.51s/it]

  Query execution time: 35.53 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 75%|███████▌  | 3/4 [02:16<00:45, 45.57s/it]

  Query execution time: 35.83 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


100%|██████████| 4/4 [03:02<00:00, 45.70s/it]


Embedding model: BAAI/bge-small-en, Enricher: add_title_and_abstract


  0%|          | 0/4 [00:00<?, ?it/s]

  Query execution time: 36.36 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 25%|██▌       | 1/4 [00:46<02:19, 46.39s/it]

  Query execution time: 35.10 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 50%|█████     | 2/4 [01:31<01:31, 45.72s/it]

  Query execution time: 35.81 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


 75%|███████▌  | 3/4 [02:17<00:45, 45.68s/it]

  Query execution time: 35.65 seconds
Found 2261334 results
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND
FOUND


100%|██████████| 4/4 [03:02<00:00, 45.58s/it]


In [None]:
# Report the ranks for the four-example sample (more_rank_data) computed in the preceding cell.
# This loop previously iterated new_rank_data, which an earlier cell already prints,
# so the results of the four-example run were never displayed.
for enricher_name, ranks in more_rank_data.items():
    print(f"Enricher: {enricher_name}. Rank: {ranks}")

In [None]:
# Persist the rank_data dict as JSON on disk.
import json

rank_data_path = 'data/rank_data.json'
with open(rank_data_path, 'w') as out_file:
    # Serialize first, then write the resulting text in one call.
    out_file.write(json.dumps(rank_data))
print('Rank data saved to file')

In [None]:
# Largest rank recorded for each of these enrichers, displayed as a tuple.
# NOTE(review): 'add_title_and_abstract' is not included here — confirm whether that is intentional.
tuple(max(rank_data[name]) for name in ('identity', 'add_abstract', 'add_title'))

In [None]:
# Load the preprocessed research records and display the row(s) for one specific DOI.
research = pd.read_json('data/preprocessed/research.jsonl', lines=True)
research.loc[research['doi'] == '10.1093/mnras/stab3351']

In [None]:
# Maps each embedding model name to a short label.
# Presumably used when building file names; the usage is not visible here, so confirm.
model_names_to_filenames = dict([
    ('BAAI/bge-small-en', 'bge'),
    ('bert-base-uncased', 'bert'),
    ('adsabs/astroBERT', 'astrobert'),
])