In [1]:
import io

import pyterrier as pt
from experiment_utils.experiments_helper import get_timeit_dependencies_name, getOptimalAlpha, latency_per_query

# Initialise PyTerrier only when this kernel has not done so yet, so that
# re-running the cell does not call pt.init() a second time.
if not pt.started():
    pt.init()

PyTerrier 0.10.0 has loaded Terrier 5.9 (built by craigm on 2024-05-02 17:40) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [2]:
from encoders.bge_base_en import BgeQueryEncoder

# Module-level configuration for the BGE base run. get_timeit_dep() and
# measure_latency() read these names at call time, and later cells overwrite
# them to switch to another model.
path_to_root = "../../"
model_directory = 'bge'

package = "BAAI/"
model_name = "bge-base-en-v1.5"
pipeline_name = "BM25 >> " + model_name

# The encoder is identified by "<package><model_name>".
q_encoder = BgeQueryEncoder(package + model_name)

In [3]:
# Building blocks of the dataset identifiers: get_timeit_dep() passes
# prefix + <name> and prefix + <name> + test_suffix to the experiment helper.
prefix, test_suffix = "irds:beir/", "/test"

In [4]:
def get_timeit_dep(dataset_name):
    """Collect the objects the latency benchmark needs for one dataset.

    Reads the module-level configuration (``prefix``, ``test_suffix``,
    ``q_encoder``, ``model_name``, ``path_to_root``, ``model_directory`` and
    ``pipeline_name``) at call time, so the result reflects whichever
    configuration cell was executed last.

    Args:
        dataset_name: Dataset short name that is appended to ``prefix``
            (for example ``"fiqa"``).

    Returns:
        Whatever ``get_timeit_dependencies_name`` returns; ``measure_latency``
        unpacks it as a pair.
    """
    dataset_id = prefix + dataset_name
    tuned_alpha = getOptimalAlpha(dataset_id, pipeline_name, model_directory)
    return get_timeit_dependencies_name(
        dataset_id,
        dataset_id + test_suffix,
        q_encoder,
        model_name,
        path_to_root,
        model_directory,
        alpha=tuned_alpha,
    )

In [5]:
from contextlib import redirect_stdout


def measure_latency(dataset_name):
    results_lexical_retriever, semantic_reranker = get_timeit_dep(dataset_name)
    f = io.StringIO()
    with redirect_stdout(f):
        %timeit semantic_reranker(results_lexical_retriever)

    timeit_output = f.getvalue()
    result = latency_per_query(timeit_output, prefix + dataset_name, test_suffix, pipeline_name, model_directory)
    print(result)

In [6]:
# Dataset short names to benchmark; run_latency_datasets() reads this list
# each time it is called.
dataset_list = [
    "fiqa",
]

In [7]:
import traceback


def run_latency_datasets(datasets=None):
    """Run ``measure_latency`` for each dataset, continuing past failures.

    Args:
        datasets: Optional iterable of dataset short names. When omitted
            (or ``None``), the module-level ``dataset_list`` is read at call
            time, which is the behaviour existing no-argument calls rely on.

    An ``Exception`` raised for one dataset is reported with its full
    traceback and the loop moves on to the next dataset. ``KeyboardInterrupt``
    is not an ``Exception`` subclass, so interrupting the kernel still stops
    the whole run.
    """
    if datasets is None:
        datasets = dataset_list
    for dataset_name in datasets:
        try:
            measure_latency(dataset_name)
        except Exception:
            # Best effort: report the failure and keep benchmarking the rest.
            traceback.print_exc()

In [8]:
# Benchmark every dataset in dataset_list with the currently configured encoder.
run_latency_datasets()

100%|██████████| 57638/57638 [00:00<00:00, 1394512.51it/s]


26.7 s +- 1.02 s per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 41.2037 ms. Experiment details: 26.7 s +- 1.02 s per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [9]:
# Switch to the small BGE model. package and model_directory are restated
# here so this cell produces the same configuration even when another model's
# cell was executed in between (they are module-level names that the other
# cells overwrite).
package = "BAAI/"
model_name = "bge-small-en-v1.5"
model_directory = 'bge'
pipeline_name = "BM25 >> " + model_name
q_encoder = BgeQueryEncoder(package + model_name)

run_latency_datasets()

100%|██████████| 3633/3633 [00:00<00:00, 1648945.62it/s]


Latency per query: 8.0495 ms. Experiment details: 2.6 s +- 101 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 57638/57638 [00:00<00:00, 425652.20it/s]


Latency per query: 18.5185 ms. Experiment details: 12 s +- 479 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 5183/5183 [00:00<00:00, 1162890.64it/s]


Latency per query: 20.9667 ms. Experiment details: 6.29 s +- 187 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [9]:
from encoders.gte_base_en_encoder import GTEBaseEncoder

# GTE base configuration: overwrite the shared module-level settings, then
# benchmark every dataset in dataset_list.
model_directory = 'gte_base_en_v1_5'
package = "Alibaba-NLP/"
model_name = "gte-base-en-v1.5"
pipeline_name = f"BM25 >> {model_name}"
q_encoder = GTEBaseEncoder(f"{package}{model_name}")

run_latency_datasets()

100%|██████████| 57638/57638 [00:00<00:00, 1420837.83it/s]


28.1 s +- 2.11 s per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 43.3642 ms. Experiment details: 28.1 s +- 2.11 s per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [10]:
from encoders.snowflake_arctic_embed_m import SnowFlakeQueryEncoder

# Snowflake arctic-embed-m configuration: overwrite the shared module-level
# settings, then benchmark every dataset in dataset_list.
model_directory = 'snowflake'
package = "Snowflake/"
model_name = "snowflake-arctic-embed-m"
pipeline_name = f"BM25 >> {model_name}"
q_encoder = SnowFlakeQueryEncoder(f"{package}{model_name}")

run_latency_datasets()

100%|██████████| 57638/57638 [00:00<00:00, 1539866.20it/s]


28.9 s +- 2.79 s per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 44.5988 ms. Experiment details: 28.9 s +- 2.79 s per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [12]:
# Switch to the extra-small Snowflake model. pipeline_name must follow
# model_name: it is passed to getOptimalAlpha() and latency_per_query(), and
# would otherwise still name whichever pipeline was configured last.
# package and model_directory are restated so the cell does not depend on
# which model cell ran before it.
package = "Snowflake/"
model_name = "snowflake-arctic-embed-xs"
model_directory = 'snowflake'
pipeline_name = "BM25 >> " + model_name
q_encoder = SnowFlakeQueryEncoder(package + model_name)

# Same entry point as the other model cells, so a failure on one dataset is
# reported and the remaining datasets still run.
run_latency_datasets()

100%|██████████| 3633/3633 [00:00<00:00, 1646095.54it/s]


Latency per query: 5.5728 ms. Experiment details: 1.8 s +- 154 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 57638/57638 [00:00<00:00, 1529084.35it/s]


Latency per query: 13.5802 ms. Experiment details: 8.8 s +- 249 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 5183/5183 [00:00<00:00, 1491020.41it/s]


Latency per query: 14.7667 ms. Experiment details: 4.43 s +- 177 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [11]:
# Widen the benchmark to three datasets; run_latency_datasets() picks up the
# new list the next time it is called.
dataset_list = [
    "fiqa",
    "nfcorpus",
    "scifact",
]

In [12]:
from fast_forward.encoder import TCTColBERTQueryEncoder

# TCT-ColBERT configuration. The pipeline label uses underscores
# ("tct_colbert_msmarco") and therefore is not derived from model_name,
# which is spelled with a hyphen.
model_directory = 'tct_colbert'
pipeline_name = "BM25 >> tct_colbert_msmarco"
package = "castorini/"
model_name = "tct_colbert-msmarco"
q_encoder = TCTColBERTQueryEncoder(f"{package}{model_name}")

run_latency_datasets()

100%|██████████| 57638/57638 [00:00<00:00, 1257248.55it/s]


KeyboardInterrupt: 