In [1]:
import io

import pyterrier as pt
from experiment_utils.experiments_helper import get_timeit_dependencies_name, getOptimalAlpha, latency_per_query

# Initialise PyTerrier only when it has not been started yet, so re-running this
# cell in a live kernel does not call pt.init() a second time.
if not pt.started():
    pt.init()

PyTerrier 0.10.0 has loaded Terrier 5.9 (built by craigm on 2024-05-02 17:40) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [2]:
from encoders.bge_base_en import BgeQueryEncoder

# Baseline configuration (BAAI/bge-base-en-v1.5). These module-level names are read
# by get_timeit_dep() and measure_latency() at call time.
path_to_root = "../../"
model_directory = "bge"

package = "BAAI/"
model_name = "bge-base-en-v1.5"
pipeline_name = f"BM25 >> {model_name}"

q_encoder = BgeQueryEncoder(f"{package}{model_name}")

In [3]:
# Dataset ids are assembled as prefix + <dataset name>; a second id with test_suffix
# appended is passed alongside it to the experiment helpers.
test_suffix = "/test"
prefix = "irds:beir/"

In [4]:
def get_timeit_dep(dataset_name):
    """Assemble the inputs needed to time the re-ranker on one dataset.

    The dataset id is ``prefix + dataset_name``. It is forwarded, together with the
    same id plus ``test_suffix`` and the module-level encoder/model settings, to
    ``get_timeit_dependencies_name``. The ``alpha`` argument is whatever
    ``getOptimalAlpha`` returns for the current pipeline and model directory.

    Args:
        dataset_name: Dataset name without the ``prefix`` (e.g. ``"fiqa"``).

    Returns:
        The value returned by ``get_timeit_dependencies_name``.
    """
    dataset_id = prefix + dataset_name
    best_alpha = getOptimalAlpha(dataset_id, pipeline_name, model_directory)
    return get_timeit_dependencies_name(
        dataset_id,
        dataset_id + test_suffix,
        q_encoder,
        model_name,
        path_to_root,
        model_directory,
        alpha=best_alpha,
    )

In [5]:
from contextlib import redirect_stdout


def measure_latency(dataset_name):
    results_lexical_retriever, semantic_reranker = get_timeit_dep(dataset_name)
    f = io.StringIO()
    with redirect_stdout(f):
        %timeit semantic_reranker(results_lexical_retriever)

    timeit_output = f.getvalue()
    result = latency_per_query(timeit_output, prefix + dataset_name, test_suffix, pipeline_name, model_directory)
    print(result)


In [6]:
import traceback


def run_latency_datasets(datasets=None):
    """Run ``measure_latency`` for every dataset, continuing past failures.

    Args:
        datasets: Optional iterable of dataset names. When omitted (the default),
            the module-level ``dataset_list`` is used, looked up at call time, so
            existing zero-argument calls behave as before.

    Returns:
        None. An exception raised for one dataset is reported through
        ``traceback.print_exc()`` and the loop moves on to the next dataset.
    """
    if datasets is None:
        datasets = dataset_list
    for dataset_name in datasets:
        try:
            measure_latency(dataset_name)
        except Exception:
            # Best effort: one failing dataset must not abort the remaining runs.
            traceback.print_exc()

In [6]:
# Datasets iterated by run_latency_datasets(); each name is combined with `prefix`
# to form the dataset id.
dataset_list = ["fiqa"]

In [8]:
# Measure latency for every entry in dataset_list using the encoder/pipeline
# settings currently held in the module-level configuration variables.
run_latency_datasets()

100%|██████████| 57638/57638 [00:00<00:00, 1394512.51it/s]


26.7 s +- 1.02 s per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 41.2037 ms. Experiment details: 26.7 s +- 1.02 s per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [9]:
# Switch to the small BGE checkpoint; `package` and `model_directory` are not
# reassigned here, so the values already in the kernel are reused.
model_name = "bge-small-en-v1.5"
pipeline_name = f"BM25 >> {model_name}"
q_encoder = BgeQueryEncoder(f"{package}{model_name}")

run_latency_datasets()

100%|██████████| 3633/3633 [00:00<00:00, 1648945.62it/s]


Latency per query: 8.0495 ms. Experiment details: 2.6 s +- 101 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 57638/57638 [00:00<00:00, 425652.20it/s]


Latency per query: 18.5185 ms. Experiment details: 12 s +- 479 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 5183/5183 [00:00<00:00, 1162890.64it/s]


Latency per query: 20.9667 ms. Experiment details: 6.29 s +- 187 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [9]:
from encoders.gte_base_en_encoder import GTEBaseEncoder

# Reconfigure the module-level settings for Alibaba-NLP/gte-base-en-v1.5, then run.
package = "Alibaba-NLP/"
model_name = "gte-base-en-v1.5"
model_directory = "gte_base_en_v1_5"
pipeline_name = f"BM25 >> {model_name}"
q_encoder = GTEBaseEncoder(f"{package}{model_name}")

run_latency_datasets()

100%|██████████| 57638/57638 [00:00<00:00, 1420837.83it/s]


28.1 s +- 2.11 s per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 43.3642 ms. Experiment details: 28.1 s +- 2.11 s per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [10]:
from encoders.snowflake_arctic_embed_m import SnowFlakeQueryEncoder

# Reconfigure the module-level settings for Snowflake/snowflake-arctic-embed-m, then run.
package = "Snowflake/"
model_name = "snowflake-arctic-embed-m"
model_directory = "snowflake"
pipeline_name = f"BM25 >> {model_name}"
q_encoder = SnowFlakeQueryEncoder(f"{package}{model_name}")

run_latency_datasets()

100%|██████████| 57638/57638 [00:00<00:00, 1539866.20it/s]


28.9 s +- 2.79 s per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 44.5988 ms. Experiment details: 28.9 s +- 2.79 s per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [7]:
# Datasets measured by the cells that follow.
dataset_list = [
    "fiqa",
    "nfcorpus",
    "scifact",
]

In [8]:
from encoders.snowflake_arctic_embed_m import SnowFlakeQueryEncoder
# Reconfigure the module-level settings for Snowflake/snowflake-arctic-embed-xs.
package = "Snowflake/"
model_name = "snowflake-arctic-embed-xs"
model_directory = 'snowflake'
q_encoder = SnowFlakeQueryEncoder(package + model_name)
pipeline_name = "BM25 >> " + model_name

# Use the shared runner, as every other model cell does, instead of a bare loop
# over measure_latency(): a failure on one dataset is then reported via its
# traceback and the remaining datasets are still measured.
run_latency_datasets()

100%|██████████| 57638/57638 [00:00<00:00, 1488891.38it/s]


8.94 s +- 192 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 13.7963 ms. Experiment details: 8.94 s +- 192 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 3633/3633 [00:00<00:00, 1637604.13it/s]


1.83 s +- 108 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 5.6656 ms. Experiment details: 1.83 s +- 108 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 5183/5183 [00:00<00:00, 1481368.15it/s]


4.47 s +- 163 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 14.9 ms. Experiment details: 4.47 s +- 163 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [8]:
from fast_forward.encoder import TCTColBERTQueryEncoder

# Reconfigure the module-level settings for castorini/tct_colbert-msmarco, then run.
package = "castorini/"
model_name = "tct_colbert-msmarco"
model_directory = "tct_colbert"
# NOTE(review): the pipeline label spells the model with an underscore
# ("tct_colbert_msmarco") while model_name uses a hyphen; presumably this matches
# an existing key used by getOptimalAlpha/latency_per_query. Confirm before unifying.
pipeline_name = "BM25 >> tct_colbert_msmarco"
q_encoder = TCTColBERTQueryEncoder(f"{package}{model_name}")

run_latency_datasets()

100%|██████████| 57638/57638 [00:00<00:00, 1372004.42it/s]


31.1 s +- 2.65 s per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 47.9938 ms. Experiment details: 31.1 s +- 2.65 s per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 3633/3633 [00:00<00:00, 1394518.75it/s]


10.9 s +- 321 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 33.7461 ms. Experiment details: 10.9 s +- 321 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 5183/5183 [00:00<00:00, 1342747.23it/s]


13 s +- 251 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 43.3333 ms. Experiment details: 13 s +- 251 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [7]:
# Datasets measured by the E5 cells that follow.
dataset_list = [
    "fiqa",
    "nfcorpus",
    "scifact",
]

In [8]:
from encoders.e5 import E5QueryEncoder
# Reconfigure the module-level settings for intfloat/e5-base-v2, then run.
package = "intfloat/"
model_directory = "e5"
model_name = "e5-base-v2"
pipeline_name = f"BM25 >> {model_name}"
q_encoder = E5QueryEncoder(f"{package}{model_name}")

run_latency_datasets()

100%|██████████| 57638/57638 [00:00<00:00, 1344900.78it/s]


25.4 s +- 1.32 s per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 39.1975 ms. Experiment details: 25.4 s +- 1.32 s per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 3633/3633 [00:00<00:00, 1115594.58it/s]


5.43 s +- 268 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 16.8111 ms. Experiment details: 5.43 s +- 268 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 5183/5183 [00:00<00:00, 1285499.24it/s]


13.8 s +- 358 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 46.0 ms. Experiment details: 13.8 s +- 358 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [9]:
from encoders.e5 import E5QueryEncoder
# Switch to the small E5 checkpoint; `package` and `model_directory` are not
# reassigned here, so the values already in the kernel are reused.
model_name = "e5-small-v2"
pipeline_name = f"BM25 >> {model_name}"
q_encoder = E5QueryEncoder(f"{package}{model_name}")

run_latency_datasets()

100%|██████████| 57638/57638 [00:00<00:00, 1384735.59it/s]


11.6 s +- 361 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 17.9012 ms. Experiment details: 11.6 s +- 361 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 3633/3633 [00:00<00:00, 1648945.62it/s]


2.25 s +- 131 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 6.9659 ms. Experiment details: 2.25 s +- 131 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 5183/5183 [00:00<00:00, 1462335.37it/s]


6.08 s +- 349 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 20.2667 ms. Experiment details: 6.08 s +- 349 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



In [10]:
from encoders.e5 import E5QueryEncoder
# Switch to the unsupervised E5 base checkpoint; `package` and `model_directory`
# are not reassigned here, so the values already in the kernel are reused.
model_name = "e5-base-unsupervised"
pipeline_name = f"BM25 >> {model_name}"
q_encoder = E5QueryEncoder(f"{package}{model_name}")

run_latency_datasets()

100%|██████████| 57638/57638 [00:00<00:00, 1397850.73it/s]


23.4 s +- 1.6 s per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 36.1111 ms. Experiment details: 23.4 s +- 1.6 s per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 3633/3633 [00:00<00:00, 1267712.68it/s]


5.32 s +- 192 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 16.4706 ms. Experiment details: 5.32 s +- 192 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)



100%|██████████| 5183/5183 [00:00<00:00, 1334422.54it/s]


13.4 s +- 215 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

Latency per query: 44.6667 ms. Experiment details: 13.4 s +- 215 ms per loop (mean +- std. dev. of 7 runs, 1 loop each)

