## This notebook explores the transformer caching behaviour within PyTerrier

In [1]:
from experiment_utils.experiments_helper import load_dense_index_from_disk, load_sparse_index_from_disk

In [2]:
import pyterrier as pt

# Initialise PyTerrier only when it has not been started yet, so re-running
# this cell never calls pt.init() a second time.
if not pt.started():
    pt.init()

PyTerrier 0.10.0 has loaded Terrier 5.9 (built by craigm on 2024-05-02 17:40) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [3]:
from pyterrier.measures import RR, nDCG, MAP

# Effectiveness measures shared by the evaluation cells:
# RR and nDCG at rank cutoff 10, MAP at rank cutoff 100.
eval_metrics = [
    RR @ 10,
    nDCG @ 10,
    MAP @ 100,
]

In [4]:
from encoders.bge_base_en import BgeQueryEncoder

# Model / path configuration reused by the index-loading cells below.
package = "BAAI/"
model_name = "bge-base-en-v1.5"
model_directory = "bge"  # passed to load_dense_index_from_disk
path_to_root = "../../"  # passed to both index loaders
pipeline_name = "BM25 >> bge-base-en-v1.5"

# The encoder receives the full identifier, i.e. package followed by model_name.
q_encoder = BgeQueryEncoder(f"{package}{model_name}")

In [5]:
from experiment_utils.experiments_helper import get_test_dev_sets
from fast_forward.util.pyterrier import FFScore
from fast_forward_indexes_library_enhancements.pipeline_transformers import FFInterpolateNormalized

# --- BEIR NFCorpus: dataset identifiers ---
dataset_name = "irds:beir/nfcorpus"
dataset_test_name = "irds:beir/nfcorpus/test"
dev_set_name = "irds:beir/nfcorpus/dev"
pipeline_name = f"BM25 >> {model_name}"

test_topics, test_qrels, dev_topics, dev_qrels = get_test_dev_sets(
    dataset_test_name,
    dev_set_name,
)

# Sparse index (first-stage retriever), loaded with in_memory=True
retriever = load_sparse_index_from_disk(
    dataset_name,
    path_to_root,
    in_memory=True,
)

# Dense index, loaded with in_memory=True
dense_index = load_dense_index_from_disk(
    dataset_name,
    q_encoder,
    model_name,
    path_to_root,
    model_directory,
    in_memory=True,
)

# Re-ranking stages: dense scoring followed by normalised interpolation.
ff_int = FFInterpolateNormalized(alpha=0.3)
ff_score = FFScore(dense_index)

# Two otherwise identical pipelines with a rank cutoff of 1000 on the
# retriever; pipeline2 additionally applies PyTerrier's `~` operator to it.
pipeline1 = (retriever % 1000) >> ff_score >> ff_int
pipeline2 = ((~retriever) % 1000) >> ff_score >> ff_int

100%|██████████| 3633/3633 [00:00<00:00, 1658637.90it/s]


In [6]:
# Time pipeline1 (retriever without `~`, cutoff 1000) on the NFCorpus test topics.
%timeit pipeline1(test_topics)

8.76 s ± 600 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
# Time pipeline2 (retriever wrapped with `~`, cutoff 1000) on the same topics.
# NOTE(review): %timeit repeats the call, so most repetitions presumably run
# against an already-populated cache — confirm before comparing with pipeline1.
%timeit pipeline2(test_topics)

6.82 s ± 175 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
# Same pair of pipelines as before, with the retriever's rank cutoff lowered
# from 1000 to 100. Parentheses only spell out the default operator precedence.
pipeline3 = (retriever % 100) >> ff_score >> ff_int
pipeline4 = ((~retriever) % 100) >> ff_score >> ff_int

In [11]:
% timeit pipeline3(test_topics)

7.13 s ± 172 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
% timeit pipeline4(test_topics)

5.52 s ± 251 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
from experiment_utils.experiments_helper import get_test_dev_sets
from fast_forward.util.pyterrier import FFScore
from fast_forward_indexes_library_enhancements.pipeline_transformers import FFInterpolateNormalized
from fast_forward_indexes_library_enhancements.disk import OnDiskIndex

# --- BEIR FiQA: dataset identifiers ---
# This cell rebinds the dataset, topics, indexes and re-ranking stages that
# were previously set up for NFCorpus.
dataset_name = "irds:beir/fiqa"
dataset_test_name = "irds:beir/fiqa/test"
dev_set_name = "irds:beir/fiqa/dev"
pipeline_name = f"BM25 >> {model_name}"

test_topics, test_qrels, dev_topics, dev_qrels = get_test_dev_sets(
    dataset_test_name,
    dev_set_name,
)

# Sparse index (first-stage retriever), loaded with in_memory=True
retriever = load_sparse_index_from_disk(
    dataset_name,
    path_to_root,
    in_memory=True,
)

# Dense index, loaded with in_memory=True
dense_index = load_dense_index_from_disk(
    dataset_name,
    q_encoder,
    model_name,
    path_to_root,
    model_directory,
    in_memory=True,
)

# Re-ranking stages: dense scoring followed by normalised interpolation.
ff_int = FFInterpolateNormalized(alpha=0.3)
ff_score = FFScore(dense_index)

# FiQA counterparts of the cutoff-1000 pipelines; pipeline6 applies `~` to
# the retriever, pipeline5 does not.
pipeline5 = (retriever % 1000) >> ff_score >> ff_int
pipeline6 = ((~retriever) % 1000) >> ff_score >> ff_int

100%|██████████| 57638/57638 [00:00<00:00, 343937.06it/s]


In [14]:
% timeit pipeline5(test_topics)

49.7 s ± 12.7 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
% timeit pipeline6(test_topics)

33.9 s ± 4.67 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
# FiQA pipelines with the retriever's rank cutoff lowered to 100.
# NOTE(review): neither pipeline is timed or evaluated in the visible part of
# this notebook — confirm whether the corresponding cells are missing.
pipeline7 = (retriever % 100) >> ff_score >> ff_int
pipeline8 = ((~retriever) % 100) >> ff_score >> ff_int

In [5]:
from experiment_utils.experiments_helper import get_test_dev_sets
from fast_forward.util.pyterrier import FFScore
from fast_forward_indexes_library_enhancements.pipeline_transformers import FFInterpolateNormalized

# --- BEIR NFCorpus again: rebind dataset, topics and indexes ---
dataset_name = "irds:beir/nfcorpus"
dataset_test_name = "irds:beir/nfcorpus/test"
dev_set_name = "irds:beir/nfcorpus/dev"
pipeline_name = f"BM25 >> {model_name}"

test_topics, test_qrels, dev_topics, dev_qrels = get_test_dev_sets(
    dataset_test_name,
    dev_set_name,
)

# Sparse index (first-stage retriever), loaded with in_memory=True
retriever = load_sparse_index_from_disk(
    dataset_name,
    path_to_root,
    in_memory=True,
)

# Dense index, loaded with in_memory=True
dense_index = load_dense_index_from_disk(
    dataset_name,
    q_encoder,
    model_name,
    path_to_root,
    model_directory,
    in_memory=True,
)

# The re-ranking stages are rebuilt here but are not part of pipeline9/10.
ff_int = FFInterpolateNormalized(alpha=0.3)
ff_score = FFScore(dense_index)

# Retriever on its own: without (pipeline9) and with (pipeline10) the `~` operator.
pipeline9 = retriever
pipeline10 = ~retriever

100%|██████████| 3633/3633 [00:00<00:00, 1484597.28it/s]


In [7]:
# Time the bare retriever (no `~`, no rank cutoff applied here) on the test topics.
%timeit pipeline9(test_topics)

1.98 s ± 253 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
# Time the `~`-wrapped retriever on the same topics.
# NOTE(review): %timeit repeats the call, so most repetitions presumably run
# against an already-populated cache — confirm before comparing with pipeline9.
%timeit pipeline10(test_topics)

149 ms ± 6.68 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
# Effectiveness check: evaluate each pipeline with and without the `~`
# operator on the retriever, so their scores can be compared side by side.
# The metric list is the shared `eval_metrics` defined near the top of the
# notebook (RR@10, nDCG@10, MAP@100) rather than a second inline copy.
pt.Experiment(
    [pipeline1, pipeline2, pipeline9, pipeline10],
    test_topics,
    test_qrels,
    eval_metrics=eval_metrics,
    names=["FFI", "~BM25 in FFI", "BM25", "~BM25"],
)

Unnamed: 0,name,RR@10,nDCG@10,AP@100
0,FFI,0.577725,0.363031,0.16908
1,~BM25 in FFI,0.577725,0.363031,0.16908
2,BM25,0.534378,0.322219,0.143582
3,~BM25,0.534378,0.322219,0.143582
