In [1]:
import pyterrier as pt

if not pt.started():

    pt.init(tqdm="notebook")

PyTerrier 0.10.0 has loaded Terrier 5.9 (built by craigm on 2024-05-02 17:40) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [2]:
dataset = pt.get_dataset("irds:beir/fiqa")

In [3]:
index_path = "./sparse_index_fiqa"
# Load index to memory
index = pt.IndexFactory.of(index_path, memory=True)

In [4]:
bm25 = pt.BatchRetrieve(index, wmodel="BM25")

In [5]:
testset = pt.get_dataset("irds:beir/fiqa/test")

In [6]:
bm25 = bm25 % 100

In [7]:
first_stage_res = bm25.transform( testset.get_topics())

In [8]:
from fast_forward.encoder import TCTColBERTQueryEncoder, TCTColBERTDocumentEncoder
import torch

q_encoder = TCTColBERTQueryEncoder("castorini/tct_colbert-msmarco")
d_encoder = TCTColBERTDocumentEncoder(
    "castorini/tct_colbert-msmarco",
    device="cuda:0" if torch.cuda.is_available() else "cpu",
)

In [9]:
from fast_forward import OnDiskIndex, Mode
from pathlib import Path

ff_index = OnDiskIndex.load(
    Path("./dense_index_fiqa_GTE-base/ffindex_fiqa_gte-base-en-v1.5.h5"), query_encoder=q_encoder, mode=Mode.MAXP
).to_memory()

100%|██████████| 57638/57638 [00:00<00:00, 693060.53it/s]


In [10]:
from fast_forward.util.pyterrier import FFScore
from fast_forward.util.pyterrier import FFInterpolate

ff_score = FFScore(ff_index)
ff_int = FFInterpolate(alpha=0.05) #added the optimal version from the beginning in order to avoid running GridsSearch again

In [11]:
reranking_stage = ff_score >> ff_int

In [12]:
res_after_dense_scoring = ff_score(first_stage_res)

In [13]:
def time_reranking():
    reranking_stage(first_stage_res)

In [14]:
def time_reranking_first():
    ff_score(first_stage_res)

In [15]:
def time_reranking_second():
    ff_int(res_after_dense_scoring)

In [16]:
import timeit
execution_time = timeit.timeit(time_reranking, number=3)
print(execution_time)

54.64176323700303


In [17]:
import timeit
execution_time = timeit.timeit(time_reranking_first, number=3)
print(execution_time)

64.06021643300119


In [18]:
import timeit
execution_time = timeit.timeit(time_reranking_second, number=3)
print(execution_time)

0.02934869999808143


In [16]:
%timeit ff_score(first_stage_res)

23.1 s ± 1.44 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
%timeit reranking_stage(first_stage_res)

22.5 s ± 1.36 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
%timeit ff_int(res_after_dense_scoring)

6.89 ms ± 578 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
