In [1]:
%load_ext autoreload
%autoreload 2

from time import time
import pandas as pd
import numpy as np
import os
from collections import Counter, defaultdict
import pickle

In [2]:
import sys
sys.path.insert(0, "/data3/muntean/DRhard")

In [3]:
import argparse
import subprocess
import sys
sys.path.append("./")
import faiss
import logging
import os
import numpy as np
# import torch
from transformers import RobertaConfig
from tqdm import tqdm
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.sampler import SequentialSampler

from model import RobertaDot
from dataset import (
    TextTokenIdsCache, load_rel, SubsetSeqDataset, SequenceDataset,
    single_get_collate_function
)
from retrieve_utils import (
    construct_flatindex_from_embeddings, 
    index_retrieve, convert_index_to_gpu
)
logger = logging.Logger(__name__)

In [4]:
doc_memmap_path = "/data3/muntean/DRhard/data/passage/evaluate/star/passages.memmap"
docid_memmap_path = "/data3/muntean/DRhard/data/passage/evaluate/star/passages-id.memmap"
query_memmap_path = "/data3/muntean/DRhard/data/passage/evaluate/star/test-manual-query.memmap"
queryids_memmap_path = "/data3/muntean/DRhard/data/passage/evaluate/star/test-manual-query-id.memmap"

In [5]:
doc_embeddings = np.memmap(doc_memmap_path, dtype=np.float32, mode="r")
doc_ids = np.memmap(docid_memmap_path, dtype=np.int32, mode="r")
doc_embeddings = doc_embeddings.reshape(-1, 768)

query_embeddings = np.memmap(query_memmap_path, dtype=np.float32, mode="r")
query_embeddings = query_embeddings.reshape(-1, 768)
query_ids = np.memmap(queryids_memmap_path, dtype=np.int32, mode="r")

In [6]:
%time
index = construct_flatindex_from_embeddings(doc_embeddings, doc_ids)

CPU times: user 2 µs, sys: 2 µs, total: 4 µs
Wall time: 7.63 µs
embedding shape: (38626614, 768)
(38626614,) int64


In [7]:
type(index)

faiss.swigfaiss.IndexIDMap2

# Select certain queries and certain docs for small index

In [8]:
# Load our qid and docid remapping dictionaries

# query id dict
qid_mapping_path = "/data3/muntean/DRhard/data/passage/dataset/queries.CASTmanual.QID2newID.test.tsv"
queries_df = pd.read_csv(qid_mapping_path, delimiter="\t", header=None)
print(len(queries_df))

# collection id dict
collection_mapping_path = "/data3/muntean/DRhard/data/passage/dataset/CASTcollectionPID2newID.tsv"
collection_df = pd.read_csv(collection_mapping_path, delimiter="\t", header=None)
print(len(collection_df))

479
38626614


In [9]:
qid2newqid_dict = dict(zip(queries_df[0], queries_df[1])) 
pid2newpid_dict = dict(zip(collection_df[0], collection_df[1])) 

In [10]:
qid2newqid_dict["32_1"]

9

In [11]:
# Create reverse dictionaries
newqid2qid_dict = dict(zip(queries_df[1], queries_df[0])) 
newpid2pid_dict = dict(zip(collection_df[1], collection_df[0])) 

In [12]:
newqid2qid_dict[9]

'32_1'

In [13]:
# DRhard docid and qid encoding
preprocess_dir = "/data3/muntean/DRhard/data/passage/preprocess"

pid2offset = pickle.load(open(os.path.join(preprocess_dir, "pid2offset.pickle"), 'rb'))
offset2pid = {v:k for k, v in pid2offset.items()}
qid2offset = pickle.load(open(os.path.join(preprocess_dir, f"test-manual-qid2offset.pickle"), 'rb'))
offset2qid = {v:k for k, v in qid2offset.items()}

In [14]:
qid2offset[9]

9

# Create conv cache

In [15]:
topk = 2000 # cache dimension [1000,2000,5000,10000]
# evaluation is done on top 1000 - another topk?

In [16]:
# distance dicts

cache_radius_dict = dict() # between first utterance (qa) and last retrieved doc from the big index
query_distance_dict = dict() # distance between the first (qa) and the rest of utterances of the conversation (qb)
query_radius_dict = dict() # between current utterance (qb) and last retrieved doc from the big index
diff_distance_dict = dict() # rb_hat = ra - d(qb, qa)

In [17]:
def l2_distance(v1,v2):
    return np.linalg.norm(v1-v2)

In [49]:
def create_conv_cache(conv_id, qid2newqid_dict, qid2offset, query_embeddings, doc_embeddings, 
                      index, topk, cache_radius_dict):
    # first utt of the conversation - determines the size of the cache
    first_qid = conv_id + "_1"
    newqid = qid2newqid_dict[first_qid] #added first
    qid_offset = qid2offset[newqid]

    # prendere il memmap
    query_emb = query_embeddings[qid_offset].reshape(1, 768)
    
    # fare retireval nel indice grande e prendere top 2000 documenti
    faiss.omp_set_num_threads(16) #32
    nearest_neighbors = index_retrieve(index, query_emb, topk, batch=32)
    
    # select doc embeddings, paired with ids
    small_doc_emb = doc_embeddings[nearest_neighbors[0]]
    small_doc_ids = np.array(nearest_neighbors[0])
    index_conv = construct_flatindex_from_embeddings(small_doc_emb, small_doc_ids)
       
    # compute distance between the first query and last doc in the list of topk retrieved that are stored in cache (e.g., r_q_i)
    last_doc = nearest_neighbors[0][-1]
    last_doc_embedding = doc_embeddings[last_doc]
    cache_radius_dict[first_qid] = l2_distance(query_emb, last_doc_embedding)
    
    return index_conv, nearest_neighbors, cache_radius_dict

In [50]:
# Create cache for conv 31
index_conv, nearest_neighbors_qa, cache_radius_dict = create_conv_cache("31", qid2newqid_dict, 
                                                                     qid2offset, query_embeddings, 
                                                                     doc_embeddings, index, topk, 
                                                                     cache_radius_dict)

Query Num 1


100%|█████████████████████████████████████████████████████| 1/1 [00:32<00:00, 32.48s/it]

Elapsed Time: 32.5s, Elapsed Time per query: 32486.3ms
embedding shape: (2000, 768)
(2000,) int64





In [51]:
cache_radius_dict

{'31_1': 5.1092577}

In [28]:
len(nearest_neighbors_qa[0])

2000

In [29]:
# save docs for qa for coverage
retrieved_qa = nearest_neighbors_qa[0]

In [31]:
# first  query id & embedding
first_qid = "31_1"
first_newqid = qid2newqid_dict[first_qid] #added first
first_qid_offset = qid2offset[first_newqid]
first_qid_offset

0

In [35]:
results_list_qa = []
# save results - top 1000 for first conv query qa
for idx, pid in enumerate(nearest_neighbors_qa[0][:1000]):
    results_list_qa.append((first_qid_offset, pid, idx+1))
results_list_qa[:10]

[(0, 3878310, 1),
 (0, 789615, 2),
 (0, 291001, 3),
 (0, 5625323, 4),
 (0, 2954424, 5),
 (0, 6430388, 6),
 (0, 4976140, 7),
 (0, 7069008, 8),
 (0, 3705851, 9),
 (0, 19317044, 10)]

In [38]:
# prendere il memmap di qa
first_query_emb = query_embeddings[first_qid_offset].reshape(1, 768)
# first_query_emb

In [42]:
qb_id = "31_2"

In [43]:
# select query embedding
newqid = qid2newqid_dict[qb_id]
qid_offset = qid2offset[newqid]
# prendere il memmap
query_emb = query_embeddings[qid_offset].reshape(1, 768)

In [44]:
query_emb

memmap([[-9.98741984e-01,  3.03324759e-01, -4.50298727e-01,
          2.55340219e-01, -7.40765333e-01, -6.88003659e-01,
          1.66195786e+00, -5.47632515e-01,  1.68535888e+00,
          3.49659741e-01,  2.78651208e-01,  1.37620175e+00,
          1.65046170e-01, -1.41864347e+00, -2.67653435e-01,
         -1.76145780e+00,  6.53178692e-01, -1.37068197e-01,
         -5.54729137e-04, -6.27226412e-01, -4.92283046e-01,
          7.48788416e-01, -3.95853549e-01, -5.91552615e-01,
          4.99970317e-01, -8.49153161e-01, -7.66879916e-02,
         -8.57331038e-01,  8.90509129e-01,  6.91347301e-01,
         -4.62582350e-01, -1.12830174e+00,  7.49185905e-02,
          2.59847585e-02,  1.35250473e+00, -1.32519913e+00,
          1.06401420e+00, -1.29503000e+00,  1.63081318e-01,
          7.57634997e-01,  1.55356482e-01,  7.05008864e-01,
         -1.38746428e+00,  2.96265841e-01,  9.91439283e-01,
          1.53246868e+00, -1.90052092e+00, -1.43841231e+00,
         -1.46393314e-01, -1.42840147e-0

In [45]:
l2_distance(first_query_emb,query_emb)

3.9169831

In [47]:
# retrieve docs for qb
faiss.omp_set_num_threads(16) #32
nearest_neighbors_qb = index_retrieve(index_conv, query_emb, 1000, batch=32)
print("Retrieved top 1000 for", qb_id)

Query Num 1


100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 240.39it/s]

Elapsed Time: 0.0s, Elapsed Time per query: 7.8ms
Retrieved top 1000 for 31_2





In [None]:

coverage1 = {} # top10 ranking for each query from the BIG index
coverage2 = {} # list of triples (3,5,10) for each qb


            
            # save results - top1000, for qb
            for idx, pid in enumerate(nearest_neighbors[0]):
                results_list.append((qid_offset, pid, idx+1))
            
            print("Computing distances and coverage!")
            #### Compute DISTANCES rb_hat
            
            # compute distance between qb and qa
            query_distance_dict[qid] = l2_distance(query_emb, first_query_emb)
            
            # compute rb_capuccio = ra - d(qb, qa)
            diff_distance_dict[qid] = cache_radius_dict[first_qid] - query_distance_dict[qid]
            
            #### Compute COVERAGE 
            # Cov1 - intersezione tra Ba ed Bb_hat
            # distance between qb and each doc in top1000 from cache and check if < rb_hat
            # put them in a list - exact match - and intersect with top k in cache
            retrieved_qb_in_rb_hat = []
            small_doc_emb = doc_embeddings[nearest_neighbors[0]]
            for doc_id, doc_emb in zip(nearest_neighbors[0], small_doc_emb):
                if l2_distance(query_emb, doc_emb) < diff_distance_dict[qid]:
                    retrieved_qb_in_rb_hat.append(doc_id)
            coverage1[qid]=len(retrieved_qb_in_rb_hat)
            print("Finished for Rb hat!")
            
            #### Compute DISTANCES rb
            
            #compute distance between qb and the last retrieved doc from INDEX
            first_10_docs = index_retrieve(index, query_emb, 10, batch=32)
            # 3 raggi rb per 3,5,10
            rb_dist_list = []
            for i in [3,5,10]:
                dist_rb = l2_distance(query_emb, first_10_docs[0][i-1])
                rb_dist_list.append(dist_rb)
            query_radius_dict[qid] = rb_dist_list
            print("Finished retrieving in the big index!")
            
            # save top10 for qb in INDEX
            results_list_rb= first_10_docs[0][:10]
            result_list_qb_in_cache = nearest_neighbors[0][:10]
        
            #### Compute COVERAGE          
            # Cov2 - intersezione tra risultati di query qb su cache e query qb su indice per k=3,5,10
            num_intersection = []
            for cut_off in [3,5,10]:
                elem_in_common = set(results_list_rb[:cut_off]).intersection(result_list_qb_in_cache[:cut_off])
                num_intersection.append(len(elem_in_common))
            coverage2[qid] = num_intersection
            
            print("finished with qid: ", qid)

Starting conv:  31
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:13<00:00, 73.13s/it][A[A


Elapsed Time: 73.1s, Elapsed Time per query: 73137.9ms
embedding shape: (2000, 768)
(2000,) int64
Create cache for:  31
Processing qid: 31_2
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 167.30it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 10.3ms
Retrieved top 1000 for 31_2
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.53s/it][A[A


Elapsed Time: 69.5s, Elapsed Time per query: 69532.7ms
Finished retrieving in the big index!
finished with qid:  31_2
Processing qid: 31_3
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 237.56it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 7.1ms
Retrieved top 1000 for 31_3
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.38s/it][A[A


Elapsed Time: 68.4s, Elapsed Time per query: 68379.0ms
Finished retrieving in the big index!
finished with qid:  31_3
Processing qid: 31_4
Query Num 1




100%|█████████████████████████████████████████████████████| 1/1 [00:00<00:00, 92.21it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 14.9ms
Retrieved top 1000 for 31_4
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:10<00:00, 70.37s/it][A[A


Elapsed Time: 70.4s, Elapsed Time per query: 70375.2ms
Finished retrieving in the big index!
finished with qid:  31_4
Processing qid: 31_5
Query Num 1




100%|█████████████████████████████████████████████████████| 1/1 [00:00<00:00, 90.52it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 15.3ms
Retrieved top 1000 for 31_5
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.94s/it][A[A


Elapsed Time: 68.9s, Elapsed Time per query: 68938.1ms
Finished retrieving in the big index!
finished with qid:  31_5
Processing qid: 31_6
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 178.60it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 10.0ms
Retrieved top 1000 for 31_6
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.57s/it][A[A


Elapsed Time: 69.6s, Elapsed Time per query: 69568.5ms
Finished retrieving in the big index!
finished with qid:  31_6
Processing qid: 31_7
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 153.60it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 10.8ms
Retrieved top 1000 for 31_7
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:11<00:00, 71.38s/it][A[A


Elapsed Time: 71.4s, Elapsed Time per query: 71387.1ms
Finished retrieving in the big index!
finished with qid:  31_7
Processing qid: 31_8
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 239.41it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 6.9ms
Retrieved top 1000 for 31_8
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.64s/it][A[A


Elapsed Time: 69.6s, Elapsed Time per query: 69638.3ms
Finished retrieving in the big index!
finished with qid:  31_8
Processing qid: 31_9
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 139.05it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 10.5ms
Retrieved top 1000 for 31_9
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:10<00:00, 70.39s/it][A[A


Elapsed Time: 70.4s, Elapsed Time per query: 70396.6ms
Finished retrieving in the big index!
finished with qid:  31_9
Starting conv:  32
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.79s/it][A[A


Elapsed Time: 68.8s, Elapsed Time per query: 68791.3ms
embedding shape: (2000, 768)
(2000,) int64
Create cache for:  32
Processing qid: 32_2
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 103.77it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 13.5ms
Retrieved top 1000 for 32_2
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:10<00:00, 70.46s/it][A[A


Elapsed Time: 70.5s, Elapsed Time per query: 70466.8ms
Finished retrieving in the big index!
finished with qid:  32_2
Processing qid: 32_3
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 271.53it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 10.9ms
Retrieved top 1000 for 32_3
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.43s/it][A[A


Elapsed Time: 68.4s, Elapsed Time per query: 68434.6ms
Finished retrieving in the big index!
finished with qid:  32_3
Processing qid: 32_4
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 322.39it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 5.7ms
Retrieved top 1000 for 32_4
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.70s/it][A[A


Elapsed Time: 68.7s, Elapsed Time per query: 68704.8ms
Finished retrieving in the big index!
finished with qid:  32_4
Processing qid: 32_5
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 200.89it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 7.9ms
Retrieved top 1000 for 32_5
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:10<00:00, 70.40s/it][A[A


Elapsed Time: 70.4s, Elapsed Time per query: 70406.6ms
Finished retrieving in the big index!
finished with qid:  32_5
Processing qid: 32_6
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 136.05it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 12.0ms
Retrieved top 1000 for 32_6
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.04s/it][A[A


Elapsed Time: 69.0s, Elapsed Time per query: 69040.7ms
Finished retrieving in the big index!
finished with qid:  32_6
Processing qid: 32_7
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 314.58it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 6.3ms
Retrieved top 1000 for 32_7
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.35s/it][A[A


Elapsed Time: 69.4s, Elapsed Time per query: 69351.8ms
Finished retrieving in the big index!
finished with qid:  32_7
Processing qid: 32_8
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 266.15it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 6.9ms
Retrieved top 1000 for 32_8
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.22s/it][A[A


Elapsed Time: 69.2s, Elapsed Time per query: 69226.2ms
Finished retrieving in the big index!
finished with qid:  32_8
Processing qid: 32_9
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 126.40it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 11.3ms
Retrieved top 1000 for 32_9
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.56s/it][A[A


Elapsed Time: 69.6s, Elapsed Time per query: 69569.2ms
Finished retrieving in the big index!
finished with qid:  32_9
Processing qid: 32_10
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 245.04it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 8.0ms
Retrieved top 1000 for 32_10
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.07s/it][A[A


Elapsed Time: 69.1s, Elapsed Time per query: 69075.0ms
Finished retrieving in the big index!
finished with qid:  32_10
Processing qid: 32_11
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 130.85it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 10.8ms
Retrieved top 1000 for 32_11
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:10<00:00, 70.55s/it][A[A


Elapsed Time: 70.5s, Elapsed Time per query: 70549.4ms
Finished retrieving in the big index!
finished with qid:  32_11
Starting conv:  33
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.34s/it][A[A


Elapsed Time: 69.3s, Elapsed Time per query: 69345.2ms
embedding shape: (2000, 768)
(2000,) int64
Create cache for:  33
Processing qid: 33_2
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 111.41it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 13.4ms
Retrieved top 1000 for 33_2
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.62s/it][A[A


Elapsed Time: 68.6s, Elapsed Time per query: 68626.4ms
Finished retrieving in the big index!
finished with qid:  33_2
Processing qid: 33_3
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 158.53it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 11.0ms
Retrieved top 1000 for 33_3
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.56s/it][A[A


Elapsed Time: 68.6s, Elapsed Time per query: 68563.8ms
Finished retrieving in the big index!
finished with qid:  33_3
Processing qid: 33_4
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 152.99it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 11.0ms
Retrieved top 1000 for 33_4
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:10<00:00, 70.27s/it][A[A


Elapsed Time: 70.3s, Elapsed Time per query: 70275.6ms
Finished retrieving in the big index!
finished with qid:  33_4
Processing qid: 33_5
Query Num 1




100%|█████████████████████████████████████████████████████| 1/1 [00:00<00:00, 84.78it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 14.7ms
Retrieved top 1000 for 33_5
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:11<00:00, 71.25s/it][A[A


Elapsed Time: 71.2s, Elapsed Time per query: 71248.5ms
Finished retrieving in the big index!
finished with qid:  33_5
Processing qid: 33_6
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 173.90it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 11.9ms
Retrieved top 1000 for 33_6
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.69s/it][A[A


Elapsed Time: 68.7s, Elapsed Time per query: 68717.3ms
Finished retrieving in the big index!
finished with qid:  33_6
Processing qid: 33_7
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 235.01it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 7.4ms
Retrieved top 1000 for 33_7
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:07<00:00, 67.06s/it][A[A


Elapsed Time: 67.1s, Elapsed Time per query: 67063.1ms
Finished retrieving in the big index!
finished with qid:  33_7
Processing qid: 33_8
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 115.05it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 11.9ms
Retrieved top 1000 for 33_8
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.99s/it][A[A


Elapsed Time: 70.0s, Elapsed Time per query: 69994.0ms
Finished retrieving in the big index!
finished with qid:  33_8
Processing qid: 33_9
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 155.34it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 10.9ms
Retrieved top 1000 for 33_9
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.42s/it][A[A


Elapsed Time: 69.4s, Elapsed Time per query: 69425.2ms
Finished retrieving in the big index!
finished with qid:  33_9
Processing qid: 33_10
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 114.07it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 13.3ms
Retrieved top 1000 for 33_10
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.46s/it][A[A


Elapsed Time: 69.5s, Elapsed Time per query: 69466.3ms
Finished retrieving in the big index!
finished with qid:  33_10
Starting conv:  34
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.67s/it][A[A


Elapsed Time: 68.7s, Elapsed Time per query: 68671.4ms
embedding shape: (2000, 768)
(2000,) int64
Create cache for:  34
Processing qid: 34_2
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 418.13it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 5.2ms
Retrieved top 1000 for 34_2
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.49s/it][A[A


Elapsed Time: 69.5s, Elapsed Time per query: 69495.2ms
Finished retrieving in the big index!
finished with qid:  34_2
Processing qid: 34_3
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 112.54it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 14.4ms
Retrieved top 1000 for 34_3
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.98s/it][A[A


Elapsed Time: 69.0s, Elapsed Time per query: 68983.8ms
Finished retrieving in the big index!
finished with qid:  34_3
Processing qid: 34_4
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 208.33it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 7.5ms
Retrieved top 1000 for 34_4
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.30s/it][A[A


Elapsed Time: 69.3s, Elapsed Time per query: 69303.0ms
Finished retrieving in the big index!
finished with qid:  34_4
Processing qid: 34_5
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 101.07it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 12.8ms
Retrieved top 1000 for 34_5
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.98s/it][A[A


Elapsed Time: 69.0s, Elapsed Time per query: 68982.7ms
Finished retrieving in the big index!
finished with qid:  34_5
Processing qid: 34_6
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 204.17it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 9.9ms
Retrieved top 1000 for 34_6
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.17s/it][A[A


Elapsed Time: 69.2s, Elapsed Time per query: 69173.5ms
Finished retrieving in the big index!
finished with qid:  34_6
Processing qid: 34_7
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 119.22it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 13.0ms
Retrieved top 1000 for 34_7
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.81s/it][A[A


Elapsed Time: 68.8s, Elapsed Time per query: 68814.1ms
Finished retrieving in the big index!
finished with qid:  34_7
Processing qid: 34_8
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 163.59it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 10.3ms
Retrieved top 1000 for 34_8
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.50s/it][A[A


Elapsed Time: 69.5s, Elapsed Time per query: 69502.9ms
Finished retrieving in the big index!
finished with qid:  34_8
Processing qid: 34_9
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 117.74it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 12.1ms
Retrieved top 1000 for 34_9
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.78s/it][A[A


Elapsed Time: 69.8s, Elapsed Time per query: 69780.6ms
Finished retrieving in the big index!
finished with qid:  34_9
Starting conv:  37
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.84s/it][A[A


Elapsed Time: 69.8s, Elapsed Time per query: 69843.6ms
embedding shape: (2000, 768)
(2000,) int64
Create cache for:  37
Processing qid: 37_2
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 108.69it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 15.2ms
Retrieved top 1000 for 37_2
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.42s/it][A[A


Elapsed Time: 69.4s, Elapsed Time per query: 69418.3ms
Finished retrieving in the big index!
finished with qid:  37_2
Processing qid: 37_3
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 234.44it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 7.0ms
Retrieved top 1000 for 37_3
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.48s/it][A[A


Elapsed Time: 68.5s, Elapsed Time per query: 68484.3ms
Finished retrieving in the big index!
finished with qid:  37_3
Processing qid: 37_4
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 114.26it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 13.2ms
Retrieved top 1000 for 37_4
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:11<00:00, 71.85s/it][A[A


Elapsed Time: 71.9s, Elapsed Time per query: 71857.1ms
Finished retrieving in the big index!
finished with qid:  37_4
Processing qid: 37_5
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 115.42it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 13.9ms
Retrieved top 1000 for 37_5
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.07s/it][A[A


Elapsed Time: 69.1s, Elapsed Time per query: 69074.5ms
Finished retrieving in the big index!
finished with qid:  37_5
Processing qid: 37_6
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 105.51it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 14.6ms
Retrieved top 1000 for 37_6
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.38s/it][A[A


Elapsed Time: 69.4s, Elapsed Time per query: 69387.6ms
Finished retrieving in the big index!
finished with qid:  37_6
Processing qid: 37_7
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 116.15it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 11.8ms
Retrieved top 1000 for 37_7
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:09<00:00, 69.95s/it][A[A


Elapsed Time: 70.0s, Elapsed Time per query: 69950.1ms
Finished retrieving in the big index!
finished with qid:  37_7
Processing qid: 37_8
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 112.17it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 11.9ms
Retrieved top 1000 for 37_8
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.68s/it][A[A


Elapsed Time: 68.7s, Elapsed Time per query: 68685.9ms
Finished retrieving in the big index!
finished with qid:  37_8
Processing qid: 37_9
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 106.99it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 14.6ms
Retrieved top 1000 for 37_9
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.71s/it][A[A


Elapsed Time: 68.7s, Elapsed Time per query: 68712.1ms
Finished retrieving in the big index!
finished with qid:  37_9
Processing qid: 37_10
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 265.65it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 7.5ms
Retrieved top 1000 for 37_10
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:08<00:00, 68.73s/it][A[A


Elapsed Time: 68.7s, Elapsed Time per query: 68735.4ms
Finished retrieving in the big index!
finished with qid:  37_10
Processing qid: 37_11
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 241.19it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 6.6ms
Retrieved top 1000 for 37_11
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:07<00:00, 67.37s/it][A[A


Elapsed Time: 67.4s, Elapsed Time per query: 67367.4ms
Finished retrieving in the big index!
finished with qid:  37_11
Processing qid: 37_12
Query Num 1




100%|█████████████████████████████████████████████████████| 1/1 [00:00<00:00, 92.56it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 15.1ms
Retrieved top 1000 for 37_12
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:04<00:00, 64.65s/it][A[A


Elapsed Time: 64.6s, Elapsed Time per query: 64649.6ms
Finished retrieving in the big index!
finished with qid:  37_12
Starting conv:  40
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.35s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63356.0ms
embedding shape: (2000, 768)
(2000,) int64
Create cache for:  40
Processing qid: 40_2
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 375.80it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 7.3ms
Retrieved top 1000 for 40_2
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.39s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63393.4ms
Finished retrieving in the big index!
finished with qid:  40_2
Processing qid: 40_3
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 321.45it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 5.9ms
Retrieved top 1000 for 40_3
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.26s/it][A[A


Elapsed Time: 63.3s, Elapsed Time per query: 63264.4ms
Finished retrieving in the big index!
finished with qid:  40_3
Processing qid: 40_4
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 366.54it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 5.4ms
Retrieved top 1000 for 40_4
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.26s/it][A[A


Elapsed Time: 63.3s, Elapsed Time per query: 63260.8ms
Finished retrieving in the big index!
finished with qid:  40_4
Processing qid: 40_5
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 330.16it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 6.0ms
Retrieved top 1000 for 40_5
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.37s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63374.8ms
Finished retrieving in the big index!
finished with qid:  40_5
Processing qid: 40_6
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 312.10it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 5.9ms
Retrieved top 1000 for 40_6
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.45s/it][A[A


Elapsed Time: 63.5s, Elapsed Time per query: 63450.5ms
Finished retrieving in the big index!
finished with qid:  40_6
Processing qid: 40_7
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 310.57it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 5.6ms
Retrieved top 1000 for 40_7
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:04<00:00, 64.08s/it][A[A


Elapsed Time: 64.1s, Elapsed Time per query: 64085.8ms
Finished retrieving in the big index!
finished with qid:  40_7
Processing qid: 40_8
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 314.25it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 5.7ms
Retrieved top 1000 for 40_8
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.39s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63395.1ms
Finished retrieving in the big index!
finished with qid:  40_8
Processing qid: 40_9
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 327.25it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 5.6ms
Retrieved top 1000 for 40_9
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.39s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63390.8ms
Finished retrieving in the big index!
finished with qid:  40_9
Processing qid: 40_10
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 325.34it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 5.7ms
Retrieved top 1000 for 40_10
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.41s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63414.6ms
Finished retrieving in the big index!
finished with qid:  40_10
Starting conv:  49
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.46s/it][A[A


Elapsed Time: 63.5s, Elapsed Time per query: 63457.9ms
embedding shape: (2000, 768)
(2000,) int64
Create cache for:  49
Processing qid: 49_2
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 332.88it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 6.8ms
Retrieved top 1000 for 49_2
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.36s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63362.1ms
Finished retrieving in the big index!
finished with qid:  49_2
Processing qid: 49_3
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 242.25it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 7.0ms
Retrieved top 1000 for 49_3
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.22s/it][A[A


Elapsed Time: 63.2s, Elapsed Time per query: 63218.2ms
Finished retrieving in the big index!
finished with qid:  49_3
Processing qid: 49_4
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 155.14it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 9.6ms
Retrieved top 1000 for 49_4
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.29s/it][A[A


Elapsed Time: 63.3s, Elapsed Time per query: 63295.8ms
Finished retrieving in the big index!
finished with qid:  49_4
Processing qid: 49_5
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 242.52it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 8.0ms
Retrieved top 1000 for 49_5
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.27s/it][A[A


Elapsed Time: 63.3s, Elapsed Time per query: 63267.6ms
Finished retrieving in the big index!
finished with qid:  49_5
Processing qid: 49_6
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 159.01it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 10.2ms
Retrieved top 1000 for 49_6
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.24s/it][A[A


Elapsed Time: 63.2s, Elapsed Time per query: 63242.4ms
Finished retrieving in the big index!
finished with qid:  49_6
Processing qid: 49_7
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 232.02it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 7.6ms
Retrieved top 1000 for 49_7
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.40s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63406.4ms
Finished retrieving in the big index!
finished with qid:  49_7
Processing qid: 49_8
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 229.86it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 8.4ms
Retrieved top 1000 for 49_8
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.41s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63410.5ms
Finished retrieving in the big index!
finished with qid:  49_8
Processing qid: 49_9
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 241.82it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 7.6ms
Retrieved top 1000 for 49_9
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.42s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63418.0ms
Finished retrieving in the big index!
finished with qid:  49_9
Processing qid: 49_10
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 239.92it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 6.9ms
Retrieved top 1000 for 49_10
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.41s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63415.6ms
Finished retrieving in the big index!
finished with qid:  49_10
Starting conv:  50
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.39s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63394.5ms
embedding shape: (2000, 768)
(2000,) int64
Create cache for:  50
Processing qid: 50_2
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 368.08it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 6.4ms
Retrieved top 1000 for 50_2
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████| 1/1 [01:03<00:00, 63.37s/it][A[A


Elapsed Time: 63.4s, Elapsed Time per query: 63375.5ms
Finished retrieving in the big index!
finished with qid:  50_2
Processing qid: 50_3
Query Num 1




100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 326.96it/s][A[A


Elapsed Time: 0.0s, Elapsed Time per query: 6.9ms
Retrieved top 1000 for 50_3
Computing distances and coverage!
Finished for Rb hat!
Query Num 1




  0%|                                                             | 0/1 [00:00<?, ?it/s][A[A

In [27]:
len(results_list)

194000

In [40]:
# convert ids to original
with open("/data3/muntean/conversational-cache/data/star-ranking/CAST-manual-queries-star-L2-ranking-top1000-cache-top"+str(topk)+"-first-utt_v2.tsv", 'w') as outputfile:
    for (qid, pid, idx) in results_list:
        
        new_qid = offset2qid[qid]
        orig_qid = newqid2qid_dict[new_qid]
        
        new_pid = offset2pid[pid]
        orig_pid = newpid2pid_dict[new_pid]
        
        outputfile.write(f"{orig_qid}\t{orig_pid}\t{idx}\n")

# Eval results

In [None]:
# import pyterrier as pt
# pt.init()

In [None]:
qrel_path = "../data/CAST_qrels/qrels-docs.2019.txt"
qrels_df = pd.read_csv(qrel_path, delimiter=" ", header=None)
qrels_df[[3]] = qrels_df[[3]].astype(int)
qrels_df = qrels_df.drop([1], axis=1)
qrels_df.columns=["qid", "docno", "label"]
qrels = qrels_df

In [None]:
topics_path='../data/CAST-2019/test_manual_utterance.tsv' #manual

topics_df = pd.read_csv(topics_path, delimiter="\t", header=None)
topics_df.columns=["qid", "query"]
topics = topics_df
topics.head()

In [None]:
results_path = "../data/star-ranking/CAST-manual-queries-star-L2-ranking-top1000-cache-top"+str(topk)+"-first-utt_new.tsv"
results_df = pd.read_csv(results_path, delimiter="\t", header=None)
results_df[3] = 1000-results_df[2]
results_df.columns=["qid", "docno", "rank", "score"]
results_df.head()
# Results produced by the transformers must have “qid”, “docno”, “score”, “rank” columns.

In [None]:
%%time
pt.Experiment([results_df], topics, qrels, names=["STAR"], 
              eval_metrics=["map", "recip_rank", "recall_200", "P_3", "P_1", "ndcg_cut_3"])

In [None]:
%%time
res_per_query = pt.Experiment([results_df], topics, qrels, names=["STAR"], 
              eval_metrics=["map", "recip_rank", "recall_200", "P_3", "P_1", "ndcg_cut_3"], perquery=True)

In [None]:
res_per_query

# Explore the dictionaries

In [None]:
# distance dicts

# cache_radius_dict = dict() # between first utterance (qi) and last retrieved doc from the big index
# query_distance_dict = dict() # distance between the first (qi) and the rest of utterances of the conversation (q)
# query_radius_dict = dict() # between current utterance (q) and last retrieved doc from the small index
# diff_distance_dict = dict() # rbcapuccio = rqi - d(q, qi)

## save into .tsv files

In [63]:
cache_radius_dict

{'31_1': 28431738.661924537,
 '32_1': 355076152.0915213,
 '33_1': 106153431.02278665,
 '34_1': 906646872.6375507,
 '37_1': 592793363.4919354,
 '40_1': 337580499.0137381,
 '49_1': 76002583.38057733,
 '50_1': 125072275.5781277,
 '54_1': 669592163.721115,
 '56_1': 797924352.9748623,
 '58_1': 13380189.454374745,
 '59_1': 43408989.84819126,
 '61_1': 444688996.72242874,
 '67_1': 197938599.9763936,
 '68_1': 316337297.70754796,
 '69_1': 178151706.9753045,
 '75_1': 851853126.6474372,
 '77_1': 12856472.70520966,
 '78_1': 152264309.32073274,
 '79_1': 367685953.07002944}

In [64]:
with open('../data/star-ranking/cache-radius-star-L2-ranking-top1000-cache-top'+str(topk)+'_v2.tsv', 'w+') as fout:
    for i in cache_radius_dict:
        fout.write((i)+"\t"+str(cache_radius_dict[i])+"\n")

In [65]:
query_distance_dict

{'31_2': 3.9169831,
 '31_3': 5.3893275,
 '31_4': 5.084863,
 '31_5': 4.4554224,
 '31_6': 3.0022993,
 '31_7': 4.6444077,
 '31_8': 4.5658855,
 '31_9': 4.1239033,
 '32_2': 3.905776,
 '32_3': 5.3604336,
 '32_4': 4.893136,
 '32_5': 5.2374506,
 '32_6': 7.8547215,
 '32_7': 5.997548,
 '32_8': 5.9000263,
 '32_9': 6.0646443,
 '32_10': 6.0722146,
 '32_11': 5.3437514,
 '33_2': 2.2506719,
 '33_3': 4.3431473,
 '33_4': 3.7971294,
 '33_5': 3.5857072,
 '33_6': 3.6771781,
 '33_7': 3.4688494,
 '33_8': 3.3191404,
 '33_9': 4.0358944,
 '33_10': 8.226567,
 '34_2': 3.502506,
 '34_3': 3.4360359,
 '34_4': 4.074217,
 '34_5': 3.2885115,
 '34_6': 4.2839885,
 '34_7': 4.124792,
 '34_8': 4.4366627,
 '34_9': 3.6143475,
 '37_2': 2.6533976,
 '37_3': 3.9765513,
 '37_4': 3.9772527,
 '37_5': 3.5880055,
 '37_6': 7.1824594,
 '37_7': 7.6885986,
 '37_8': 5.2015877,
 '37_9': 7.2291493,
 '37_10': 7.1421967,
 '37_11': 7.0716915,
 '37_12': 7.046846,
 '40_2': 4.4159546,
 '40_3': 4.7052565,
 '40_4': 5.166576,
 '40_5': 5.545054,
 '40_

In [82]:
with open('../data/star-ranking/query-dist-star-L2-ranking-top1000-cache-top'+str(topk)+'_v2.tsv', 'w+') as fout:
    for i in query_distance_dict:
        fout.write((i)+"\t"+str(query_distance_dict[i])+"\n")

In [67]:
query_radius_dict

{'31_2': [99251942.92940265, 220947467.7251526, 212635840.87385574],
 '31_3': [124612187.46673912, 191627755.07755393, 121833534.56639196],
 '31_4': [1030599384.3308121, 93353991.23604874, 53113961.50053301],
 '31_5': [198164265.38188753, 777482.9, 101270710.48925853],
 '31_6': [102699527.70718996, 81083450.49029242, 178204139.5976222],
 '31_7': [166013882.86962366, 191627533.35039073, 194480290.31248894],
 '31_8': [182236215.32494026, 242726717.99504328, 3881733.6608218425],
 '31_9': [536862782.46281636, 244642560.1759066, 17845748.9718848],
 '32_2': [144977557.72888035, 943260071.1273872, 480213164.8269171],
 '32_3': [122397656.60753389, 364256714.1879441, 18660588.840928745],
 '32_4': [160500141.6396697, 160500197.06529555, 367388.72],
 '32_5': [121811253.46949138, 199441022.4187786, 97163948.77528352],
 '32_6': [88274121.80366217, 17066132.141343877, 87818661.72330387],
 '32_7': [185757543.91827187, 127331839.80048174, 229824519.12850353],
 '32_8': [472759554.38127977, 119167090.85

In [68]:
with open('../data/star-ranking/query-radius-star-L2-ranking-top1000-cache-top'+str(topk)+'_v2.tsv', 'w+') as fout:
    for i in query_radius_dict:
        fout.write(str(i)+"\t"+str(query_radius_dict[i])+"\n")

In [69]:
diff_distance_dict

{'31_2': 28431734.74494141,
 '31_3': 28431733.27259701,
 '31_4': 28431733.57706135,
 '31_5': 28431734.206502136,
 '31_6': 28431735.65962523,
 '31_7': 28431734.017516788,
 '31_8': 28431734.096038993,
 '31_9': 28431734.538021263,
 '32_2': 355076148.1857453,
 '32_3': 355076146.73108774,
 '32_4': 355076147.1983853,
 '32_5': 355076146.8540707,
 '32_6': 355076144.2367998,
 '32_7': 355076146.0939732,
 '32_8': 355076146.191495,
 '32_9': 355076146.026877,
 '32_10': 355076146.0193067,
 '32_11': 355076146.7477699,
 '33_2': 106153428.77211478,
 '33_3': 106153426.67963937,
 '33_4': 106153427.22565725,
 '33_5': 106153427.43707946,
 '33_6': 106153427.3456085,
 '33_7': 106153427.55393723,
 '33_8': 106153427.70364621,
 '33_9': 106153426.98689225,
 '33_10': 106153422.79621938,
 '34_2': 906646869.1350447,
 '34_3': 906646869.2015148,
 '34_4': 906646868.5633339,
 '34_5': 906646869.3490392,
 '34_6': 906646868.3535622,
 '34_7': 906646868.5127586,
 '34_8': 906646868.200888,
 '34_9': 906646869.0232033,
 '37_2'

In [70]:
with open('../data/star-ranking/rbhat-star-L2-ranking-top1000-cache-top'+str(topk)+'_v2.tsv', 'w+') as fout:
    for i in diff_distance_dict:
        fout.write(str(i)+"\t"+str(diff_distance_dict[i])+"\n")

In [71]:
coverage1

{'31_2': 1000,
 '31_3': 1000,
 '31_4': 1000,
 '31_5': 1000,
 '31_6': 1000,
 '31_7': 1000,
 '31_8': 1000,
 '31_9': 1000,
 '32_2': 1000,
 '32_3': 1000,
 '32_4': 1000,
 '32_5': 1000,
 '32_6': 1000,
 '32_7': 1000,
 '32_8': 1000,
 '32_9': 1000,
 '32_10': 1000,
 '32_11': 1000,
 '33_2': 1000,
 '33_3': 1000,
 '33_4': 1000,
 '33_5': 1000,
 '33_6': 1000,
 '33_7': 1000,
 '33_8': 1000,
 '33_9': 1000,
 '33_10': 1000,
 '34_2': 1000,
 '34_3': 1000,
 '34_4': 1000,
 '34_5': 1000,
 '34_6': 1000,
 '34_7': 1000,
 '34_8': 1000,
 '34_9': 1000,
 '37_2': 1000,
 '37_3': 1000,
 '37_4': 1000,
 '37_5': 1000,
 '37_6': 1000,
 '37_7': 1000,
 '37_8': 1000,
 '37_9': 1000,
 '37_10': 1000,
 '37_11': 1000,
 '37_12': 1000,
 '40_2': 1000,
 '40_3': 1000,
 '40_4': 1000,
 '40_5': 1000,
 '40_6': 1000,
 '40_7': 1000,
 '40_8': 1000,
 '40_9': 1000,
 '40_10': 1000,
 '49_2': 1000,
 '49_3': 1000,
 '49_4': 1000,
 '49_5': 1000,
 '49_6': 1000,
 '49_7': 1000,
 '49_8': 1000,
 '49_9': 1000,
 '49_10': 1000,
 '50_2': 1000,
 '50_3': 1000,
 '

In [72]:
with open('../data/star-ranking/garanteed-coverage-star-L2-ranking-top1000-cache-top'+str(topk)+'_v2.tsv', 'w+') as fout:
    for i in coverage1:
        fout.write(str(i)+"\t"+str(coverage1[i])+"\n")

In [73]:
coverage2

{'31_2': [3, 5, 10],
 '31_3': [0, 1, 4],
 '31_4': [0, 1, 3],
 '31_5': [0, 0, 3],
 '31_6': [3, 5, 10],
 '31_7': [3, 5, 10],
 '31_8': [3, 5, 10],
 '31_9': [2, 3, 7],
 '32_2': [3, 5, 10],
 '32_3': [3, 4, 7],
 '32_4': [3, 3, 8],
 '32_5': [0, 0, 1],
 '32_6': [0, 0, 0],
 '32_7': [1, 3, 6],
 '32_8': [1, 2, 4],
 '32_9': [0, 2, 3],
 '32_10': [1, 1, 2],
 '32_11': [3, 4, 7],
 '33_2': [3, 5, 10],
 '33_3': [3, 5, 10],
 '33_4': [3, 5, 10],
 '33_5': [3, 5, 10],
 '33_6': [3, 5, 10],
 '33_7': [3, 5, 10],
 '33_8': [3, 5, 10],
 '33_9': [2, 4, 9],
 '33_10': [0, 0, 0],
 '34_2': [3, 5, 10],
 '34_3': [3, 5, 10],
 '34_4': [3, 4, 9],
 '34_5': [3, 5, 10],
 '34_6': [3, 5, 9],
 '34_7': [3, 5, 10],
 '34_8': [3, 5, 9],
 '34_9': [3, 5, 10],
 '37_2': [3, 5, 10],
 '37_3': [3, 5, 10],
 '37_4': [3, 5, 10],
 '37_5': [3, 5, 10],
 '37_6': [0, 0, 1],
 '37_7': [0, 0, 1],
 '37_8': [2, 3, 4],
 '37_9': [0, 0, 0],
 '37_10': [0, 0, 0],
 '37_11': [1, 1, 1],
 '37_12': [0, 0, 0],
 '40_2': [3, 5, 10],
 '40_3': [2, 2, 5],
 '40_4': [2,

In [74]:
with open('../data/star-ranking/approximated-coverage-star-L2-ranking-top1000-cache-top'+str(topk)+'_v2.tsv', 'w+') as fout:
    for i in coverage2:
        fout.write(str(i)+"\t"+str(coverage2[i])+"\n")

## save into .pickle files

In [75]:
with open('../data/star-ranking/cache-radius-star-L2-ranking-top1000-cache-top'+str(topk)+'_v2.pickle', 'wb') as handle:
    pickle.dump(cache_radius_dict, handle)

In [76]:
with open('../data/star-ranking/query-dist-star-L2-ranking-top1000-cache-top'+str(topk)+'_v2.pickle', 'wb') as handle:
    pickle.dump(query_distance_dict, handle)

In [77]:
with open('../data/star-ranking/query-radius-star-L2-ranking-top1000-cache-top'+str(topk)+'_v2.pickle', 'wb') as handle:
    pickle.dump(query_radius_dict, handle)

In [78]:
with open('../data/star-ranking/rbcapuccio-star-L2-ranking-top1000-cache-top'+str(topk)+'_v2.pickle', 'wb') as handle:
    pickle.dump(diff_distance_dict, handle)

In [79]:
with open('../data/star-ranking/garanteed-coverage-star-L2-ranking-top1000-cache-top'+str(topk)+'_v2.pickle', 'wb') as handle:
    pickle.dump(coverage1, handle)

In [81]:
with open('../data/star-ranking/approximated-coverage-star-L2-ranking-top1000-cache-top'+str(topk)+'_v2.pickle', 'wb') as handle:
    pickle.dump(coverage2, handle)