In [3]:
import numpy as np
import sys
sys.path.append('../')
from quantized_fast_forward.fast_forward.index import FaissPQIndex, Mode

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def bvecs_read(fname):
    """
    Load byte vectors from a file laid out as fixed-size records.

    Every record is treated as a 4-byte header followed by ``d`` uint8
    components, where ``d`` is the int32 stored at the very start of the file.

    Args:
        fname: Path of the file to read.

    Returns:
        np.ndarray of dtype uint8 with one row per record and ``d`` columns
        (the 4 header bytes of each record are dropped).
    """
    # The dimension is taken from the first record only.
    dim = np.fromfile(fname, dtype=np.int32, count=1)[0]
    raw_bytes = np.fromfile(fname, dtype=np.uint8)
    records = raw_bytes.reshape(-1, dim + 4)
    # Copy so the result does not keep the header-carrying buffer alive.
    return records[:, 4:].copy()


def ivecs_read(fname):
    """
    Load int32 vectors from a file laid out as fixed-size records.

    Every record is treated as one int32 header followed by ``d`` int32
    components, where ``d`` is the first int32 in the file.

    Args:
        fname: Path of the file to read.

    Returns:
        np.ndarray of dtype int32 with one row per record and ``d`` columns
        (the header column is dropped).
    """
    raw = np.fromfile(fname, dtype=np.int32)
    # The dimension is taken from the first record only.
    row_width = raw[0] + 1
    records = raw.reshape(-1, row_width)
    # Copy so the result is a compact array without the header column.
    return records[:, 1:].copy()


def fvecs_read(fname):
    """
    Load float32 vectors from a file with the same record layout as ivecs.

    The file is parsed by ``ivecs_read`` and the resulting int32 payload is
    reinterpreted bit-for-bit as float32 (no numeric conversion takes place).

    Args:
        fname: Path of the file to read.

    Returns:
        np.ndarray of dtype float32 with one row per record.
    """
    int32_payload = ivecs_read(fname)
    return int32_payload.view(np.float32)

In [5]:
def recall_at_r(I, gt, r):
    """
    Compute Recall@r averaged over all queries.

    A query counts as a hit when its first ground-truth entry appears among
    the first r retrieved entries.

    Args:
        I (np.ndarray): Retrieval result, with shape (#queries, ANY), integer.
                        Each entry is the index of a database item.
        gt (np.ndarray): Ground truth, with shape (#queries, ANY), integer.
                         Only gt[:, 0] is used.
        r (int): Top-r cut-off; must not exceed the number of columns of I.

    Returns:
        The average recall@r over all queries.
    """
    assert r <= I.shape[1]
    assert len(I) == len(gt)
    retrieved_top_r = I[:, :r]
    # Keep the column axis so the comparison broadcasts row by row.
    true_nearest = gt[:, :1]
    hits = np.sum(retrieved_top_r == true_nearest)
    n_queries = float(I.shape[0])
    return hits / n_queries

def search_query(query: np.ndarray, k: int, vectors: np.ndarray):
    """
    Brute-force nearest-neighbour lookup for a single query.

    Args:
        query (np.ndarray): The query vector; it is subtracted from every row
                            of ``vectors``.
        k (int): Number of neighbours to keep.
        vectors (np.ndarray): Candidate vectors, one per row.

    Returns:
        Tuple ``(distances, indices)``: the squared Euclidean distances of the
        k closest rows in ascending order, and the row positions of those rows
        in ``vectors``.
    """
    offsets = vectors - query
    squared_dist = np.square(offsets).sum(axis=1)
    # Rank every candidate, then keep the first k positions.
    nearest = squared_dist.argsort()[:k]
    return squared_dist[nearest], nearest

def search(queries: np.ndarray, k: int, vectors: np.ndarray):
    """
    Run ``search_query`` for every row of ``queries`` and stack the results.

    Args:
        queries (np.ndarray): Query vectors, one per row.
        k (int): Number of neighbours to keep per query.
        vectors (np.ndarray): Candidate vectors, one per row.

    Returns:
        Tuple ``(distances, indices)``; each array has one row per query, in
        the order of ``queries``.
    """
    per_query = [
        search_query(queries[row], k, vectors)
        for row in range(queries.shape[0])
    ]
    stacked_distances = np.vstack([dist for dist, _ in per_query])
    stacked_indices = np.vstack([idx for _, idx in per_query])
    return stacked_distances, stacked_indices
    

In [7]:
# Directory holding the siftsmall files. It is defined once so the notebook can
# be pointed at another copy of the dataset by editing a single line.
SIFTSMALL_DIR = "/home/catalinlup/MyWorkspace/MasterThesis/ThesisCodebase/big_data/siftsmall"

# Base, learn and query vectors are read as float32 arrays; the ground truth is
# read as an int32 array.
base = fvecs_read(f"{SIFTSMALL_DIR}/siftsmall_base.fvecs")
training_data = fvecs_read(f"{SIFTSMALL_DIR}/siftsmall_learn.fvecs")
queries = fvecs_read(f"{SIFTSMALL_DIR}/siftsmall_query.fvecs")
ground_truth = ivecs_read(f"{SIFTSMALL_DIR}/siftsmall_groundtruth.ivecs")

In [15]:
# File handed to FaissPQIndex.from_disk when the index is loaded.
# NOTE(review): this is an absolute, machine-specific path, and the .pickle
# suffix suggests the file is unpickled on load — only point it at trusted
# files (confirm against from_disk).
INDEX_PATH = '/home/catalinlup/MyWorkspace/MasterThesis/datasets/quantized_indices/siftsmall2.pickle'

In [16]:
# Create the index object from the file at INDEX_PATH.
index = FaissPQIndex.from_disk(INDEX_PATH)

In [17]:
# One string id per row of `base`: "0", "1", ... in row order.
IDS = list(map(str, range(base.shape[0])))
# NOTE(review): _get_vectors is a private method of the index class. The recall
# check later compares row positions in `vectors` with ground-truth ids, so it
# presumably relies on `vectors` following the order of IDS — confirm.
# `local_ids` is not used by any later cell visible here.
vectors, local_ids = index._get_vectors(IDS, Mode.PASSAGE)

In [18]:
# Brute-force top-10 search of every query against the vectors taken from the index.
distances, I = search(queries=queries, k=10, vectors=vectors)

In [19]:
# Recall@10 of the retrieved indices against the first ground-truth neighbour.
recall_at_r(I=I, gt=ground_truth, r=10)

0.76