In [None]:
import time

def eval_qual(index, param=-1, k=10, queries=None, marks=None):
    """Print mean NDCG@k, mean DCG@k and mean query latency for an index.

    Every query vector is looked up with ``index.get_nns_by_vector`` (the
    call shape matches Annoy's query API -- NOTE(review): confirm the index
    type) and the returned document ids are scored against relevance marks.

    The ideal ranking used for normalisation is hard-coded as one document
    with mark 3 followed by documents with mark 2.
    NOTE(review): confirm this matches the labelled data.

    Args:
        index: Object exposing ``get_nns_by_vector(vector, n, search_k,
            include_distances)``.
        param: Forwarded to the index as ``search_k``.
        k: Number of neighbours requested per query.
        queries: Iterable of query vectors. Defaults to the module-level
            ``query_mat``.
        marks: Mapping ``(query_index, doc_id) -> mark``; the gain of a
            document is ``2**mark - 1``. Defaults to the module-level
            ``marks_dct``.

    Returns:
        None. The three metrics are printed.

    Raises:
        KeyError: If a returned ``(query_index, doc_id)`` pair has no mark.
        ZeroDivisionError: If there are no queries.
    """
    if queries is None:
        queries = query_mat
    if marks is None:
        marks = marks_dct

    # Position discounts log2(rank + 1) for ranks 1..k.
    discounts = np.log2(np.arange(k) + 2)
    ideal_gains = np.full(k, 2**2 - 1, dtype=float)
    ideal_gains[:1] = 2**3 - 1
    idcg = np.sum(ideal_gains / discounts)

    dcg_total = 0
    ndcg_total = 0
    time_used = 0
    n_queries = 0

    for i, qvec in enumerate(queries):
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        docids = index.get_nns_by_vector(vector=qvec, n=k, search_k=param, include_distances=False)
        time_used += time.perf_counter() - start

        gains = np.array([2**marks[(i, docid)] - 1 for docid in docids], dtype=float)
        # The index may return fewer than k ids; discount only what came back.
        dcg = np.sum(gains / discounts[:gains.size])
        dcg_total += dcg
        ndcg_total += dcg / idcg
        n_queries += 1

    print("ndcg@{}  : {:.4f}".format(k, ndcg_total / n_queries))
    print("dcg@{}   : {:.4f}".format(k, dcg_total / n_queries))
    print("time_used: {:.4f} ms".format(time_used * 1000 / n_queries))

In [None]:
import time

def eval_qual_faiss(index, k=10, regime="onequery", queries=None, marks=None):
    """Print mean NDCG@k, mean DCG@k and mean search latency for an index.

    The index is queried through ``index.search(queries, k)``, which must
    return ``(distances, ids)`` with one row of ids per query.

    The ideal ranking used for normalisation is hard-coded as one document
    with mark 3 followed by documents with mark 2.
    NOTE(review): confirm this matches the labelled data.

    Args:
        index: Object exposing ``search(float32_matrix, k)``.
        k: Number of neighbours requested per query.
        regime: ``"onequery"`` searches one query per call and sums the
            per-call times; ``"fullmat"`` searches the whole query matrix in
            a single call. Any other value prints a message and returns.
        queries: 2-D array-like of query vectors. Defaults to the
            module-level ``query_mat``.
        marks: Mapping ``(query_index, doc_id) -> mark``; the gain of a
            document is ``2**mark - 1``. Defaults to the module-level
            ``marks_dct``.

    Returns:
        None. The three metrics are printed.

    Raises:
        KeyError: If a returned ``(query_index, doc_id)`` pair has no mark.
        ZeroDivisionError: If there are no queries.
    """
    # Reject an unknown regime before doing any work.
    if regime not in ("onequery", "fullmat"):
        print("such regime doesn't exist")
        return

    if queries is None:
        queries = query_mat
    if marks is None:
        marks = marks_dct

    # Convert once up front so the conversion is not charged to search time.
    queries = np.ascontiguousarray(queries, dtype=np.float32)
    n_queries = queries.shape[0]

    # Position discounts log2(rank + 1) for ranks 1..k.
    discounts = np.log2(np.arange(k) + 2)
    ideal_gains = np.full(k, 2**2 - 1, dtype=float)
    ideal_gains[:1] = 2**3 - 1
    idcg = np.sum(ideal_gains / discounts)

    time_used = 0
    if regime == "onequery":
        result_rows = []
        for q in queries:
            single = q.reshape(1, -1)
            # perf_counter is monotonic and high-resolution, unlike time.time().
            start = time.perf_counter()
            _, ids = index.search(single, k)
            time_used += time.perf_counter() - start
            result_rows.append(ids[0])
    else:  # "fullmat"
        start = time.perf_counter()
        _, result_rows = index.search(queries, k)
        time_used += time.perf_counter() - start

    dcg_total = 0
    ndcg_total = 0
    for i, docids in enumerate(result_rows):
        # A negative id is treated as "no neighbour found" (faiss pads missing
        # results with -1) and contributes zero gain instead of a KeyError.
        gains = np.array(
            [2**marks[(i, docid)] - 1 if docid >= 0 else 0 for docid in docids],
            dtype=float,
        )
        dcg = np.sum(gains / discounts[:gains.size])
        dcg_total += dcg
        ndcg_total += dcg / idcg

    print("ndcg@{}  : {:.4f}".format(k, ndcg_total / n_queries))
    print("dcg@{}   : {:.4f}".format(k, dcg_total / n_queries))
    print("time_used: {:.4f} ms".format(time_used * 1000 / n_queries))

In [None]:
class FlatIndex:
    """Brute-force nearest-neighbour index over a matrix of document vectors.

    ``search`` returns ``(distances, ids)`` in that order, one row per query.
    """

    def __init__(self, doc_mat):
        # One document vector per row.
        self.doc_mat = doc_mat

    def search(self, query_emb, k):
        """Return the ``k`` nearest documents by Euclidean distance.

        Args:
            query_emb: A single query vector (1-D or shape ``(1, d)``) or a
                batch of queries with shape ``(n_queries, d)``.
            k: Number of neighbours to return per query.

        Returns:
            Tuple ``(distances, ids)``, each of shape ``(n_queries, k)``,
            with every row ordered from nearest to farthest.

        Raises:
            ValueError: If ``k`` exceeds the number of documents.
        """
        docs = np.asarray(self.doc_mat)
        queries = np.atleast_2d(np.asarray(query_emb))
        if k > docs.shape[0]:
            raise ValueError(
                "k={} exceeds the number of documents ({})".format(k, docs.shape[0])
            )

        dist_rows = []
        id_rows = []
        # One query at a time keeps the temporary at (n_docs, d) rather than
        # (n_queries, n_docs, d).
        for q in queries:
            # Sum the squared differences first, then take the root; taking
            # the root first yields the L1 distance instead.
            dists = np.sqrt(np.sum((docs - q) ** 2, axis=1))
            # argpartition picks the k smallest without sorting everything;
            # only those k are then ordered.
            nearest = np.argpartition(dists, k - 1)[:k]
            nearest = nearest[np.argsort(dists[nearest])]
            dist_rows.append(dists[nearest])
            id_rows.append(nearest)

        n_queries = queries.shape[0]
        return (
            np.array(dist_rows, dtype=float).reshape(n_queries, k),
            np.array(id_rows, dtype=np.intp).reshape(n_queries, k),
        )

In [None]:
import time

def eval_qual_hnsw(index, k=10, queries=None, marks=None):
    """Print mean NDCG@k, mean DCG@k and mean query latency for an index.

    Every query is looked up with ``index.knn_query(query, k)``, which must
    return ``(ids, distances)`` in that order, with one row of ids per query.

    The ideal ranking used for normalisation is hard-coded as one document
    with mark 3 followed by documents with mark 2.
    NOTE(review): confirm this matches the labelled data.

    Args:
        index: Object exposing ``knn_query(float32_matrix, k)``.
        k: Number of neighbours requested per query.
        queries: 2-D array-like of query vectors. Defaults to the
            module-level ``query_mat``.
        marks: Mapping ``(query_index, doc_id) -> mark``; the gain of a
            document is ``2**mark - 1``. Defaults to the module-level
            ``marks_dct``.

    Returns:
        None. The three metrics are printed.

    Raises:
        KeyError: If a returned ``(query_index, doc_id)`` pair has no mark.
        ZeroDivisionError: If there are no queries.
    """
    if queries is None:
        queries = query_mat
    if marks is None:
        marks = marks_dct

    # Convert once up front so the conversion is not charged to query time.
    queries = np.ascontiguousarray(queries, dtype=np.float32)
    n_queries = queries.shape[0]

    # Position discounts log2(rank + 1) for ranks 1..k.
    discounts = np.log2(np.arange(k) + 2)
    ideal_gains = np.full(k, 2**2 - 1, dtype=float)
    ideal_gains[:1] = 2**3 - 1
    idcg = np.sum(ideal_gains / discounts)

    dcg_total = 0
    ndcg_total = 0
    time_used = 0

    for i, q in enumerate(queries):
        single = q.reshape(1, -1)
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        ids, _ = index.knn_query(single, k)
        time_used += time.perf_counter() - start

        gains = np.array([2**marks[(i, docid)] - 1 for docid in ids[0]], dtype=float)
        dcg = np.sum(gains / discounts[:gains.size])
        dcg_total += dcg
        ndcg_total += dcg / idcg

    print("ndcg@{}  : {:.4f}".format(k, ndcg_total / n_queries))
    print("dcg@{}   : {:.4f}".format(k, dcg_total / n_queries))
    print("time_used: {:.4f} ms".format(time_used * 1000 / n_queries))