# **Initialization:**

In [None]:
import numpy as np
import sklearn
import sklearn.preprocessing

!pip install faiss-cpu
import faiss

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Load training set embedding
# Change the file location accordingly
embeddings  = np.load('/content/drive/MyDrive/CS260/final_project/Dataset/2014_train_embeddings.npy')

# Normalized training set embedding.
# Derived from the array that is already in memory instead of reading the
# same file from Drive a second time; normalize() returns a new array by
# default, so `embeddings` keeps its raw values.
normEmbeddings = sklearn.preprocessing.normalize(embeddings)

# Load validation set embedding
valEmbeddings  = np.load('/content/drive/MyDrive/CS260/final_project/Dataset/2014_val_embeddings.npy')

# Normalized validation set embedding (same reasoning as above).
normValEmbeddings = sklearn.preprocessing.normalize(valEmbeddings)

# Load benchmark in cosine similarity.
cosine_index  = np.load('/content/drive/MyDrive/CS260/final_project/Dataset/cosine_idx.npy')
print(cosine_index.shape)

# Load benchmark in euclidean distance(L2)
euclid_index = np.load('/content/drive/MyDrive/CS260/final_project/Dataset/euclidean_idx.npy')
print(euclid_index.shape)


# evaluation function
def evaluation(search_index, benchmark_index, topk):
  """
    Calculate the evaluation metrics precision@k, recall@k, f1@k and ndcg@k,
    each averaged over all queries (rows).

    A returned id is a hit when it appears among the first `topk` entries of
    the benchmark row. Its graded relevance is (relevant - rank) / relevant,
    where rank is its 0-based position in the benchmark row and `relevant` is
    the number of non -1 entries in the whole benchmark row.

    Args:
      - search_index: 2-D array of ids returned by the knn algorithm, one row
        per query, with at least `topk` columns. Entries equal to -1 are
        treated as empty result slots and never count as hits.
      - benchmark_index: 2-D array with the benchmark ids to compare with,
        one row per query, padded with -1.
      - topk: Top K that you are interested in.

    Output:
      - precisionAtK: Precision@k calculation result (hits / topk).
      - recallAtK: Recall@k calculation result (hits / relevant).
      - f1AtK: F1@k calculation result.
      - ndcgAtK: NDCG@k calculation result.

    Queries without any relevant benchmark entry contribute 0 to every
    metric. An empty benchmark yields (0.0, 0.0, 0.0, 0.0).
  """
  def dcg_at_k(r):
    # DCG with an undiscounted first position: r_1 + sum_{i>=2} r_i / log2(i).
    if r.size == 0:
      return 0.0
    return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))

  def ndcg_at_k(r, relevant):
    # The ideal ranking is cut at topk: only topk results are scored, so a
    # perfect top-k answer must normalise to exactly 1.
    ideal = np.arange(relevant, 0, -1)[:topk] / relevant
    dcg_ideal = dcg_at_k(ideal)
    if dcg_ideal == 0:
      return 0.0
    return dcg_at_k(np.array(r, dtype=float)) / dcg_ideal

  numQueries = len(benchmark_index)
  if numQueries == 0:
    return 0.0, 0.0, 0.0, 0.0

  precisionSum = 0
  recallSum = 0
  f1Sum = 0
  ndcgSum = 0
  for i in range(numQueries):
    relevant = np.count_nonzero(benchmark_index[i] != -1)
    if relevant == 0:
      # Nothing to retrieve for this query; avoid dividing by zero.
      continue

    truth = benchmark_index[i, :topk]
    hit = 0
    r = []
    for j in range(topk):
      candidate = search_index[i, j]
      # -1 is padding on both sides; it must not match the benchmark padding.
      pos = np.where(truth == candidate)[0] if candidate != -1 else []
      if len(pos) > 0:
        hit += 1
        r.append((relevant - pos[0]) / relevant)
      else:
        r.append(0)
    # TP/(TP + FP)
    precision = hit / topk
    # TP/(TP + FN)
    recall = hit / relevant

    f1Sum += 2 * precision * recall / (precision + recall) if precision * recall != 0 else 0
    recallSum += recall
    precisionSum += precision
    ndcgSum += ndcg_at_k(r, relevant)

  precisionAtK = precisionSum / numQueries
  recallAtK = recallSum / numQueries
  f1AtK = f1Sum / numQueries
  ndcgAtK = ndcgSum / numQueries
  return precisionAtK, recallAtK, f1AtK, ndcgAtK

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
(40504, 1000)
(40504, 1000)


In [None]:
!pip install hnswlib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## **Timer**

In [None]:
import time
def timer(start,end, text):
  """
    Print the elapsed time between two timestamps and return it.

    Args:
      - start: start timestamp in seconds.
      - end: end timestamp in seconds.
      - text: label printed in front of the formatted duration.

    Output:
      - The elapsed time `end - start` in seconds. The duration is also
        printed as HH:MM:SS.ss after `text`.
  """
  elapsed = end - start
  whole_hours, leftover = divmod(elapsed, 3600)
  whole_minutes, secs = divmod(leftover, 60)
  stamp = f"{int(whole_hours):0>2}:{int(whole_minutes):0>2}:{secs:05.2f}"
  print(text, stamp)
  return elapsed

# Brute Force method for calculating ground truth labels

# **You do not need to run the following two code blocks anymore unless you want to use a new distance metric or create a benchmark for a different embedding**.

In [None]:
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

# Cosine Similarity
# For every validation embedding, store the ids of its 1000 most similar
# training embeddings (most similar first). Apart from the best match, ids
# whose similarity is not above `threshold` are replaced by -1.
threshold = 0.75
count_sum = 0
# Per-chunk results are collected in a list and joined once after the loop;
# growing an array with np.append copies everything gathered so far on
# every iteration.
index_chunks = []

for i in range(0, len(normValEmbeddings), 1000):
  print(i)
  similarities = cosine_similarity(normValEmbeddings[i:i+1000], normEmbeddings)
  # Reverse slice of the ascending argsort: last 1000 columns, best first.
  index = similarities.argsort()[:, :-1001:-1]
  # Column 0 (the best match) is always kept, even below the threshold.
  index[:, 1:] = np.where(np.take_along_axis(similarities, index[:,1:], axis=1) > threshold, index[:,1:], -1)
  count_sum += np.count_nonzero(index != -1)
  index_chunks.append(index)

similar_indices = np.concatenate(index_chunks, axis=0)
# Written once, after all chunks are done, instead of rewriting the whole
# growing array to Drive on every iteration.
# NOTE(review): the initialization cell loads this benchmark from
# '.../CS260/final_project/Dataset/cosine_idx.npy' while this cell writes to
# '.../CS260/Dataset/cosine_idx.npy' -- confirm which location is intended.
np.save('/content/drive/MyDrive/CS260/Dataset/cosine_idx.npy', similar_indices)

# Average number of retained neighbours per validation embedding.
print(count_sum / normValEmbeddings.shape[0])

In [None]:
# Euclidean distance
# For every validation embedding, store the ids of its 1000 nearest training
# embeddings (nearest first). Apart from the nearest one, ids whose distance
# is not below `threshold` are replaced by -1.
threshold = 16
count_sum = 0
# Per-chunk results are collected in a list and joined once after the loop;
# growing an array with np.append copies everything gathered so far on
# every iteration.
index_chunks = []

for i in range(0, len(valEmbeddings), 1000):
  print(i)
  similarities = euclidean_distances(valEmbeddings[i:i+1000], embeddings)
  # Ascending argsort: the first 1000 columns are the nearest neighbours.
  index = similarities.argsort()[:, :1000]
  # Column 0 (the nearest item) is always kept, even beyond the threshold.
  index[:, 1:] = np.where(np.take_along_axis(similarities, index[:,1:], axis=1) < threshold, index[:,1:], -1)
  count_sum += np.count_nonzero(index != -1)
  index_chunks.append(index)

similar_indices = np.concatenate(index_chunks, axis=0)
# Written once, after all chunks are done, instead of rewriting the whole
# growing array to Drive on every iteration.
# NOTE(review): the initialization cell loads this benchmark from
# '.../CS260/final_project/Dataset/euclidean_idx.npy' while this cell writes
# to '.../CS260/Dataset/euclidean_idx.npy' -- confirm which location is intended.
np.save('/content/drive/MyDrive/CS260/Dataset/euclidean_idx.npy', similar_indices)

# Average number of retained neighbours per validation embedding
# (divided by the size of the array this cell actually iterated over).
print(count_sum / valEmbeddings.shape[0])

In [None]:
# # Load benchmark in cosine similarity.
# cosine_index  = np.load('/content/drive/MyDrive/CS260/final_project/Dataset/cosine.npy')
# print(cosine_index.shape)

# # Load benchmark in euclidean distance(L2)
# euclid_index = np.load('/content/drive/MyDrive/CS260/final_project/Dataset/euclidean.npy')
# print(euclid_index.shape)

# **Cosine Distance**

In [None]:
from os import kill
import faiss
import random



# Most basic searching method in Inner Product/Cosine distance
class FaissFlatIPRecommender:
    """Faiss flat inner-product index over the given embedding matrix.

    The built index is exposed as `self.index`; callers query it directly
    with `self.index.search(...)`.
    """

    def __init__(self, embedding):
        n_dims = embedding.shape[1]
        flat_index = faiss.IndexFlatIP(n_dims)
        # The embedding is converted to float32 before it is added.
        flat_index.add(embedding.astype('float32'))
        self.index = flat_index

# IVF Flat with cosine metric
class FaissIVFFlatIPRecommender:
    """Faiss IVF-Flat index using the inner-product metric.

    Args:
      - embedding: 2-D array (items x dimensions) to train on and to index.
      - nlist: number of inverted lists handed to faiss.IndexIVFFlat
        (hyperparameter, default 20).
      - nprobe: number of lists probed per query (hyperparameter, default 3);
        if nprobe == nlist the search is exact.

    The built index is exposed as `self.index`.
    """

    def __init__(self, embedding, nlist=20, nprobe=3):
        dim = embedding.shape[1]
        # Convert once; the same float32 copy is used for train() and add().
        vectors = embedding.astype('float32')
        quantizer = faiss.IndexFlatIP(dim)
        self.index = faiss.IndexIVFFlat(quantizer, dim, nlist, faiss.METRIC_INNER_PRODUCT)
        self.index.nprobe = nprobe
        self.index.train(vectors)
        self.index.add(vectors)

# HNSW lightweight cosine
import hnswlib
class HNSWRecommender:
    """Nearest-neighbour search backed by an hnswlib index.

    Args:
      - embedding: 2-D array (items x dimensions); row i is inserted with id i.
      - similarity: space name handed to hnswlib.Index ('cosine' by default;
        this notebook also uses 'l2').
      - ef_construction: forwarded to init_index (default 200).
      - M: forwarded to init_index (default 200).
      - ef: forwarded to set_ef (default 2000).
    """

    MODEL_NAME = 'HNSW'

    def __init__(self, embedding, similarity='cosine', ef_construction=200, M=200, ef=2000):
        self.embedding = embedding
        self.similarity = similarity
        self.hnsw = hnswlib.Index(space = similarity, dim = self.embedding.shape[1])
        self.hnsw.init_index(max_elements = self.embedding.shape[0], ef_construction = ef_construction, M = M)
        self.hnsw.set_ef(ef)
        ids = np.arange(self.embedding.shape[0])
        # Element insertion (can be called several times):
        self.hnsw.add_items(embedding, ids)

    def get_similar_items_topn(self, embedding, topn=1000):
        """Return the ids of the `topn` nearest indexed items for each query row.

        Args:
          - embedding: query embeddings handed to knn_query.
          - topn: number of closest elements requested per query.

        Output:
          - labels: the id array returned by knn_query. The metric is the
            `similarity` space chosen at construction time. The distances
            that knn_query also returns are discarded.
        """
        labels, _ = self.hnsw.knn_query(embedding, k=topn)
        return labels

import random
class FaissHNSWRecommender:
    """Faiss HNSW-Flat index.

    Args:
      - embedding: 2-D array (items x dimensions) to index.
      - similarity: 'cosine' (mapped to faiss.METRIC_INNER_PRODUCT) or 'l2'
        (mapped to faiss.METRIC_L2).
      - M: passed as the second argument of faiss.IndexHNSWFlat (default 200).
      - efConstruction: assigned to index.hnsw.efConstruction (default 200).
      - efSearch: assigned to index.hnsw.efSearch (default 2000).

    Raises:
      - ValueError: if `similarity` is neither 'cosine' nor 'l2'.

    Note: inner product equals cosine similarity only for L2-normalized
    vectors. This class does not normalize anything, so for 'cosine' both the
    indexed embedding and the queries have to be normalized by the caller.

    The built index is exposed as `self.index`.
    """

    def __init__(self, embedding, similarity='cosine', M=200, efConstruction=200, efSearch=2000):
        self.similarity = similarity
        if self.similarity == "cosine":
            metric_constant = faiss.METRIC_INNER_PRODUCT
        elif self.similarity == "l2":
            metric_constant = faiss.METRIC_L2
        else:
            # Fail with a clear message instead of hitting an unbound
            # `metric_constant` a few lines further down.
            raise ValueError(f"similarity must be 'cosine' or 'l2', got {similarity!r}")
        # The index build by Faiss
        self.index = faiss.IndexHNSWFlat(embedding.shape[1], M, metric_constant)
        self.index.hnsw.efConstruction = efConstruction
        self.index.hnsw.efSearch = efSearch
        self.index.add(embedding.astype('float32'))





In [None]:
import pandas as pd

#create DataFrame
# Empty result table; the benchmark cells append one row per (method, k) run.
df = pd.DataFrame(
    columns=[
        'distance',
        'method',
        'k',
        'precision',
        'recall',
        'NDCG',
        'F1',
        'Time',
    ]
)

#view DataFrame


In [None]:
########################
# cosine four methods
########################

# Every method is queried with the same fixed sample of normalized validation
# embeddings and scored against the cosine benchmark, so the rows appended to
# `df` are directly comparable.
ks = [1, 10, 100, 1000]
x, y = normValEmbeddings.shape
random.seed(260)
sample_5000 = random.sample(range(x), 5000) # sample 5000 embeddings
cosine_queries = normValEmbeddings[sample_5000]
cosine_truth = cosine_index[sample_5000]

# One entry per method:
#   (name stored in df, label for timer output, name in the status line,
#    index builder, True if queried through a Faiss `index.search`)
# Builders are lambdas so each index is only built when its turn comes.
# The Faiss HNSW index uses inner product, which matches the cosine benchmark
# only on normalized vectors, so it gets normEmbeddings like the others.
cosine_methods = [
  ('Flat', 'Faiss FLAT IP', 'Faiss Flat',
   lambda: FaissFlatIPRecommender(normEmbeddings), True),
  ('IVFFlat', 'Faiss IVFFLAT IP', 'Faiss IVFFlat',
   lambda: FaissIVFFlatIPRecommender(normEmbeddings), True),
  ('HNSW(lightweight)', 'HNSW IP', 'HNSW',
   lambda: HNSWRecommender(normEmbeddings), False),
  ('HNSW', 'HNSW Faiss IP', 'HNSW',
   lambda: FaissHNSWRecommender(normEmbeddings, 'cosine'), True),
]

for method, label, status, build, is_faiss in cosine_methods:
  # Build index
  start_time = time.time()
  recommender = build()
  recommender.init_time = timer(start_time, time.time(), f'{label} Init: ')
  print(f'Evaluating {status} model...')

  # Get search result in validation set.
  for k in ks:
    start_time = time.time()
    if is_faiss:
      distance, index = recommender.index.search(cosine_queries.astype("float32"), k)
    else:
      index = recommender.get_similar_items_topn(cosine_queries, topn=k)
    search_time_i = timer(start_time, time.time(), f'{label} Search time topk={k}: ')

    precisionAtK, recallAtK, f1AtK, ndcgAtK = evaluation(index, cosine_truth, k)
    print(f'Precision@{k}: ', precisionAtK)
    print(f'ndcg@{k}: ',ndcgAtK)
    print(f'recallAt{k}:', recallAtK)
    print(f'f1At{k}:',f1AtK)
    df.loc[len(df)] = ['cosine', method, k, precisionAtK, recallAtK, ndcgAtK, f1AtK, search_time_i]
  print('#'*10)



  

Faiss FLAT IP Init:  00:00:03.36
Evaluating Faiss Flat model...
Faiss FLAT IP Search time topk=1:  00:00:57.30
Precision@1:  0.9876
ndcg@1:  0.2670990836599389
recallAt1: 0.19943612635754898
f1At1: 0.2258954613697324
Faiss FLAT IP Search time topk=10:  00:00:41.69
Precision@10:  0.7913399999999943
ndcg@10:  0.5411604121205668
recallAt10: 0.39123183770686965
f1At10: 0.2665246571566844
Faiss FLAT IP Search time topk=100:  00:00:43.01
Precision@100:  0.5636279999999988
ndcg@100:  0.805752296436554
recallAt100: 0.6966748047803151
f1At100: 0.3561471261605895
Faiss FLAT IP Search time topk=1000:  00:00:43.03
Precision@1000:  0.2591959999999986
ndcg@1000:  0.999879199009122
recallAt1000: 0.9998165999999951
f1At1000: 0.3113273279420775
##########
Faiss IVFFLAT IP Init:  00:00:05.09
Evaluating Faiss IVFFlat model...
Faiss FLAT IP Search time topk=1:  00:01:10.99
Precision@1:  0.9786
ndcg@1:  0.25952313865889887
recallAt1: 0.1922451011068691
f1At1: 0.21844467974086387
Faiss FLAT IP Search time t

# **L2 distance:**

In [None]:
class FaissIVFFlatL2Recommender:
    """Faiss IVF-Flat index using the L2 metric.

    Args:
      - embedding: 2-D array (items x dimensions) to train on and to index.
      - nlist: number of inverted lists handed to faiss.IndexIVFFlat
        (hyperparameter, default 20).
      - nprobe: number of lists probed per query (hyperparameter, default 3).

    The built index is exposed as `self.index`.
    """

    def __init__(self, embedding, nlist=20, nprobe=3):
        dim = embedding.shape[1]
        # Convert once; the same float32 copy is used for train() and add().
        vectors = embedding.astype('float32')
        quantizer = faiss.IndexFlatL2(dim)
        self.index = faiss.IndexIVFFlat(quantizer, dim, nlist, faiss.METRIC_L2)
        self.index.nprobe = nprobe
        self.index.train(vectors)
        self.index.add(vectors)

class FaissFlatL2Recommender:
    """Faiss flat L2 index over the given embedding matrix.

    The built index is exposed as `self.index`; callers query it directly
    with `self.index.search(...)`.
    """

    def __init__(self, embedding):
        n_dims = embedding.shape[1]
        flat_index = faiss.IndexFlatL2(n_dims)
        # The embedding is converted to float32 before it is added.
        flat_index.add(embedding.astype('float32'))
        self.index = flat_index



In [None]:
####################
# L2 Distance
####################
from os import kill
import faiss
import random
import time


# Every method is queried with the same fixed sample of raw validation
# embeddings and scored against the Euclidean benchmark, so the rows appended
# to `df` are directly comparable.
ks = [1, 10, 100, 1000]
x, y = valEmbeddings.shape
random.seed(260)
sample_5000 = random.sample(range(x), 5000) # sample 5000 embeddings
l2_queries = valEmbeddings[sample_5000]
l2_truth = euclid_index[sample_5000]

# One entry per method:
#   (name stored in df, label for timer output, name in the status line,
#    index builder, True if queried through a Faiss `index.search`)
# Builders are lambdas so each index is only built when its turn comes.
# Flat and IVFFlat use the L2 classes here: an inner-product index ranks by a
# different criterion than the Euclidean benchmark it is scored against.
l2_methods = [
  ('Flat', 'Faiss FLAT L2', 'Faiss Flat',
   lambda: FaissFlatL2Recommender(embeddings), True),
  ('IVFFlat', 'Faiss IVFFLAT L2', 'Faiss IVFFlat',
   lambda: FaissIVFFlatL2Recommender(embeddings), True),
  ('HNSW(lightweight)', 'HNSW L2', 'HNSW',
   lambda: HNSWRecommender(embeddings, 'l2'), False),
  ('HNSW', 'HNSW Faiss L2', 'HNSW',
   lambda: FaissHNSWRecommender(embeddings, 'l2'), True),
]

for method, label, status, build, is_faiss in l2_methods:
  # Build index
  start_time = time.time()
  recommender = build()
  recommender.init_time = timer(start_time, time.time(), f'{label} Init: ')
  print(f'Evaluating {status} model...')

  # Get search result in validation set.
  for k in ks:
    start_time = time.time()
    if is_faiss:
      distance, index = recommender.index.search(l2_queries.astype("float32"), k)
    else:
      index = recommender.get_similar_items_topn(l2_queries, topn=k)
    search_time_i = timer(start_time, time.time(), f'{label} Search time topk={k}: ')

    precisionAtK, recallAtK, f1AtK, ndcgAtK = evaluation(index, l2_truth, k)
    print(f'Precision@{k}: ', precisionAtK)
    print(f'ndcg@{k}: ',ndcgAtK)
    print(f'recallAt{k}:', recallAtK)
    print(f'f1At{k}:',f1AtK)
    df.loc[len(df)] = ['L2', method, k, precisionAtK, recallAtK, ndcgAtK, f1AtK, search_time_i]
  print('#'*10)

df.to_csv('result_method.csv')
print('End!')




Faiss FLAT L2 Init:  00:00:02.48
Evaluating Faiss Flat model...
Faiss FLAT L2 Search time topk=1:  00:00:50.52
Precision@1:  0.083
ndcg@1:  0.008156374127005825
recallAt1: 0.0033656836047436956
f1At1: 0.004782567188669662
Faiss FLAT L2 Search time topk=10:  00:00:42.05
Precision@10:  0.19852000000000383
ndcg@10:  0.07356303275667271
recallAt10: 0.0394188592198179
f1At10: 0.03664747523605443
Faiss FLAT L2 Search time topk=100:  00:00:43.32
Precision@100:  0.3329919999999974
ndcg@100:  0.28647656381445574
recallAt100: 0.2410720958353772
f1At100: 0.17411845740821577
Faiss FLAT L2 Search time topk=1000:  00:00:48.74
Precision@1000:  0.34664439999999874
ndcg@1000:  0.713119784513183
recallAt1000: 0.853053009311692
f1At1000: 0.4051613202199642
##########
Faiss IVFFLAT L2 Init:  00:00:05.32
Evaluating Faiss IVFFlat model...
Faiss FLAT L2 Search time topk=1:  00:01:18.69
Precision@1:  0.0832
ndcg@1:  0.008328910135287948
recallAt1: 0.003489422565302947
f1At1: 0.004953024592900651
Faiss FLAT L2

# **Different Inputs:**

In [None]:
import pandas as pd

# Column ranges of the embedding matrix that are fed to the index in each
# trial; a later cell uses them as `embeddings[:, <range>]`.
# NOTE(review): every other boundary here is a multiple of 512, but `text`
# and `pic_text` stop at 1028 rather than 2*512 = 1024 -- confirm whether
# 1028 is intended.
pic = range(0, 512)
text = range(512, 1028)
pic_text = range(0, 1028)
pic_text2 = range(0, 3 * 512)
pic_text3 = range(0, 4 * 512)
pic_text4 = range(0, 5 * 512)
text_all = range(512, 6 * 512)

# Empty result table, one row per trial.
df_time = pd.DataFrame(
    columns=[
        'distance',
        'method',
        'k',
        'data_input',
        'precision',
        'recall',
        'NDCG',
        'F1',
        'Time',
    ]
)

# `input_names[n]` is the label stored in df_time for `indexes[n]`.
input_names = [
    'pic',
    'text',
    'pic_text',
    'pic_text2',
    'pic_text3',
    'pic_text4',
    'text_all',
]
indexes = [
    pic,
    text,
    pic_text,
    pic_text2,
    pic_text3,
    pic_text4,
    text_all,
]

import random
import hnswlib
import time
class HNSWRecommender:
    """Nearest-neighbour search backed by an hnswlib index.

    Args:
      - embedding: 2-D array (items x dimensions); row i is inserted with id i.
      - similarity: space name handed to hnswlib.Index ('cosine' by default;
        this notebook also uses 'l2').
      - ef_construction: forwarded to init_index (default 200).
      - M: forwarded to init_index (default 200).
      - ef: forwarded to set_ef (default 2000).
    """

    MODEL_NAME = 'HNSW'

    def __init__(self, embedding, similarity='cosine', ef_construction=200, M=200, ef=2000):
        self.embedding = embedding
        self.similarity = similarity
        self.hnsw = hnswlib.Index(space = similarity, dim = self.embedding.shape[1])
        self.hnsw.init_index(max_elements = self.embedding.shape[0], ef_construction = ef_construction, M = M)
        self.hnsw.set_ef(ef)
        ids = np.arange(self.embedding.shape[0])
        # Element insertion (can be called several times):
        self.hnsw.add_items(embedding, ids)

    def get_similar_items_topn(self, embedding, topn=1000):
        """Return the ids of the `topn` nearest indexed items for each query row.

        Args:
          - embedding: query embeddings handed to knn_query.
          - topn: number of closest elements requested per query.

        Output:
          - labels: the id array returned by knn_query. The metric is the
            `similarity` space chosen at construction time. The distances
            that knn_query also returns are discarded.
        """
        labels, _ = self.hnsw.knn_query(embedding, k=topn)
        return labels

# The query sample and its ground truth do not depend on the column subset,
# so they are prepared once instead of on every trial.
x, y = valEmbeddings.shape
random.seed(260)
sample_5000 = random.sample(range(x), 5000) # sample 5000 embeddings
val_sample = valEmbeddings[sample_5000]
truth_sample = euclid_index[sample_5000]
k = 100

# One trial per column subset: build an hnswlib L2 index on those columns
# only and score its top-k against the Euclidean benchmark in `euclid_index`.
for input_name, i in zip(input_names, indexes):
  print(f'{min(i)}:{max(i)} Trial: ')

  start_time = time.time()
  hnsw_recommender = HNSWRecommender(embeddings[:, i], 'l2')
  hnsw_recommender.init_time = timer(start_time, time.time(), 'HNSW L2 Init: ')
  print('Evaluating HNSW model...')

  start_time = time.time()
  top_k_ids = hnsw_recommender.get_similar_items_topn(val_sample[:, i], topn=k)
  # The index queried here is the hnswlib one, not a Faiss index.
  search_time_i = timer(start_time, time.time(), f'HNSW L2 Search time topk={k}: ')

  precisionAtK, recallAtK, f1AtK, ndcgAtK = evaluation(top_k_ids, truth_sample, k)
  print(f'Precision@{k}: ', precisionAtK)
  print(f'ndcg@{k}: ',ndcgAtK)
  print(f'recallAt{k}:', recallAtK)
  print(f'f1At{k}:',f1AtK)
  df_time.loc[len(df_time)] = ['L2', 'HNSW(lightweight)', k, input_name, precisionAtK, recallAtK, ndcgAtK, f1AtK, search_time_i]

  print('#'*20)

df_time.to_csv('result_time.csv')
print('End!')




0:511 Trial: 
HNSW L2 Init:  00:01:01.11
Evaluating HNSW model...
Faiss HNSW L2 Search time topk=100:  00:00:23.33
Precision@100:  0.2846759999999968
ndcg@100:  0.25541912728141725
recallAt100: 0.20585173970487558
f1At100: 0.14885388486903964
####################
512:1027 Trial: 
HNSW L2 Init:  00:01:20.23
Evaluating HNSW model...
Faiss HNSW L2 Search time topk=100:  00:00:28.77
Precision@100:  0.2529039999999992
ndcg@100:  0.22667546930876148
recallAt100: 0.18265516144116103
f1At100: 0.1356911960014816
####################
0:1027 Trial: 
HNSW L2 Init:  00:02:14.30
Evaluating HNSW model...
Faiss HNSW L2 Search time topk=100:  00:00:43.99
Precision@100:  0.4089619999999999
ndcg@100:  0.38143014443062234
recallAt100: 0.3060234299362849
f1At100: 0.21762061155816437
####################
0:1535 Trial: 
HNSW L2 Init:  00:02:23.45
Evaluating HNSW model...
Faiss HNSW L2 Search time topk=100:  00:00:50.35
Precision@100:  0.49982800000000127
ndcg@100:  0.4670412457195233
recallAt100: 0.365530612