In [7]:
import numpy as np
import time
from annoy import AnnoyIndex
import faiss
import hnswlib

# ===============================
# 1. Buat dataset 1 juta data 5D
# ===============================
n_data = 1_000_000   # bisa coba 100_000 dulu jika RAM terbatas
dim = 5
X = np.random.random((n_data, dim)).astype(np.float32)

# Query point
query = np.random.random((1, dim)).astype(np.float32)
k = 10

# ===============================
# 2. Annoy
# ===============================
print("=== Annoy ===")
ann_index = AnnoyIndex(dim, 'euclidean')
# 'euclidean' jarak Euclidean (L2).
# 'angular' cosine distance (jarak sudut).
# 'manhattan' L1 distance.
# 'hamming' data biner (bit vectors).
# 'dot' berdasarkan dot product.

start = time.time()
for i in range(n_data):
    ann_index.add_item(i, X[i])
ann_index.build(10)  # 10 trees
build_time = time.time() - start

start = time.time()
neighbors = ann_index.get_nns_by_vector(query[0], k, include_distances=True)
query_time = time.time() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", neighbors[0][:5], "...")

# ===============================
# 3. FAISS (Flat Index)
# ===============================
print("\n=== FAISS ===")
# khusus angular/cosine, pakai inner product trus normalisasi
# X_FI_ANGULAR = X.copy()
# query_FI_ANGULAR = query.copy()
# faiss.normalize_L2(X_FI_ANGULAR)
# faiss.normalize_L2(query_FI_ANGULAR)
# ==============================

faiss_index = faiss.IndexFlatL2(dim)
# IndexFlatL2 euclidean (L2)
# IndexFlatIP Inner product (pengganti angular/cosin)
# IndexFlatL1 mahattanan (L1)

start = time.time()
# faiss_index.add(X_FI_ANGULAR) #khusus angular/cosine
faiss_index.add(X)
build_time = time.time() - start

start = time.time()
# distances, indices = faiss_index.search(query_FI_ANGULAR, k) #khusus angular/cosine
distances, indices = faiss_index.search(query, k)
query_time = time.time() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", indices[0][:5], "...")

# ===============================
# 4. HNSW (hnswlib)
# ===============================
print("\n=== HNSW (hnswlib) ===")
hnsw_index = hnswlib.Index(space='l2', dim=dim)
# 'l2'  Euclidean distance
# 'ip'  Inner Product
# 'cosine'  Cosine/Angular distance

start = time.time()
hnsw_index.init_index(max_elements=n_data, ef_construction=200, M=16)
hnsw_index.add_items(X)
build_time = time.time() - start

hnsw_index.set_ef(50)

start = time.time()
labels, distances = hnsw_index.knn_query(query, k=k)
query_time = time.time() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", labels[0][:5], "...")


=== Annoy ===
Build time: 3.0713858604431152 detik
Query time: 0.0 detik
Neighbors: [1813, 97559, 722750, 603975, 753116] ...

=== FAISS ===
Build time: 0.00599980354309082 detik
Query time: 0.0030002593994140625 detik
Neighbors: [  1813  97559 722750 603975 606003] ...

=== HNSW (hnswlib) ===
Build time: 15.468973398208618 detik
Query time: 0.0010046958923339844 detik
Neighbors: [  1813  97559 722750 603975 606003] ...


Lakukan percobaan pada metric distance yang berbeda. catat hasilnya pada tabel yang anda buat sendiri seperti pada praktikum 1.

| Distance Metrics | Annoy | Faiss | HNSW | Annoy (Build/Query) | Faiss (Build/Query) | HNSW (Build/Query) |
|------------------|------------------------------------------------|------------------------------------------------|------------------------------------------------|---------------------|---------------------|--------------------|
| Euclidean (L2)       | [144948, 167411, 266196, 428562, 862362]        | [144948, 167411, 266196, 428562, 862362]        | [144948, 167411, 266196, 428562, 862362]        | 3.0713858604431152 s / 0.0s        | 0.00599980354309082 s / 0.0030002593994140625 s       | 15.468973398208618 s / 0.0010046958923339844 s      |
| Angular/Cosine   | [156706, 800294, 239183, 142512, 45361]        | [156706 800294 239183 142512  45361]        | [156706 800294 239183 142512  45361]       | 3.378959894180298 s / 0.0s        | 0.007000446319580078 s / 0.004080772399902344 s       | 13.148305654525757 s / 0.0s       |
