In [2]:
!pip install annoy faiss-cpu hnswlib

Collecting annoy
  Downloading annoy-1.17.3.tar.gz (647 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/647.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━[0m [32m450.6/647.5 kB[0m [31m13.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m647.5/647.5 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting hnswlib
  Downloading hnswlib-0.8.0.tar.gz (36 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import numpy as np
import time
from annoy import AnnoyIndex
import faiss
import hnswlib

# Fixed seed so the benchmark data are identical on every Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# Dataset sizes ('seribu' = one thousand, 'sejuta' = one million) and dimensionalities.
n_data_map = {'seribu': 1_000, 'sejuta': 1_000_000}
dim_map = {'2D': 2, '5D': 5}

# Benchmark matrices keyed 1..4, each of shape (n_points, n_dims) with uniform
# values in [0, 1). They are cast to float32 before being handed to the indexes.
datasets = {
    1: rng.random((n_data_map['seribu'], dim_map['2D'])).astype(np.float32),
    2: rng.random((n_data_map['seribu'], dim_map['5D'])).astype(np.float32),
    3: rng.random((n_data_map['sejuta'], dim_map['2D'])).astype(np.float32),
    4: rng.random((n_data_map['sejuta'], dim_map['5D'])).astype(np.float32),
}

# Number of nearest neighbours requested from every index.
k = 10

# Index hyper-parameters, named so a reader can tune them in one place.
ANNOY_N_TREES = 10            # number of trees in the Annoy forest
HNSW_EF_CONSTRUCTION = 200    # hnswlib build-time candidate list size
HNSW_M = 16                   # hnswlib max links per node
HNSW_EF_SEARCH = 50           # hnswlib query-time candidate list size

# Dedicated, seeded generator so the same query row is picked on every run.
query_rng = np.random.default_rng(42)

# Benchmark every library on every dataset: time index construction and a
# single k-NN query, then print the first five neighbour ids for comparison.
# Timings use time.perf_counter(), a monotonic high-resolution clock, because
# the measured queries take well under a millisecond.
for i, X in datasets.items():
    n_data, dim = X.shape
    print(f"\n{'='*50}")
    print(f"MENJALANKAN ALGORITMA UNTUK DATASET X[{i}]")
    print(f"Jumlah Data: {n_data}, Dimensi: {dim}")
    print(f"{'='*50}\n")

    # The query is one of the indexed points, so its own id is expected
    # to come back as the closest neighbour.
    query_vector = X[query_rng.integers(0, n_data)]

    # ===============================
    # 2. Annoy
    # ===============================
    print("--- Annoy ---")
    ann_index = AnnoyIndex(dim, 'euclidean')

    # Build time covers both inserting the items one by one and building the forest.
    start = time.perf_counter()
    for j, vector in enumerate(X):
        ann_index.add_item(j, vector)
    ann_index.build(ANNOY_N_TREES)
    build_time = time.perf_counter() - start

    start = time.perf_counter()
    # With include_distances=True the result is an (ids, distances) pair.
    neighbors = ann_index.get_nns_by_vector(query_vector, k, include_distances=True)
    query_time = time.perf_counter() - start

    print(f"Build time: {build_time} detik")
    print(f"Query time: {query_time} detik")
    print(f"Neighbors : {neighbors[0][:5]} ...\n")

    # ===============================
    # 3. FAISS (Flat Index)
    # ===============================
    print("--- FAISS (IndexFlatL2) ---")
    # IndexFlatL2 is an exact brute-force index: "building" it only stores the vectors.
    faiss_index = faiss.IndexFlatL2(dim)

    start = time.perf_counter()
    faiss_index.add(X)
    build_time = time.perf_counter() - start

    # FAISS expects queries as a 2-D array of shape (n_queries, dim).
    query_faiss = np.array([query_vector])

    start = time.perf_counter()
    distances, indices = faiss_index.search(query_faiss, k)
    query_time = time.perf_counter() - start

    print(f"Build time: {build_time} detik")
    print(f"Query time: {query_time} detik")
    print(f"Neighbors : {indices[0][:5]} ...\n")

    # ===============================
    # 4. HNSW (hnswlib)
    # ===============================
    print("--- HNSW (hnswlib) ---")
    hnsw_index = hnswlib.Index(space='l2', dim=dim)

    # Build time covers allocating the graph and inserting every point.
    start = time.perf_counter()
    hnsw_index.init_index(
        max_elements=n_data,
        ef_construction=HNSW_EF_CONSTRUCTION,
        M=HNSW_M,
    )
    hnsw_index.add_items(X)
    build_time = time.perf_counter() - start

    # Set the search-time ef outside the timed sections.
    hnsw_index.set_ef(HNSW_EF_SEARCH)

    start = time.perf_counter()
    labels, distances = hnsw_index.knn_query(query_vector, k=k)
    query_time = time.perf_counter() - start

    print(f"Build time: {build_time} detik")
    print(f"Query time: {query_time} detik")
    print(f"Neighbors : {labels[0][:5]} ...\n")



MENJALANKAN ALGORITMA UNTUK DATASET X[1]
Jumlah Data: 1000, Dimensi: 2

--- Annoy ---
Build time: 0.021515607833862305 detik
Query time: 8.869171142578125e-05 detik
Neighbors : [358, 0, 39, 839, 251] ...

--- FAISS (IndexFlatL2) ---
Build time: 5.9604644775390625e-05 detik
Query time: 6.246566772460938e-05 detik
Neighbors : [358   0  39 839 251] ...

--- HNSW (hnswlib) ---
Build time: 0.059081077575683594 detik
Query time: 6.270408630371094e-05 detik
Neighbors : [358   0  39 839 251] ...


MENJALANKAN ALGORITMA UNTUK DATASET X[2]
Jumlah Data: 1000, Dimensi: 5

--- Annoy ---
Build time: 0.02209949493408203 detik
Query time: 6.723403930664062e-05 detik
Neighbors : [930, 503, 406, 485, 230] ...

--- FAISS (IndexFlatL2) ---
Build time: 3.24249267578125e-05 detik
Query time: 3.695487976074219e-05 detik
Neighbors : [930 503 406 485 230] ...

--- HNSW (hnswlib) ---
Build time: 0.055478811264038086 detik
Query time: 6.365776062011719e-05 detik
Neighbors : [930 503 406 485 230] ...


MENJALANK

### Perbandingan Kinerja Algoritma ANN

| Jumlah Data/Dimensi | Annoy | FAISS (IndexFlatL2) | HNSW (hnswlib) |
| :--- | :--- | :--- | :--- |
| **1,000/2D** | Build: 0.0215 s<br>Query: 8.87e-05 s | Build: 5.96e-05 s<br>Query: 6.25e-05 s | Build: 0.0591 s<br>Query: 6.27e-05 s |
| **1,000/5D** | Build: 0.0221 s<br>Query: 6.72e-05 s | Build: 3.24e-05 s<br>Query: 3.70e-05 s | Build: 0.0555 s<br>Query: 6.37e-05 s |
| **1,000,000/2D** | Build: 33.2834 s<br>Query: 9.87e-05 s | Build: 0.0060 s<br>Query: 0.0065 s | Build: 100.8685 s<br>Query: 1.02e-04 s |
| **1,000,000/5D** | Build: 21.3030 s<br>Query: 1.08e-04 s | Build: 0.0147 s<br>Query: 0.0060 s | Build: 163.9769 s<br>Query: 1.53e-04 s |