In [1]:
%pip install hnswlib

Collecting hnswlib
  Downloading hnswlib-0.8.0.tar.gz (36 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: hnswlib
  Building wheel for hnswlib (pyproject.toml): started
  Building wheel for hnswlib (pyproject.toml): finished with status 'done'
  Created wheel for hnswlib: filename=hnswlib-0.8.0-cp312-cp312-win_amd64.whl size=160821 sha256=11fd6f3b5b9285d543c33f0ca9839c8706749c658c773d664346b4cd05a4b724
  Stored in directory: c:\users\gio\appdata\local\pip\cache\wheels\ac\39\b3\cbd7f9cbb76501d2d5fbc84956e70d0b94e788aac87bda465e
Successfully built hnswlib
Installing collected packages: hnswlib
Successfully installed hnswlib-0.8.0
Note: you may need to restart the kern

In [6]:
import hnswlib
import numpy as np
import time
from sklearn.neighbors import NearestNeighbors

# ===========================
# 1. Buat data 2D acak
# ===========================
num_elements = 1000000
dim = 5
data = np.random.random((num_elements, dim)).astype(np.float32)

# Query point
query = np.array([[0.5, 0.5, 0.5, 0.5, 0.5]], dtype=np.float32)
k = 5  # cari 5 tetangga terdekat

# ===========================
# 2. Exact NN (Brute Force)
# ===========================
nn = NearestNeighbors(n_neighbors=k, algorithm='brute', metric='euclidean')
nn.fit(data)

start = time.time()
distances, indices = nn.kneighbors(query)
end = time.time()

print("=== Exact NN ===")
print("Indices:", indices)
print("Distances:", distances)
print("Waktu:", end - start, "detik")

# ===========================
# 3. HNSW
# ===========================
# Inisialisasi index HNSW
p = hnswlib.Index(space='l2', dim=dim)  

# Ukuran maksimum elemen yang bisa ditampung
p.init_index(max_elements=num_elements, ef_construction=100, M=16)

# Tambahkan data
p.add_items(data)

# Set parameter pencarian
p.set_ef(50)   # tradeoff speed vs accuracy

start = time.time()
labels, distances = p.knn_query(query, k=k)
end = time.time()

print("\n=== HNSW ===")
print("Indices:", labels)
print("Distances:", distances)
print("Waktu:", end - start, "detik")


=== Exact NN ===
Indices: [[876746  57818 672825 696473 501303]]
Distances: [[0.03323986 0.04970084 0.05442793 0.05801032 0.05980372]]
Waktu: 0.006997346878051758 detik

=== HNSW ===
Indices: [[876746  57818 672825 696473 501303]]
Distances: [[0.00110489 0.00247017 0.0029624  0.0033652  0.00357648]]
Waktu: 0.0 detik


akukan percobaan pada metric distance yang berbeda, 1000 vs 1jt data, 2D vs 5D data. catat hasilnya pada tabel yang anda buat sendiri seperti pada praktikum 1.

| Jumlah data | Dimensi | Hasil Index terdekat ENN vs HNSW | Waktu komputasi ENN vs HNSW |
|------------|---------|--------------------------------|-------------------|
| 1000       | 2D      | [978 726 232 779 974] vs [978 726 232 779 974] | 0.00797724723815918 s vs 0.0 s |
| 1000000    | 2D      | [745529 678190 266006 415014 916343] vs [745529 678190 266006 415014 916343] | 0.0040018558502197266 s vs 0.0 s |
| 1000       | 5D      | [651 652  94 864 997] vs [651 652  94 864 997] | 0.010999202728271484 s vs 0.0010004043579101562 s |
| 1000000    | 5D      | [876746  57818 672825 696473 501303] vs [876746  57818 672825 696473 501303] | 0.006997346878051758 s vs 0.0 s |