<a href="https://colab.research.google.com/github/annisaeka123/2341720131_ML_2025/blob/main/JS06/P4_JS06.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Praktikum 4

Pada percobaan kali ini, kita akan membandingkan ketiga model yang telah dibahas (Annoy, FAISS, dan HNSWLIB) dan melihat perbedaan hasilnya.

In [None]:
# Install the three nearest-neighbor libraries used in this notebook.
# The %pip magic installs into the environment of the running kernel, whereas
# !pip runs whatever `pip` the subshell finds first on PATH.
%pip install annoy
%pip install faiss-cpu
%pip install hnswlib

Collecting annoy
  Using cached annoy-1.17.3.tar.gz (647 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: annoy
  Building wheel for annoy (setup.py) ... done
  Created wheel for annoy: filename=annoy-1.17.3-cp312-cp312-linux_x86_64.whl size=551516 sha256=418e6292ef4a0faed08671d645739c5f1f5501af4cd05f4318989c5fd79a3506
  Stored in directory: /root/.cache/pip/wheels/db/b9/53/a3b2d1fe1743abadddec6aa541294b24fdbc39d7800bc57311
Successfully built annoy
Installing collected packages: annoy
Successfully installed annoy-1.17.3
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 31.4/31.4 MB 61.4 MB/s eta 0:00:00
Installing collected packages: faiss-cpu
Successfully instal

In [None]:
import numpy as np
import time
from annoy import AnnoyIndex
import faiss
import hnswlib
import pandas as pd

# ========================================
# 1. Dataset: 100,000 random points in 5 dimensions
#    (raise n_data to 1_000_000 if enough RAM is available)
# ========================================
n_data = 100_000
dim = 5

# A seeded generator makes the dataset and the query identical on every run,
# so the neighbor IDs reported by the different libraries can be compared
# across runs and against the written-up results.
rng = np.random.default_rng(42)

# Uniform values in [0, 1), cast to float32 before being handed to the indexes.
X = rng.random((n_data, dim)).astype(np.float32)
query = rng.random((1, dim)).astype(np.float32)

# Number of nearest neighbors requested from every index.
k = 10

# Accumulates one dict per benchmarked (model, metric) combination.
results = []

# ========================================
# 2. Helper for recording results
# ========================================
def record_result(model, metric, build_time, query_time, neighbors):
    """Append one benchmark row to the module-level ``results`` list.

    Args:
        model: Library name stored in the "Model" column.
        metric: Distance metric name stored in the "Metric" column.
        build_time: Index build duration in seconds; stored rounded to
            4 decimal places.
        query_time: Query duration in seconds; stored rounded to
            6 decimal places.
        neighbors: Sequence of neighbor IDs (a list or a NumPy array).
            Only the first five are kept.
    """
    # Callers pass a Python list in some cases and a NumPy array in others.
    # Converting to a list of plain ints keeps the column uniform, so every
    # row prints the same way in the final table.
    sample_ids = [int(neighbor_id) for neighbor_id in neighbors[:5]]
    results.append({
        "Model": model,
        "Metric": metric,
        "Build Time (s)": round(build_time, 4),
        "Query Time (s)": round(query_time, 6),
        "Neighbors (5 sample IDs)": sample_ids,
    })


# ========================================
# 3. Annoy - Euclidean & Angular
# ========================================
# Build and query one Annoy index per metric. time.perf_counter() is used
# instead of time.time() because it is the high-resolution, monotonic clock
# intended for measuring short intervals such as a single query.
for metric in ['euclidean', 'angular']:
    print(f"\n=== Annoy ({metric}) ===")
    ann_index = AnnoyIndex(dim, metric)

    # Build time covers inserting every row of X plus the build() call.
    start = time.perf_counter()
    for i, vector in enumerate(X):
        ann_index.add_item(i, vector)
    ann_index.build(10)
    build_time = time.perf_counter() - start

    # With include_distances=True the result is indexed as a pair below:
    # element 0 holds the neighbor IDs.
    start = time.perf_counter()
    neighbors = ann_index.get_nns_by_vector(query[0], k, include_distances=True)
    query_time = time.perf_counter() - start

    print("Build time:", build_time, "detik")
    print("Query time:", query_time, "detik")
    print("Neighbors:", neighbors[0][:5], "...")
    record_result("Annoy", metric, build_time, query_time, neighbors[0])


# ========================================
# 4. FAISS - L2 & Inner Product (IP)
# ========================================
# Timings use time.perf_counter() rather than time.time(): it is the
# high-resolution, monotonic clock intended for measuring short intervals,
# and both the add and the search here finish in well under a millisecond.

# Euclidean (L2)
print("\n=== FAISS (L2) ===")
faiss_index = faiss.IndexFlatL2(dim)
start = time.perf_counter()
faiss_index.add(X)
build_time = time.perf_counter() - start

start = time.perf_counter()
distances, indices = faiss_index.search(query, k)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", indices[0][:5], "...")
record_result("FAISS", "L2", build_time, query_time, indices[0])

# Inner Product (IP)
print("\n=== FAISS (Inner Product / Cosine) ===")
# Scale every row (and the query) to unit length so that the inner product
# of two vectors equals their cosine similarity. The normalization happens
# outside the timed region, so it is not counted as build time.
X_norm = X / np.linalg.norm(X, axis=1, keepdims=True)
query_norm = query / np.linalg.norm(query, axis=1, keepdims=True)
faiss_index_ip = faiss.IndexFlatIP(dim)

start = time.perf_counter()
faiss_index_ip.add(X_norm)
build_time = time.perf_counter() - start

start = time.perf_counter()
distances, indices = faiss_index_ip.search(query_norm, k)
query_time = time.perf_counter() - start

print("Build time:", build_time, "detik")
print("Query time:", query_time, "detik")
print("Neighbors:", indices[0][:5], "...")
record_result("FAISS", "Inner Product (Cosine)", build_time, query_time, indices[0])


# ========================================
# 5. HNSWLIB - L2 & Cosine
# ========================================
# Build and query one HNSW index per metric. time.perf_counter() is used
# instead of time.time() because it is the high-resolution, monotonic clock
# intended for measuring short intervals such as a single query.
for metric in ['l2', 'cosine']:
    print(f"\n=== HNSWLIB ({metric}) ===")
    hnsw_index = hnswlib.Index(space=metric, dim=dim)

    # Build time covers both index initialization and inserting all of X.
    start = time.perf_counter()
    hnsw_index.init_index(max_elements=n_data, ef_construction=200, M=16)
    hnsw_index.add_items(X)
    build_time = time.perf_counter() - start

    # The query-time ef parameter is set before the clock starts, so it is
    # not part of the measured query time.
    hnsw_index.set_ef(50)
    start = time.perf_counter()
    labels, distances = hnsw_index.knn_query(query, k=k)
    query_time = time.perf_counter() - start

    print("Build time:", build_time, "detik")
    print("Query time:", query_time, "detik")
    print("Neighbors:", labels[0][:5], "...")
    record_result("HNSWLIB", metric, build_time, query_time, labels[0])


# ========================================
# 6. Show the collected results as a table
# ========================================
# One DataFrame row per recorded benchmark; the index column is omitted
# from the printed table.
df_results = pd.DataFrame(results)
table_text = df_results.to_string(index=False)
print("\n\n=== HASIL PERBANDINGAN ===", table_text, sep="\n")



=== Annoy (euclidean) ===
Build time: 0.9777843952178955 detik
Query time: 0.00010132789611816406 detik
Neighbors: [13630, 53828, 68182, 51137, 33569] ...

=== Annoy (angular) ===
Build time: 1.3025579452514648 detik
Query time: 0.00011348724365234375 detik
Neighbors: [6550, 34057, 80142, 13630, 79086] ...

=== FAISS (L2) ===
Build time: 0.0005779266357421875 detik
Query time: 0.0005145072937011719 detik
Neighbors: [13630 53828 68182 51137 33569] ...

=== FAISS (Inner Product / Cosine) ===
Build time: 0.0003199577331542969 detik
Query time: 0.0004405975341796875 detik
Neighbors: [ 6550 34057 80142 13630 79086] ...

=== HNSWLIB (l2) ===
Build time: 8.722878694534302 detik
Query time: 9.226799011230469e-05 detik
Neighbors: [13630 53828 68182 51137 33569] ...

=== HNSWLIB (cosine) ===
Build time: 8.351243019104004 detik
Query time: 7.390975952148438e-05 detik
Neighbors: [ 6550 34057 80142 13630 79086] ...


=== HASIL PERBANDINGAN ===
  Model                 Metric  Build Time (s)  Query 

| No | Model   | Metric Distance | Build Time (detik) | Query Time (detik) | 5 Neighbors (ID)                    | Keterangan Akurasi / Catatan                          |
| -- | ------- | --------------- | ------------------ | ------------------ | ----------------------------------- | ----------------------------------------------------- |
| 1  | Annoy   | Euclidean       | 0.9778             | 0.000101           | [13630, 53828, 68182, 51137, 33569] | Query cepat; 5 tetangga sama dengan FAISS L2          |
| 2  | Annoy   | Angular         | 1.3026             | 0.000113           | [6550, 34057, 80142, 13630, 79086]  | 5 tetangga sama dengan FAISS cosine                   |
| 3  | FAISS   | L2              | 0.0006             | 0.000515           | [13630, 53828, 68182, 51137, 33569] | Build paling cepat; query paling lambat dari ketiganya |
| 4  | FAISS   | Inner Product   | 0.0003             | 0.000441           | [6550, 34057, 80142, 13630, 79086]  | Butuh normalisasi data                                |
| 5  | HNSWLIB | l2              | 8.7229             | 0.000092           | [13630, 53828, 68182, 51137, 33569] | Build paling lambat; 5 tetangga sama dengan FAISS L2  |
| 6  | HNSWLIB | cosine          | 8.3512             | 0.000074           | [6550, 34057, 80142, 13630, 79086]  | Query paling cepat; sama dengan hasil FAISS cosine    |
