In [3]:
!pip install faiss-cpu
!pip install annoy
!pip install hnswlib

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m55.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0
Collecting annoy
  Using cached annoy-1.17.3.tar.gz (647 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: annoy
  Building wheel for annoy (setup.py) ... [?25l[?25hdone
  Created wheel for annoy: filename=annoy-1.17.3-cp312-cp312-linux_x86_64.whl size=551807 sha256=36f2bd83ac948c2828f321cbb067ed0f036901eb78e7b3d7df7ba9f9bfb9b27e
  Stored in directory: /root/.cache/pip/wheels/db/b9/53/a3b2d1fe1743abadddec6aa541294b24fdbc39d7800bc57311
Successfully built annoy
Installing collected packages: annoy
Successfully in

In [7]:
import pandas as pd
import numpy as np
import time
import faiss
from annoy import AnnoyIndex
import hnswlib
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

# Location of the song-attribute CSV on the mounted Google Drive.
file_path = '/content/drive/MyDrive/MACHINE LEARNING/p7/data/songs_with_attributes_and_lyrics.csv'

# Seed used to pick the query song, so a fresh "Restart & Run All" benchmarks
# the same query. Set to None to draw a different song on every run.
QUERY_SEED = 42

df = pd.read_csv(file_path)

# Numeric audio attributes that make up each song's feature vector.
features = ['danceability', 'energy', 'loudness', 'speechiness',
            'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

# Drop songs with any missing feature. The surviving rows keep their original
# index labels from `df`, which is what lets us look the song name up later.
df_features = df[features].dropna()

X = df_features.values
n_data, dim = X.shape
if n_data == 0:
    # Fail early with a clear message instead of a downstream scaler/RNG error.
    raise ValueError("No rows left after dropping missing feature values.")
print(f"Dataset siap digunakan: {n_data} lagu, {dim} fitur.")

# Standardise every feature so no single attribute dominates the Euclidean
# distance; cast to float32, the dtype the vectors are handed to every index in.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X).astype(np.float32)

# Number of nearest neighbours requested from every index.
k = 10

# Pick one song (by position in the filtered matrix) to use as the query.
rng = np.random.default_rng(QUERY_SEED)
query_index = int(rng.integers(0, n_data))
query_vector = X_scaled[query_index]
# `query_index` is a position in X_scaled; df_features.index maps it back to
# the row *label* in `df`, so the lookup must be label-based (.loc), not
# positional (.iloc).
query_song_name = df.loc[df_features.index[query_index], 'name']
print(f"\nMencari lagu yang mirip dengan: '{query_song_name}' (index: {query_index})\n")

# Per-method benchmark record: {'build': seconds, 'query': seconds, 'neighbors': ids}.
results = {}

# Baseline: exhaustive Euclidean search with scikit-learn. Its neighbours are
# stored under 'Exact NN' and serve as the reference the other methods are
# compared against further down.
print("--- 1. Exact NN (Scikit-learn) ---")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
nn_exact = NearestNeighbors(n_neighbors=k, algorithm='brute', metric='euclidean')
nn_exact.fit(X_scaled)
build_time = time.perf_counter() - start
start = time.perf_counter()
# kneighbors expects a 2-D (n_queries, n_features) array; pass a single-row view.
distances, indices = nn_exact.kneighbors(query_vector.reshape(1, -1))
query_time = time.perf_counter() - start
results['Exact NN'] = {'build': build_time, 'query': query_time, 'neighbors': indices[0]}
print(f"Build time: {build_time:.6f} detik")
print(f"Query time: {query_time:.6f} detik")
print(f"Neighbors: {indices[0]}\n")

# Approximate search with Annoy. Build time covers inserting every vector
# plus constructing the trees.
print("--- 2. ANNOY ---")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
annoy_index = AnnoyIndex(dim, 'euclidean')
# Annoy takes items one at a time, keyed by their row position in X_scaled.
for i in range(n_data):
    annoy_index.add_item(i, X_scaled[i])
annoy_index.build(10)  # number of trees in the index
build_time = time.perf_counter() - start
start = time.perf_counter()
neighbors_annoy = annoy_index.get_nns_by_vector(query_vector, k)
query_time = time.perf_counter() - start
results['ANNOY'] = {'build': build_time, 'query': query_time, 'neighbors': neighbors_annoy}
print(f"Build time: {build_time:.6f} detik")
print(f"Query time: {query_time:.6f} detik")
print(f"Neighbors: {neighbors_annoy}\n")

# FAISS with a flat L2 index. NOTE(review): per the FAISS documentation,
# IndexFlatL2 performs an exhaustive scan, so this measures an optimised
# brute-force search rather than an approximate index.
print("--- 3. FAISS ---")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
faiss_index = faiss.IndexFlatL2(dim)
faiss_index.add(X_scaled)
build_time = time.perf_counter() - start
start = time.perf_counter()
# FAISS expects a contiguous float32 matrix of shape (n_queries, dim).
faiss_query = np.ascontiguousarray(query_vector, dtype=np.float32).reshape(1, -1)
distances, indices_faiss = faiss_index.search(faiss_query, k)
query_time = time.perf_counter() - start
results['FAISS'] = {'build': build_time, 'query': query_time, 'neighbors': indices_faiss[0]}
print(f"Build time: {build_time:.6f} detik")
print(f"Query time: {query_time:.6f} detik")
print(f"Neighbors: {indices_faiss[0]}\n")

# Approximate search with an HNSW graph (hnswlib). Build time covers index
# initialisation and insertion of every vector.
print("--- 4. HNSW (hnswlib) ---")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
hnsw_index = hnswlib.Index(space='l2', dim=dim)
hnsw_index.init_index(max_elements=n_data, ef_construction=200, M=16)
hnsw_index.add_items(X_scaled)
build_time = time.perf_counter() - start
# Query-time search breadth, configured outside the timed sections. It is kept
# at 50 but never allowed to drop below k, so raising k cannot silently
# under-provision the search.
hnsw_index.set_ef(max(50, k))
start = time.perf_counter()
labels_hnsw, distances = hnsw_index.knn_query(query_vector, k=k)
query_time = time.perf_counter() - start
results['HNSW'] = {'build': build_time, 'query': query_time, 'neighbors': labels_hnsw[0]}
print(f"Build time: {build_time:.6f} detik")
print(f"Query time: {query_time:.6f} detik")
print(f"Neighbors: {labels_hnsw[0]}\n")

# Timing table: one row per method, build and query time in seconds.
print("\n--- Rangkuman Hasil Perbandingan ---")
summary_df = pd.DataFrame(results).T
summary_df = summary_df.rename(columns={'build': 'Build Time (s)', 'query': 'Query Time (s)'})
# Each record mixes floats with a neighbour array, so the transposed frame is
# object-dtype and the timings would print with inconsistent precision.
# Cast the two timing columns to float for a uniformly formatted table.
print(summary_df[['Build Time (s)', 'Query Time (s)']].astype(float))

# Overlap of each method's k neighbours with the exact-search result,
# expressed as a percentage of k.
print("\n--- Perbandingan Hasil Tetangga Terdekat ---")
ground_truth = results['Exact NN']['neighbors']
# The libraries return ids as different integer types (NumPy signed/unsigned,
# plain Python ints); normalise to int so the set comparison is type-agnostic.
truth_ids = {int(i) for i in ground_truth}
for name, data in results.items():
    if name == 'Exact NN':
        continue  # the reference itself is not compared
    found_ids = {int(i) for i in data['neighbors']}
    common_neighbors = len(truth_ids & found_ids)
    accuracy = (common_neighbors / k) * 100
    print(f"Akurasi {name} vs Exact NN: {accuracy:.1f}%")

Dataset siap digunakan: 955320 lagu, 9 fitur.

Mencari lagu yang mirip dengan: 'Marigold' (index: 493116)

--- 1. Exact NN (Scikit-learn) ---
Build time: 0.034929 detik
Query time: 0.066931 detik
Neighbors: [493116 872195  96571 793617 719538 375604 594213 331739  17574 633663]

--- 2. ANNOY ---
Build time: 18.354215 detik
Query time: 0.000176 detik
Neighbors: [493116, 872195, 96571, 793617, 719538, 375604, 594213, 331739, 17574, 633663]

--- 3. FAISS ---
Build time: 0.063255 detik
Query time: 0.009616 detik
Neighbors: [493116 872195  96571 793617 719538 375604 594213 331739  17574 633663]

--- 4. HNSW (hnswlib) ---
Build time: 207.813764 detik
Query time: 0.000460 detik
Neighbors: [493116 872195  96571 793617 719538 375604 594213 331739  17574 633663]


--- Rangkuman Hasil Perbandingan ---
         Build Time (s) Query Time (s)
Exact NN       0.034929       0.066931
ANNOY         18.354215       0.000176
FAISS          0.063255       0.009616
HNSW         207.813764        0.00046

--