In [1]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist

# ===============================
# 1. Minimarket data
# ===============================
# One row per observation; the two columns are labelled
# "Transaksi" (transactions) and "Penjualan" (sales) in the result table.
# NOTE(review): the second column is several orders of magnitude larger than
# the first, so the unscaled Euclidean distance used below is dominated by it.
X = np.array([
    [2, 40000],
    [3, 45000],
    [5, 52000],
    [10, 60000],
    [15, 65000],
    [30, 70000],
    [40, 80000],
])

# ===============================
# 2. Initial centroids (same as in the manual calculation)
# ===============================
centroid_awal = np.array([
    [2, 40000],     # C1
    [10, 60000],    # C2
    [30, 70000],    # C3
])

# ===============================
# 3. MANUAL CALCULATION (ONE ITERATION)
# ===============================
# Assignment step: jarak[r, k] is the Euclidean distance from row r of X to
# centroid k; every row is assigned to the centroid with the smallest distance.
jarak = cdist(X, centroid_awal, metric='euclidean')
cluster_manual = np.argmin(jarak, axis=1)

# Update step: each new centroid is the mean of the rows assigned to it.
# A cluster that received no rows keeps its previous centroid, because the
# mean of an empty selection would be NaN (and emit a RuntimeWarning).
centroid_baru = np.array([
    X[cluster_manual == k].mean(axis=0)
    if np.any(cluster_manual == k)
    else centroid_lama.astype(float)
    for k, centroid_lama in enumerate(centroid_awal)
])

# ===============================
# 4. PROGRAM CALCULATION (K-Means)
# ===============================
# Run scikit-learn's K-Means from the same initial centroids and cap it at a
# single iteration so the result can be compared with the manual calculation.
# NOTE(review): with max_iter=1 scikit-learn presumably reports labels_
# relative to the *updated* centres rather than the initial ones, so labels_
# is not guaranteed to equal a first-iteration assignment - confirm against
# the installed scikit-learn version.
kmeans = KMeans(
    n_clusters=len(centroid_awal),  # derived so it cannot drift from `init`
    init=centroid_awal,
    n_init=1,                       # explicit init array: a single run only
    max_iter=1,
    random_state=42,
)

kmeans.fit(X)

# ===============================
# 5. COMPARISON OF RESULTS
# ===============================
# Side-by-side table: one row per observation with the cluster index from the
# manual calculation next to the one reported by the fitted KMeans model.
kolom_hasil = {
    "Data (Transaksi, Penjualan)": X.tolist(),
    "Cluster Manual": cluster_manual,
    "Cluster Program": kmeans.labels_,
}
hasil = pd.DataFrame(kolom_hasil)

# Report sections in display order: (heading, object to print).
# Every heading after the first starts with a newline to leave a blank line
# between sections.
laporan = (
    ("=== CENTROID AWAL ===", centroid_awal),
    ("\n=== CENTROID HASIL MANUAL ===", centroid_baru),
    ("\n=== CENTROID HASIL PROGRAM ===", kmeans.cluster_centers_),
    ("\n=== PERBANDINGAN CLUSTER ===", hasil),
)

for judul, isi in laporan:
    print(judul)
    print(isi)


=== CENTROID AWAL ===
[[    2 40000]
 [   10 60000]
 [   30 70000]]

=== CENTROID HASIL MANUAL ===
[[2.50e+00 4.25e+04]
 [1.00e+01 5.90e+04]
 [3.50e+01 7.50e+04]]

=== CENTROID HASIL PROGRAM ===
[[2.50e+00 4.25e+04]
 [1.00e+01 5.90e+04]
 [3.50e+01 7.50e+04]]

=== PERBANDINGAN CLUSTER ===
  Data (Transaksi, Penjualan)  Cluster Manual  Cluster Program
0                  [2, 40000]               0                0
1                  [3, 45000]               0                0
2                  [5, 52000]               1                1
3                 [10, 60000]               1                1
4                 [15, 65000]               1                1
5                 [30, 70000]               2                2
6                 [40, 80000]               2                2
