# K-Means Clustering — From Scratch

Lloyd's algorithm with K-Means++ initialization. We'll use the Elbow Method (inertia plotted against k) to choose a suitable number of clusters k.

In [None]:
import sys, os
sys.path.insert(0, os.path.abspath(".."))

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

from kmeans.kmeans import KMeans

# Apply the "seaborn-v0_8-darkgrid" style sheet to the figures created below.
plt.style.use("seaborn-v0_8-darkgrid")

# Synthetic dataset: 500 samples scattered around 4 blob centers.
# The fixed random_state keeps the data identical from run to run.
X, y_true = make_blobs(
    n_samples=500,
    centers=4,
    cluster_std=0.8,
    random_state=42,
)

# Elbow method: fit one model for every candidate k in 1..10 and record its
# inertia, so the inertia-vs-k curve can be plotted afterwards.
k_range = range(1, 11)
inertias = []
for k in k_range:
    km = KMeans(k=k, random_state=42)
    km.fit(X)
    inertias.append(km.inertia_)

# Final model: k = 4, matching the number of centers used to generate the data.
# `labels` holds the cluster index assigned to each sample of X.
best_km = KMeans(k=4, random_state=42)
labels = best_km.fit_predict(X)

# One figure with two panels: elbow curve (left) and fitted clusters (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Elbow plot: inertia for every candidate k. The "elbow" is the k after which
# adding more clusters no longer reduces the inertia substantially.
axes[0].plot(list(k_range), inertias, marker="o", color="steelblue", lw=2)
axes[0].set_title("Elbow Method — Inertia vs k")
axes[0].set_xlabel("k")
axes[0].set_ylabel("Inertia (WCSS)")

# Cluster visualization.
# Take the cluster count from the fitted model (one centroid row per cluster)
# instead of hard-coding it, so the colours, legend entries and title stay
# consistent if the k chosen for `best_km` is changed.
n_clusters = len(best_km.centroids_)
colors = plt.cm.tab10(np.linspace(0, 1, n_clusters))
for c in range(n_clusters):
    # Boolean mask selecting the samples assigned to cluster c.
    mask = labels == c
    axes[1].scatter(X[mask, 0], X[mask, 1], s=15, alpha=0.7, color=colors[c], label=f"Cluster {c}")
# Draw the centroids on top of the points (zorder) as large black "X" markers.
axes[1].scatter(best_km.centroids_[:, 0], best_km.centroids_[:, 1],
                s=200, c="black", marker="X", zorder=5, label="Centroids")
axes[1].set_title(f"K-Means Clusters (k={n_clusters})")
axes[1].legend()

plt.tight_layout()
plt.show()