# K-Means Clustering from Scratch (NumPy Only)


In [ ]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs


In [ ]:
# Seed NumPy's global RNG so later draws from np.random (e.g. the
# np.random.choice call inside kmeans_numpy) are reproducible.
np.random.seed(42)
# Synthetic dataset: 500 samples drawn around 5 blob centres with std 1.2.
# y_true holds the generating blob index; it is not used in the cells shown here.
X, y_true = make_blobs(n_samples=500, centers=5, cluster_std=1.2, random_state=42)
# Trailing expression: the notebook displays the shape of the data array.
X.shape


In [ ]:
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    The element-wise difference is squared, summed over every element,
    and the square root of that total is returned.
    """
    offset = a - b
    squared_total = np.sum(offset ** 2)
    return np.sqrt(squared_total)


In [ ]:
def _nearest_centroid(X, centroids):
    """Return, for every row of ``X``, the index of its closest centroid."""
    # Squared distances rank the same way as true distances, so the square
    # root is skipped. Broadcasting builds an (n_samples, k, n_features) array.
    diffs = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    return np.argmin((diffs ** 2).sum(axis=2), axis=1)


def kmeans_numpy(X, k, max_iters=100, tol=1e-4):
    """Cluster the rows of ``X`` into ``k`` groups with Lloyd's algorithm.

    Initial centroids are ``k`` distinct rows of ``X`` picked with
    ``np.random.choice``, so results depend on NumPy's global RNG state
    (call ``np.random.seed`` beforehand for reproducibility).

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to cluster. It is converted to a float array so that centroid
        means are not truncated when integer data is passed.
    k : int
        Number of clusters; must satisfy ``1 <= k <= n_samples``.
    max_iters : int, default 100
        Maximum number of centroid updates. With ``0`` the initial
        centroids are returned together with the labels they induce.
    tol : float, default 1e-4
        Iteration stops once the Frobenius norm of the centroid movement
        in one update falls below this value.

    Returns
    -------
    labels : ndarray of shape (n_samples,), integer dtype
        Index of the nearest returned centroid for each sample.
    centroids : ndarray of shape (k, n_features), float dtype
        Final centroid coordinates.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or ``k`` is outside ``[1, n_samples]``.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples = X.shape[0]
    if not 1 <= k <= n_samples:
        raise ValueError(f"k must be between 1 and n_samples={n_samples}, got {k}")

    indices = np.random.choice(n_samples, k, replace=False)
    centroids = X[indices]  # fancy indexing copies, so X is never mutated
    labels = _nearest_centroid(X, centroids)

    for _ in range(max_iters):
        new_centroids = centroids.copy()
        for i in range(k):
            members = X[labels == i]
            # An empty cluster keeps its previous centroid.
            if members.size:
                new_centroids[i] = members.mean(axis=0)

        shift = np.linalg.norm(new_centroids - centroids)
        centroids = new_centroids
        # Re-assign after every update so the returned labels always match
        # the returned centroids, even when the loop ends on max_iters.
        labels = _nearest_centroid(X, centroids)

        if shift < tol:
            break

    return labels, centroids


In [ ]:
# Cluster the data into 5 groups, the same number of centres passed to make_blobs.
labels, centroids = kmeans_numpy(X, 5)
# Trailing expression: show the first ten cluster assignments.
labels[:10]


In [ ]:
def silhouette_score_numpy(X, labels):
    """Return the mean silhouette coefficient of a labelling of ``X``.

    For each sample, ``a`` is the mean Euclidean distance to the *other*
    members of its own cluster and ``b`` is the smallest mean distance to
    the members of any other cluster; the sample's score is
    ``(b - a) / max(a, b)``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features) or (n_samples,)
        Sample coordinates. A 1-D input is treated as one feature.
    labels : array-like of shape (n_samples,)
        Cluster label of each sample.

    Returns
    -------
    float
        Mean of the per-sample silhouette scores.

    Raises
    ------
    ValueError
        If ``labels`` and ``X`` differ in length, or fewer than two
        distinct labels are present (``b`` is then undefined).

    Notes
    -----
    The sample is excluded from its own cluster by index, not by value, so
    duplicate points still contribute to ``a``. Samples in singleton
    clusters score 0, as do samples for which ``a == b == 0``; neither
    produces NaN.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X[:, np.newaxis]
    labels = np.asarray(labels).ravel()
    n_samples = X.shape[0]
    if labels.shape[0] != n_samples:
        raise ValueError("labels must have one entry per row of X")

    unique_labels, cluster_ids, counts = np.unique(
        labels, return_inverse=True, return_counts=True
    )
    n_clusters = unique_labels.size
    if n_clusters < 2:
        raise ValueError("silhouette score requires at least 2 distinct labels")

    scores = np.zeros(n_samples)
    for i in range(n_samples):
        own = cluster_ids[i]
        if counts[own] == 1:
            continue  # singleton cluster: score stays 0

        # Distances from sample i to every sample, summed per cluster.
        dists = np.sqrt(((X - X[i]) ** 2).sum(axis=1))
        totals = np.bincount(cluster_ids, weights=dists, minlength=n_clusters)

        # The self-distance is 0, so dividing by (size - 1) averages over
        # the other members only.
        a = totals[own] / (counts[own] - 1)

        mean_to_cluster = totals / counts
        mean_to_cluster[own] = np.inf  # never pick the sample's own cluster
        b = mean_to_cluster.min()

        denom = max(a, b)
        if denom > 0:
            scores[i] = (b - a) / denom

    return scores.mean()


In [ ]:
# Mean silhouette coefficient of the k-means labelling computed above.
score = silhouette_score_numpy(X, labels)
# Trailing expression: display the score in the notebook output.
score


In [ ]:
# Visualise the clustering using the first two feature columns of X.
plt.figure(figsize=(8,6))
# Samples, coloured by their assigned cluster index.
plt.scatter(X[:,0], X[:,1], c=labels, cmap='tab10')
# Centroids drawn on top as large black X markers.
plt.scatter(centroids[:,0], centroids[:,1], c='black', marker='X', s=200)
plt.title('K-Means from Scratch (NumPy)')
plt.show()
