# K-Means Clustering

In [1]:
import numpy as np

class KMeans:
    """K-means clustering (Lloyd's algorithm) with Euclidean distance.

    Parameters
    ----------
    K : int, default 3
        Number of clusters.
    max_iters : int, default 100
        Upper bound on the assignment/update iterations run by ``fit``.
    random_state : int or None, default None
        Seed for the centroid initialisation. ``None`` draws from NumPy's
        global random state, so seeding via ``np.random.seed`` still works.

    Attributes
    ----------
    centroids : ndarray of shape (K, n_features)
        Cluster centres; ``None`` until ``fit`` has been called.
    labels : ndarray of shape (n_samples,)
        Index of the nearest centroid for every training sample;
        ``None`` until ``fit`` has been called.
    """

    def __init__(self, K=3, max_iters=100, random_state=None):
        self.K = K
        self.max_iters = max_iters
        self.random_state = random_state
        self.centroids = None
        self.labels = None

    def fit(self, X):
        """Estimate centroids from ``X`` and store the training labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``K`` is not in ``1..n_samples``.
        """
        X = self._as_float_2d(X)
        n_samples = len(X)
        if not 1 <= self.K <= n_samples:
            raise ValueError(
                f"K must be between 1 and the number of samples "
                f"({n_samples}), got {self.K}"
            )

        # Initialise with K distinct samples chosen at random.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.default_rng(self.random_state)
        random_indices = rng.choice(n_samples, self.K, replace=False)
        self.centroids = X[random_indices]

        for _ in range(self.max_iters):
            clusters = np.argmin(self._compute_distances(X), axis=1)

            # Start from the current centroids so a cluster that received no
            # points keeps its position instead of becoming NaN (mean of an
            # empty slice).
            new_centroids = self.centroids.copy()
            for k in range(self.K):
                members = X[clusters == k]
                if len(members):
                    new_centroids[k] = members.mean(axis=0)

            if np.allclose(self.centroids, new_centroids):
                break
            self.centroids = new_centroids

        # Assign labels against the *final* centroids. This keeps ``labels``
        # consistent with ``predict(X)`` when the loop stops on ``max_iters``
        # and also covers ``max_iters == 0``.
        self.labels = self.predict(X)

    @staticmethod
    def _as_float_2d(X):
        """Return ``X`` as a 2-D floating-point ndarray."""
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got {X.ndim}-D input"
            )
        # Integer input would truncate the centroid means on assignment.
        if not np.issubdtype(X.dtype, np.floating):
            X = X.astype(float)
        return X

    def _compute_distances(self, X):
        """Return the (n_samples, K) matrix of distances to each centroid."""
        X = self._as_float_2d(X)
        distances = np.empty((len(X), self.K))
        # One centroid at a time keeps the temporary at (n_samples, n_features).
        for k in range(self.K):
            distances[:, k] = np.linalg.norm(X - self.centroids[k], axis=1)
        return distances

    def predict(self, X):
        """Return the index of the nearest centroid for each row of ``X``."""
        distances = self._compute_distances(X)
        return np.argmin(distances, axis=1)

In [2]:
from sklearn.datasets import load_iris
import numpy as np

# Load the iris dataset and use its feature matrix as the clustering input.
data = load_iris()
X = data.data

# Fit a 3-cluster model. fit() picks its starting centroids at random, so the
# cluster numbering (and possibly the result) can differ from run to run.
kmeans = KMeans(K=3)
kmeans.fit(X)

# Show the fitted centroids and the cluster index assigned to each sample.
print("Centroids:\n", kmeans.centroids)
print("Labels:", kmeans.labels)


Centroids:
 [[6.85       3.07368421 5.74210526 2.07105263]
 [5.006      3.428      1.462      0.246     ]
 [5.9016129  2.7483871  4.39354839 1.43387097]]
Labels: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 2 0 0 0 0 2 0 0 0 0
 0 0 2 2 0 0 0 0 2 0 2 0 2 0 0 2 2 0 0 0 0 0 2 0 0 0 0 2 0 0 0 2 0 0 0 2 0
 0 2]
