In [1]:
import numpy as np

In [2]:
class KMeans:
    """Minimal k-means clustering (Lloyd's algorithm) built on NumPy.

    Parameters
    ----------
    k : int, default 8
        Number of clusters; stored as ``no_of_clusters``.
    i : int, default 300
        Maximum number of assign/update iterations; stored as ``max_iter``.
    random_state : int or None, default None
        Seed for choosing the initial centroids. With ``None`` the draw comes
        from NumPy's global random state, so ``np.random.seed(...)`` still
        controls the result.

    Attributes
    ----------
    centroids : ndarray of shape (k, n_features)
        Cluster centres. Only available after :meth:`fit` has been called.
    """

    def __init__(self, k=8, i=300, random_state=None):
        self.no_of_clusters = k
        self.max_iter = i
        self.random_state = random_state

    def _nearest_centroid(self, X):
        """Return, for every row of ``X``, the index of its closest centroid."""
        # (k, 1, d) broadcast against (n, d) gives (k, n, d); summing over the
        # feature axis leaves a (k, n) table of squared Euclidean distances.
        # The square root is skipped: it is monotonic, so the closest centroid
        # is the same with or without it.
        squared = ((X - self.centroids[:, np.newaxis]) ** 2).sum(axis=2)
        return np.argmin(squared, axis=0)

    def fit(self, X):
        """Estimate the cluster centroids from ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training points; converted to a float array.

        Returns
        -------
        KMeans
            The fitted instance, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, or ``k`` is not in ``1..n_samples``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        no_of_samples = X.shape[0]
        if not 1 <= self.no_of_clusters <= no_of_samples:
            raise ValueError(
                "k must be between 1 and the number of samples "
                f"({no_of_samples}), got {self.no_of_clusters}"
            )

        # Initialise the centroids with k distinct rows of X picked at random.
        if self.random_state is None:
            picks = np.random.choice(no_of_samples, self.no_of_clusters, replace=False)
        else:
            rng = np.random.default_rng(self.random_state)
            picks = rng.choice(no_of_samples, self.no_of_clusters, replace=False)
        self.centroids = X[picks]

        for _ in range(self.max_iter):
            # Assignment step: attach every point to its nearest centroid.
            labels = self._nearest_centroid(X)

            # Update step: move each centroid to the mean of its members.
            # A cluster with no members keeps its previous centroid; taking the
            # mean of an empty selection would yield NaN, which breaks both the
            # convergence test and every later argmin.
            new_centroids = self.centroids.copy()
            for cluster in range(self.no_of_clusters):
                members = X[labels == cluster]
                if len(members):
                    new_centroids[cluster] = members.mean(axis=0)

            # Converged once the centroids have (numerically) stopped moving.
            if np.allclose(self.centroids, new_centroids):
                break

            self.centroids = new_centroids

        return self

    def predict(self, X):
        """Assign each row of ``X`` to its nearest fitted centroid.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            Index of the closest centroid for every row.

        Raises
        ------
        AttributeError
            If called before :meth:`fit`, because ``centroids`` is not set yet.
        """
        return self._nearest_centroid(np.asarray(X, dtype=float))


In [4]:
# Five 2-D sample points: two close to the origin, three further out.
X = np.array(
    [
        [1, 2],
        [2, 3],
        [5, 6],
        [7, 8],
        [8, 7],
    ]
)

# Fit a two-cluster model, then label the very points it was fitted on.
# Which group ends up as cluster 0 depends on the random initial centroids.
kmeans = KMeans(k=2)
kmeans.fit(X)
labels = kmeans.predict(X)

print("Cluster Labels:", labels)

Cluster Labels: [0 0 1 1 1]
