In [1]:
import numpy as np

In [2]:
class KMeans:
    """Minimal k-means clustering (Lloyd's algorithm) for a 2-D sample matrix.

    Parameters
    ----------
    k : int
        Number of clusters / centroids.
    n_iters : int
        Maximum number of centroid-update passes performed by ``fit``.

    Attributes
    ----------
    centroids_ : np.ndarray of shape (k, n_features)
        Centroids matching the labels returned by the last ``fit`` call.
    """

    def __init__(self, k, n_iters):
        self.k = k
        self.n_iters = n_iters

    def _dist(self, centroid, X):
        """Return the Euclidean distance from every row of ``X`` to ``centroid``.

        ``X`` is reduced over ``axis=1``, so the result has one entry per row.
        """
        return np.sqrt(np.sum((X - centroid) ** 2, axis=1))

    def _get_assignments(self, centroids, X):
        """Return, for each row of ``X``, the index of its nearest centroid.

        Distances are rounded to 8 decimals before comparison so that
        floating-point noise cannot break ties; on a tie the lowest centroid
        index wins (``np.argmin`` returns the first minimum).
        """
        # Shape (n_centroids, n_samples): one distance row per centroid.
        dists = np.stack([self._dist(centroid, X) for centroid in centroids])
        return np.argmin(np.round(dists, 8), axis=0)

    def fit(self, X):
        """Cluster the rows of ``X`` and return one integer label per row.

        Initial centroids are ``k`` distinct rows of ``X`` drawn with the
        global NumPy RNG (call ``np.random.seed`` beforehand for
        reproducible results).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        np.ndarray of shape (n_samples,)
            Index in ``range(k)`` of the centroid assigned to each row.

        Raises
        ------
        ValueError
            Propagated from ``np.random.choice`` when ``k`` exceeds the
            number of rows in ``X``.
        """
        X = np.asarray(X)

        # Randomly choose k distinct samples as the starting centroids.
        # Cast to float so updated means can be written back in place.
        idxs = np.random.choice(len(X), self.k, replace=False)
        centroids = X[idxs].astype(float)
        assignments = self._get_assignments(centroids, X)

        for _ in range(self.n_iters):
            for i in range(self.k):
                members = X[assignments == i]
                # An empty cluster (possible e.g. with duplicate rows) has no
                # mean; keep its previous centroid instead of writing NaN,
                # which would make argmin assign every sample to it.
                if len(members):
                    centroids[i] = members.mean(0)

            new_assignments = self._get_assignments(centroids, X)
            # Unchanged labels imply unchanged centroids on the next pass,
            # so further iterations cannot alter the result.
            if np.array_equal(new_assignments, assignments):
                break
            assignments = new_assignments

        self.centroids_ = centroids
        return assignments

In [3]:
K = 3
SEED = 42

# Seed the global NumPy RNG so the synthetic data below (and any later
# np.random call, such as the random centroid pick in KMeans.fit) is
# reproducible on Restart & Run All.
np.random.seed(SEED)

center_1 = np.array([1, 1])
center_2 = np.array([5, 5])
center_3 = np.array([8, 1])

# Draw 100 standard-normal 2-D points per cluster and shift each cloud
# onto its centre.
cluster_1 = np.random.randn(100, 2) + center_1
cluster_2 = np.random.randn(100, 2) + center_2
cluster_3 = np.random.randn(100, 2) + center_3

# Stack the three clouds row-wise: rows 0-99, 100-199 and 200-299 belong
# to clusters 1, 2 and 3 respectively.
X = np.concatenate((cluster_1, cluster_2, cluster_3), axis=0)

In [4]:
# Cluster X into K groups with 100 centroid-update iterations; the cell
# displays the resulting per-row cluster labels.
kmeans = KMeans(k=K, n_iters=100)
labels = kmeans.fit(X)
labels

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])