In [63]:
import numpy as np

In [64]:
# Toy dataset: eight samples (rows) with three features (columns) each.
# Rows 0-2 and 6-7 hold small values; rows 3-5 hold large values.
data = np.array(
    [
        [2.0, 3.0, 1.5],
        [3.0, 3.5, 2.0],
        [3.5, 3.0, 2.5],
        [8.0, 8.0, 7.5],
        [8.5, 8.5, 8.0],
        [9.0, 8.0, 8.5],
        [1.0, 2.0, 1.0],
        [1.5, 2.5, 1.5],
    ],
    dtype=float,
)

data.shape

(8, 3)

In [65]:
class KMeans:
    """Basic k-means clustering on a 2-D NumPy array whose rows are samples.

    Attributes:
        k: Number of clusters.
        max_iters: Maximum number of assign/update rounds performed by ``fit``.
        verbose: When true, ``fit`` prints the iteration index and the
            current labels on every round.
        centroids: Array with one row per cluster, or ``None`` until
            centroids have been initialised.
        clusters: Array holding the cluster index of every sample from the
            most recent assignment, or ``None`` before the first assignment.
    """

    def __init__(self, k=3, max_iters=100, verbose=True):
        """Store the hyper-parameters; no centroids are chosen yet.

        Args:
            k: Number of clusters to form.
            max_iters: Upper bound on the number of rounds run by ``fit``.
            verbose: Whether ``fit`` prints its per-iteration progress.
        """
        self.k = k
        self.max_iters = max_iters
        self.verbose = verbose
        self.centroids = None
        self.clusters = None

    def initialize_centroids(self, data):
        """Pick ``k`` distinct rows of ``data`` at random as starting centroids.

        The rows are drawn without replacement through NumPy's global random
        state. Fancy indexing copies the rows, so later edits to
        ``self.centroids`` do not modify ``data``.

        Args:
            data: 2-D array with one sample per row.
        """
        chosen_rows = np.random.choice(data.shape[0], self.k, replace=False)
        self.centroids = data[chosen_rows]

    def euclidean_distance(self, x1, x2):
        """Return the Euclidean (L2) distance between ``x1`` and ``x2``."""
        return np.sqrt(np.sum(np.power(x1 - x2, 2)))

    def assign_clusters(self, data):
        """Label every sample with the index of its closest centroid.

        Args:
            data: Iterable of samples, one per row.

        Returns:
            1-D integer array with one label per sample. ``np.argmin`` breaks
            ties in favour of the lowest centroid index.
        """
        distances = np.array([[self.euclidean_distance(x, centroid) for centroid in self.centroids] for x in data])
        return np.argmin(distances, axis=1)

    def update_centroids(self, data):
        """Compute new centroids as the mean of the samples in each cluster.

        A cluster with no assigned samples keeps its current centroid.
        Averaging an empty selection would yield a NaN row, and because
        NaN never compares equal to itself the convergence test in ``fit``
        could then never succeed.

        Args:
            data: 2-D array with one sample per row, aligned with
                ``self.clusters``.

        Returns:
            Array with one centroid per row, ordered by cluster index.
        """
        new_centroids = []
        for i in range(self.k):
            members = data[self.clusters == i]
            if len(members) > 0:
                new_centroids.append(members.mean(axis=0))
            else:
                # Empty cluster: carry the previous centroid forward unchanged.
                new_centroids.append(self.centroids[i])
        return np.array(new_centroids)

    def fit(self, data):
        """Alternate assignment and centroid updates until the centroids stop changing.

        Centroids set beforehand (via ``initialize_centroids`` or by hand)
        are used as the starting point; if none were set, a random start is
        drawn here. The loop stops early once an update leaves every
        centroid unchanged, otherwise after ``max_iters`` rounds. Results
        are stored in ``self.clusters`` and ``self.centroids``; nothing is
        returned.

        Args:
            data: 2-D array with one sample per row.
        """
        # Without this guard assign_clusters would iterate over None.
        if self.centroids is None:
            self.initialize_centroids(data)

        for i in range(self.max_iters):
            self.clusters = self.assign_clusters(data)
            if self.verbose:
                print('Iteration ', i)
                print(self.clusters)
            new_centroids = self.update_centroids(data)
            if np.all(self.centroids == new_centroids):
                break
            self.centroids = new_centroids

In [66]:
# Two-cluster run started from hand-picked centroids.
kmeans = KMeans(k=2)
kmeans.initialize_centroids(data)
# Replace the random draw with known starting points:
#   cluster 0 <- data[0] = (2.0, 3.0, 1.5)
#   cluster 1 <- data[6] = (1.0, 2.0, 1.0)
kmeans.centroids[[0, 1]] = data[[0, 6]]
# Check the distance helper between centroid 0 and sample 3 = (8.0, 8.0, 7.5).
print(kmeans.centroids[0])
print(data[3])
distance = kmeans.euclidean_distance(kmeans.centroids[0], data[3])
print(distance)
# Show the full dataset, then cluster it.
print(data)
kmeans.fit(data)


[2.  3.  1.5]
[8.  8.  7.5]
9.848857801796104
[[2.  3.  1.5]
 [3.  3.5 2. ]
 [3.5 3.  2.5]
 [8.  8.  7.5]
 [8.5 8.5 8. ]
 [9.  8.  8.5]
 [1.  2.  1. ]
 [1.5 2.5 1.5]]
Iteration  0
[0 0 0 0 0 0 1 0]
Iteration  1
[1 1 1 0 0 0 1 1]
Iteration  2
[1 1 1 0 0 0 1 1]


In [67]:
# Three-cluster run: start from three distinct rows of `data` drawn at random.
kmeans = KMeans(k=3)
kmeans.initialize_centroids(data)  # no seed is set in the visible cells, so the starting rows can differ between runs

print(data)
kmeans.fit(data)  # prints the iteration index and the labels on every round
kmeans.centroids  # trailing expression: displayed as the cell's output

[[2.  3.  1.5]
 [3.  3.5 2. ]
 [3.5 3.  2.5]
 [8.  8.  7.5]
 [8.5 8.5 8. ]
 [9.  8.  8.5]
 [1.  2.  1. ]
 [1.5 2.5 1.5]]
Iteration  0
[0 2 2 1 1 1 0 0]
Iteration  1
[0 2 2 1 1 1 0 0]


array([[1.5       , 2.5       , 1.33333333],
       [8.5       , 8.16666667, 8.        ],
       [3.25      , 3.25      , 2.25      ]])