In [3]:
import numpy as np
from scipy.spatial.distance import cdist

In [21]:
def initialize_centroids(X, k):
    """Pick ``k`` distinct rows of ``X`` to serve as the starting centroids.

    Parameters
    ----------
    X : np.ndarray
        Data array; rows are selected along its first axis.
    k : int
        Number of rows to draw.

    Returns
    -------
    np.ndarray
        The ``k`` selected rows of ``X`` (integer-array indexing returns a copy).
    """
    n_samples = X.shape[0]
    # Sampling without replacement guarantees k different row indices.
    chosen = np.random.choice(n_samples, k, replace=False)
    return X[chosen]

In [22]:
def assign_clusters(X, centroids):
    """Label every row of ``X`` with the index of its closest centroid.

    Parameters
    ----------
    X : array-like
        Samples, one per row.
    centroids : array-like
        Candidate centroids, one per row.

    Returns
    -------
    np.ndarray
        For each row of ``X``, the row index into ``centroids`` with the
        smallest distance. Ties resolve to the lowest index.
    """
    # cdist (default Euclidean metric) yields one row per sample and one
    # column per centroid.
    dist_matrix = cdist(X, centroids)
    return dist_matrix.argmin(axis=1)

In [23]:
def update_clusters(X, labels, k):
    """Recompute each centroid as the mean of the samples assigned to it.

    Parameters
    ----------
    X : array-like
        Samples, one per row.
    labels : array-like of int
        Cluster index for each row of ``X``.
    k : int
        Number of clusters; centroids are produced for indices ``0 .. k-1``.

    Returns
    -------
    np.ndarray
        Array with ``k`` rows, row ``i`` being the centroid of cluster ``i``.

    Notes
    -----
    A cluster that received no samples has no mean: ``np.mean`` over an empty
    selection yields NaN, and a NaN centroid poisons every later distance
    computation. Such a cluster is instead re-seeded at a randomly chosen row
    of ``X`` so the result is always finite for finite input.

    Raises
    ------
    ValueError
        If a cluster is empty and ``X`` has no rows to re-seed from.
    """
    X = np.asarray(X)
    labels = np.asarray(labels)

    new_centroids = []
    for i in range(k):
        members = X[labels == i]
        if members.shape[0] == 0:
            # Empty cluster: fall back to a single random sample. Keeping it as
            # a one-row selection means the mean below has the same dtype and
            # shape as the centroids of the non-empty clusters.
            members = X[[np.random.randint(X.shape[0])]]
        new_centroids.append(members.mean(axis=0))
    return np.array(new_centroids)

In [32]:
def kmeans(X, k, max_iterations=1000, tol=1e-7):
    """Cluster the rows of ``X`` into ``k`` groups by alternating assignment
    and centroid-update steps.

    Parameters
    ----------
    X : np.ndarray
        Samples, one per row.
    k : int
        Number of clusters.
    max_iterations : int, optional
        Upper bound on the number of assignment/update rounds.
    tol : float, optional
        Convergence threshold: iteration stops once the norm of the centroid
        displacement between two consecutive rounds drops below this value.

    Returns
    -------
    centroids : np.ndarray
        Final centroid positions, one per row.
    labels : np.ndarray
        Index of the nearest returned centroid for every row of ``X``.
    """
    centroids = initialize_centroids(X, k)
    for _ in range(max_iterations):
        labels = assign_clusters(X, centroids)
        new_centroids = update_clusters(X, labels, k)
        # Measure the movement with a norm (Frobenius norm for a 2-D array).
        # A plain signed sum lets opposite displacements cancel, and any
        # net-negative movement is trivially "< tol", so the loop could stop
        # long before the centroids have actually settled.
        shift = np.linalg.norm(new_centroids - centroids)
        centroids = new_centroids
        if shift < tol:
            break
    # Final assignment against the centroids that are returned. This keeps the
    # two return values consistent and also defines ``labels`` when the loop
    # body never ran (max_iterations <= 0).
    labels = assign_clusters(X, centroids)
    return centroids, labels

In [33]:
# Toy data set: two columns of points, at x = 1 and at x = 10.
X = np.array(
    [
        [1, 2],
        [1, 4],
        [1, 0],
        [10, 2],
        [10, 4],
        [10, 0],
    ]
)
k = 3  # number of clusters to look for

# The starting centroids are drawn at random, so the printed result can
# differ from one run to the next.
centroids, labels = kmeans(X, k)
print("Centroids:\n", centroids)
print("Labels:", labels)

Centroids:
 [[ 1.  2.]
 [10.  3.]
 [10.  0.]]
Labels: [0 0 0 1 1 2]
