K-Means Without scikit-learn

In [1]:
import numpy as np

def k_means(data, k, max_iters=100, random_state=None):
    """Cluster ``data`` into ``k`` groups with Lloyd's k-means algorithm.

    Initial centroids are ``k`` distinct rows of ``data`` chosen at random.
    Each iteration recomputes every centroid as the mean of the points
    currently assigned to it, then reassigns every point to its nearest
    centroid (Euclidean distance). Iteration stops when the centroids no
    longer change or after ``max_iters`` updates.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Points to cluster. Converted to a float array internally.
    k : int
        Number of clusters; must satisfy ``1 <= k <= n_samples``.
    max_iters : int, default 100
        Maximum number of centroid updates. ``0`` returns the assignment
        to the randomly chosen initial centroids.
    random_state : None, int, or numpy.random.Generator, default None
        Seed (or generator) for the initial centroid choice. ``None`` draws
        from NumPy's global random state, so ``np.random.seed`` still works.

    Returns
    -------
    clusters : ndarray of shape (n_samples,)
        Index of the nearest returned centroid for every sample.
    centroids : ndarray of shape (k, n_features)
        Final centroid coordinates.

    Raises
    ------
    ValueError
        If ``data`` is not 2-D or ``k`` is outside ``[1, n_samples]``.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim != 2:
        raise ValueError("data must be a 2-D array of shape (n_samples, n_features)")
    n_samples = data.shape[0]
    if not 1 <= k <= n_samples:
        raise ValueError(f"k must be between 1 and n_samples={n_samples}, got {k}")

    # Fall back to the global NumPy RNG when no seed is given so existing
    # callers that rely on np.random.seed() keep their behaviour.
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    centroids = data[rng.choice(n_samples, k, replace=False)]

    def _nearest(points, centers):
        # Pairwise distances via broadcasting: (n_samples, 1, d) - (k, d).
        distances = np.linalg.norm(points[:, np.newaxis] - centers, axis=2)
        return np.argmin(distances, axis=1)

    # Assign before the loop so `clusters` exists even when max_iters == 0.
    clusters = _nearest(data, centroids)

    for _ in range(max_iters):
        new_centroids = centroids.copy()
        for i in range(k):
            members = data[clusters == i]
            # An empty cluster (possible when duplicate points were picked as
            # initial centroids) keeps its previous centroid; taking the mean
            # of zero rows would yield NaN and corrupt every later assignment.
            if len(members):
                new_centroids[i] = members.mean(axis=0)

        # Once assignments are stable the means are recomputed from the same
        # rows, so an exact comparison is a reliable convergence test.
        if np.array_equal(centroids, new_centroids):
            break
        centroids = new_centroids
        # Re-assign right after the update so the returned labels always
        # correspond to the returned centroids.
        clusters = _nearest(data, centroids)

    return clusters, centroids

# Demo: six 2-D points forming two vertical groups, at x = 1 and x = 10.
k = 2
data = np.array([
    [1, 2], [1, 4], [1, 0],
    [10, 2], [10, 4], [10, 0],
])

# Initial centroids are drawn at random, so which group is labelled 0 and
# which is labelled 1 can differ from run to run.
clusters, centroids = k_means(data, k)

print("Clusters:", clusters)
print("Centroids:", centroids)

Clusters: [1 1 1 0 0 0]
Centroids: [[10.  2.]
 [ 1.  2.]]


K-Means With scikit-learn

In [2]:
from sklearn.cluster import KMeans
import numpy as np

# Same six points as the from-scratch example: two groups at x = 1 and x = 10.
k = 2
data = np.array([
    [1, 2], [1, 4], [1, 0],
    [10, 2], [10, 4], [10, 0],
])

# fit() returns the estimator itself, so construction and fitting are chained;
# random_state is fixed so repeated runs give the same labelling.
kmeans = KMeans(n_clusters=k, random_state=42).fit(data)

# Per-sample cluster index and the learned cluster centres.
clusters, centroids = kmeans.labels_, kmeans.cluster_centers_

print("Clusters:", clusters)
print("Centroids:", centroids)

Clusters: [0 0 0 1 1 1]
Centroids: [[ 1.  2.]
 [10.  2.]]
