In [8]:
%%writefile k_means.py
import numpy as np

class KMeansFromScratch:
    """K-means clustering (Lloyd's algorithm) implemented with NumPy.

    Centroids are seeded with ``n_clusters`` distinct rows of the training
    data drawn via ``np.random.choice``; call ``np.random.seed`` beforehand
    for reproducible results.

    Attributes:
        n_clusters: Number of clusters / centroids.
        max_iterations: Upper bound on assignment/update rounds in ``fit``.
        min_improvement: ``fit`` stops once the Frobenius norm of the
            centroid shift between two rounds drops below this value.
        centroids: ``(n_clusters, n_features)`` array after ``fit``;
            ``None`` before.
    """

    def __init__(self, n_clusters, max_iterations=100, min_improvement=1e-4):
        self.n_clusters = n_clusters
        self.max_iterations = max_iterations
        self.min_improvement = min_improvement
        self.centroids = None

    def _distances(self, X):
        """Return the ``(n_samples, n_clusters)`` Euclidean distance matrix."""
        # Broadcasting (n, 1, d) - (k, d) -> (n, k, d); the norm collapses d.
        return np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)

    def fit(self, X):
        """Learn centroids from ``X`` and return ``self``.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.

        Raises:
            ValueError: If ``n_clusters`` exceeds the number of samples
                (raised by ``np.random.choice`` with ``replace=False``).
        """
        # Float conversion accepts lists and keeps integer input from
        # producing integer-typed centroids.
        X = np.asarray(X, dtype=float)
        seed_rows = np.random.choice(X.shape[0], self.n_clusters, replace=False)
        self.centroids = X[seed_rows]

        for _ in range(self.max_iterations):
            # Assign each data point to its nearest centroid.
            labels = np.argmin(self._distances(X), axis=1)

            # Start from the current centroids so that a cluster which
            # received no points keeps its position; averaging an empty
            # slice would yield NaN and corrupt every later iteration.
            new_centroids = self.centroids.copy()
            for j in range(self.n_clusters):
                members = X[labels == j]
                if len(members):
                    new_centroids[j] = members.mean(axis=0)

            improvement = np.linalg.norm(new_centroids - self.centroids)
            # Store the update before testing convergence so the most recent
            # centroids are the ones that are kept.
            self.centroids = new_centroids
            if improvement < self.min_improvement:
                break

        return self

    def transform(self, X):
        """Return distances from each row of ``X`` to every centroid.

        Must be called after ``fit``.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.

        Returns:
            Array of shape ``(n_samples, n_clusters)`` of Euclidean distances.
        """
        return self._distances(np.asarray(X))

    def get_centroids(self):
        """Return the current centroid array (``None`` before ``fit``)."""
        return self.centroids

    def fit_transform(self, X):
        """Fit on ``X`` and return its distance matrix to the centroids."""
        self.fit(X)
        return self.transform(X)


Overwriting k_means.py
