## K-Means Clustering

### Using Euclidean Distance

In [7]:
import numpy as np
import matplotlib.pyplot as plt

class KMeans:
    """Minimal K-Means clusterer using Euclidean distance.

    Attributes set by ``__init__``:
        n_clusters: Number of clusters (and centroids) to fit.
        max_iters: Upper bound on assignment/update iterations.
        history: List of ``(X, centroids, labels)`` snapshots, one per
            iteration of the most recent ``fit`` call, intended for plotting.

    Attributes set by ``fit``:
        centroids: Array of shape ``(n_clusters, n_features)``.
        labels: Cluster index assigned to each training sample.
    """

    def __init__(self, n_clusters, max_iters=100):
        self.n_clusters = n_clusters
        self.max_iters = max_iters
        self.history = []  # To store the history of centroids and labels for plotting

    def fit(self, X):
        """Fit centroids to ``X`` (a 2-D array of shape ``(n_samples, n_features)``).

        Initial centroids are ``n_clusters`` distinct rows of ``X`` chosen with
        a fixed seed (43), so repeated fits on the same data are reproducible.

        Raises:
            ValueError: Propagated from the sampler when ``n_clusters`` exceeds
                the number of samples.
        """
        X = np.asarray(X)

        # Start each fit with a clean history so a refit does not mix the
        # snapshots of two different runs.
        self.history = []

        # A private RandomState yields the same draw as seeding the global
        # generator with 43, without clobbering the caller's global RNG state.
        rng = np.random.RandomState(43)
        random_indices = rng.choice(len(X), self.n_clusters, replace=False)
        self.centroids = X[random_indices]

        for _ in range(self.max_iters):
            # Assign each sample to its nearest centroid
            self.labels = np.argmin(self._compute_distances(X), axis=1)

            # Store current state for plotting
            self.history.append((X.copy(), self.centroids.copy(), self.labels.copy()))

            # Update centroids. A cluster with no members keeps its previous
            # centroid: the mean of an empty selection is NaN, and a NaN
            # centroid would corrupt every later assignment.
            updated = []
            for i in range(self.n_clusters):
                members = X[self.labels == i]
                updated.append(members.mean(axis=0) if len(members) else self.centroids[i])
            new_centroids = np.array(updated)

            # Check for convergence (centroids did not move at all)
            if np.array_equal(self.centroids, new_centroids):
                break

            self.centroids = new_centroids
        else:
            # Iteration budget exhausted without convergence: the centroids
            # were updated after the last assignment, so refresh the labels
            # to keep them consistent with the final centroids.
            self.labels = np.argmin(self._compute_distances(X), axis=1)

    def _compute_distances(self, X):
        """Return the ``(n_samples, n_clusters)`` matrix of Euclidean distances."""
        # Broadcasting (n, 1, d) - (k, d) -> (n, k, d); the norm collapses d.
        return np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)

    def predict(self, X):
        """Return the index of the nearest fitted centroid for each row of ``X``."""
        distances = self._compute_distances(X)
        return np.argmin(distances, axis=1)

def generate_data(n_samples=100):
    """Generate synthetic 2-D data for clustering.

    Two Gaussian blobs with standard deviation 0.5 are drawn, centred at
    (0, 0) and (3, 3). The first blob receives ``n_samples // 2`` points and
    the second receives the remainder, so exactly ``n_samples`` rows are
    returned even when ``n_samples`` is odd.

    The draw uses a private generator seeded with 0, so the output is
    reproducible and the global NumPy random state is left untouched.

    Returns:
        Array of shape ``(n_samples, 2)``; first-blob rows come first.
    """
    rng = np.random.RandomState(0)
    first_size = n_samples // 2
    second_size = n_samples - first_size  # absorbs the odd sample, if any
    cluster_1 = rng.normal(loc=0, scale=0.5, size=(first_size, 2))
    cluster_2 = rng.normal(loc=3, scale=0.5, size=(second_size, 2))
    return np.vstack((cluster_1, cluster_2))

def plot_kmeans_iterations(history):
    """Plot K-Means clustering iterations, saving one PNG per iteration.

    Args:
        history: Iterable of ``(X, centroids, labels)`` tuples, where ``X``
            and ``centroids`` are 2-D arrays with at least two columns and
            ``labels`` holds one cluster index per row of ``X``.

    Files are written to ``k_means_out/kmeans_iteration_<n>.png`` with ``n``
    starting at 1. The output directory is created if it does not exist.
    """
    import os  # local import keeps this function self-contained

    output_dir = "k_means_out"
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(output_dir, exist_ok=True)

    for step, (X, centroids, labels) in enumerate(history, start=1):
        plt.figure(figsize=(8, 6))
        try:
            plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.6)
            plt.scatter(centroids[:, 0], centroids[:, 1], c='red', marker='X', s=200, label='Centroids')
            plt.title(f"K-Means Clustering - Iteration {step}")
            plt.xlabel("Feature 1")
            plt.ylabel("Feature 2")
            plt.legend()
            plt.grid(True)
            plt.savefig(f"{output_dir}/kmeans_iteration_{step}.png")  # Save each plot as a PNG file
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close()

# Build the toy dataset: 100 two-dimensional points from two Gaussian blobs.
X = generate_data(100)

# Fit a two-cluster model; each iteration is recorded in `kmeans.history`.
kmeans = KMeans(2)
kmeans.fit(X)

# Write one PNG per recorded iteration, then report completion.
plot_kmeans_iterations(kmeans.history)
print("K-Means clustering iterations saved as PNG files.")


K-Means clustering iterations saved as PNG files.
