## K-Medoids Clustering

### Using Manhattan Distance

In [3]:
import numpy as np
import matplotlib.pyplot as plt

class KMedoids:
    """K-Medoids clustering using the Manhattan (L1) distance.

    The algorithm alternates between assigning every point to its nearest
    medoid and replacing each medoid with the cluster member that has the
    smallest total distance to the other members of that cluster.

    Args:
        n_clusters: Number of clusters (and medoids) to find.
        max_iters: Upper bound on the number of assign/update iterations.
        random_state: Seed for the medoid initialisation. The default (41)
            reproduces the initial medoids of the earlier hard-coded
            ``np.random.seed(41)`` call.

    Attributes set by ``fit``:
        medoids: Array of shape (n_clusters, n_features) holding the medoids.
        labels: Cluster index of every training point.
        history: One ``(X, medoids, labels)`` tuple per iteration, for plotting.
    """

    def __init__(self, n_clusters, max_iters=100, random_state=41):
        self.n_clusters = n_clusters
        self.max_iters = max_iters
        self.random_state = random_state
        self.history = []  # To store the history of medoids and labels for plotting

    def fit(self, X):
        """Cluster the rows of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Raises:
            ValueError: If ``n_clusters`` exceeds the number of samples
                (raised by the sampling without replacement).
        """
        X = np.asarray(X)

        # Start from a clean slate so that refitting does not mix two runs.
        self.history = []

        # Use a private generator: the global NumPy RNG state is left
        # untouched, while the draw is identical to seeding it globally.
        rng = np.random.RandomState(self.random_state)
        random_indices = rng.choice(len(X), self.n_clusters, replace=False)
        self.medoids = X[random_indices]

        for _ in range(self.max_iters):
            # Assign clusters
            distances = self._compute_distances(X)
            self.labels = np.argmin(distances, axis=1)

            # Store current state for plotting
            self.history.append((X.copy(), self.medoids.copy(), self.labels.copy()))

            # Update medoids. A cluster can end up empty (e.g. duplicated
            # points make two medoids coincide); it keeps its old medoid.
            new_medoids = self.medoids.copy()
            for i in range(self.n_clusters):
                members = X[self.labels == i]
                if len(members) > 0:
                    new_medoids[i] = self._find_best_medoid(members)

            # Check for convergence
            if np.array_equal(self.medoids, new_medoids):
                break

            self.medoids = new_medoids

    def _compute_distances(self, X):
        """Compute Manhattan distances from data points to medoids.

        Returns:
            Float array of shape (n_samples, n_clusters).
        """
        X = np.asarray(X)
        distances = np.zeros((X.shape[0], self.n_clusters))
        for i in range(self.n_clusters):
            distances[:, i] = np.sum(np.abs(X - self.medoids[i]), axis=1)
        return distances

    def _find_best_medoid(self, cluster):
        """Return the member of ``cluster`` with the smallest total Manhattan
        distance to all other members of the same cluster.

        Raises:
            ValueError: If ``cluster`` contains no points.
        """
        cluster = np.asarray(cluster)
        if len(cluster) == 0:
            raise ValueError("cannot select a medoid from an empty cluster")

        # Pairwise L1 distances inside the cluster, shape (m, m). This needs
        # O(m^2 * n_features) memory, which is fine for small clusters.
        pairwise = np.abs(cluster[:, None, :] - cluster[None, :, :]).sum(axis=2)
        return cluster[np.argmin(pairwise.sum(axis=1))]

    def predict(self, X):
        """Return the index of the nearest medoid for every row of ``X``."""
        distances = self._compute_distances(X)
        return np.argmin(distances, axis=1)

def generate_data(n_samples=100):
    """Generate synthetic 2-D data for clustering.

    Two isotropic Gaussian blobs (standard deviation 0.5) are drawn, one
    centred at (0, 0) and one at (3, 3), and stacked in that order.

    Args:
        n_samples: Total number of points. When odd, the second blob
            receives the extra point so exactly ``n_samples`` rows are
            returned.

    Returns:
        Array of shape (n_samples, 2). The output is deterministic (fixed
        seed 0) and the global NumPy random state is not modified.
    """
    # Private generator: same stream as np.random.seed(0), no global side effect.
    rng = np.random.RandomState(0)
    n_first = n_samples // 2
    n_second = n_samples - n_first  # absorbs the remainder for odd n_samples
    cluster_1 = rng.normal(loc=0, scale=0.5, size=(n_first, 2))
    cluster_2 = rng.normal(loc=3, scale=0.5, size=(n_second, 2))
    return np.vstack((cluster_1, cluster_2))

def plot_kmedoids_iterations(history, out_dir="k_medoids_out"):
    """Plot K-Medoids clustering iterations and save each one as a PNG.

    Args:
        history: Iterable of ``(X, medoids, labels)`` tuples, one per
            iteration; ``X`` and ``medoids`` are indexed as 2-D arrays whose
            first two columns are plotted.
        out_dir: Directory that receives ``kmedoids_iteration_<k>.png``
            (k starts at 1). It is created if it does not exist yet.
    """
    import os  # local import keeps this function self-contained

    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(out_dir, exist_ok=True)

    for i, (X, medoids, labels) in enumerate(history, start=1):
        fig = plt.figure(figsize=(8, 6))
        try:
            plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', alpha=0.6)
            plt.scatter(medoids[:, 0], medoids[:, 1], c='red', marker='X', s=200, label='Medoids')
            plt.title(f"K-Medoids Clustering - Iteration {i}")
            plt.xlabel("Feature 1")
            plt.ylabel("Feature 2")
            plt.legend()
            plt.grid(True)
            # Save each plot as a PNG file
            plt.savefig(os.path.join(out_dir, f"kmedoids_iteration_{i}.png"))
        finally:
            # Close the figure even if plotting/saving failed, to free memory.
            plt.close(fig)

# Generate synthetic data: generate_data stacks two 2-D Gaussian blobs
# (centred at 0 and at 3), each built from n_samples // 2 points.
X = generate_data(n_samples=100)

# Run K-Medoids with two clusters; fit() appends an (X, medoids, labels)
# tuple to kmedoids.history on every iteration.
kmedoids = KMedoids(n_clusters=2)
kmedoids.fit(X)

# Plot the iterations: one PNG per recorded iteration is written under
# k_medoids_out/, and each figure is closed after saving.
plot_kmedoids_iterations(kmedoids.history)

print("K-Medoids clustering iterations saved as PNG files.")


K-Medoids clustering iterations saved as PNG files.
