# In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt


class CustomKMeans:
    """Minimal k-means clustering (Lloyd's algorithm) on NumPy arrays.

    Parameters
    ----------
    k : int
        Number of clusters.
    max_iters : int
        Upper bound on the number of assign/update iterations run by ``fit``.
    tol : float
        ``fit`` stops as soon as the norm of the centroid shift between two
        consecutive iterations falls below this value.
    random_state : int or None
        Seed of the private random generator used to pick the initial
        centroids. The default (42) draws the same initial points as the
        previously hard-coded global seed did.

    Attributes
    ----------
    centroids : ndarray of shape (k, n_features)
        Cluster centers; set by ``fit``.
    labels_ : ndarray of shape (n_samples,)
        Index of the nearest centroid for every training row; set by ``fit``.
    """

    def __init__(self, k=3, max_iters=300, tol=1e-4, random_state=42):
        self.k = k
        self.max_iters = max_iters
        self.tol = tol
        self.random_state = random_state

    def initialize_centroids(self, X):
        """Return ``k`` distinct rows of ``X`` to serve as starting centroids."""
        # A private RandomState yields the same stream as np.random.seed(seed)
        # followed by np.random.choice, but leaves the global RNG untouched.
        rng = np.random.RandomState(self.random_state)
        indices = rng.choice(len(X), self.k, replace=False)
        return X[indices]

    def assign_clusters(self, X, centroids):
        """Return, for each row of ``X``, the index of its nearest centroid.

        Distances are Euclidean; ties go to the lowest centroid index
        (``np.argmin`` returns the first minimum).
        """
        # Broadcasting (n, 1, d) - (k, d) -> (n, k, d); the norm over the last
        # axis gives the (n, k) sample-to-centroid distance matrix.
        distances = np.linalg.norm(X[:, np.newaxis] - centroids, axis=2)
        return np.argmin(distances, axis=1)

    def update_centroids(self, X, labels, previous_centroids=None):
        """Return the mean of the rows assigned to each cluster.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
        labels : ndarray of shape (n_samples,)
            Cluster index of every row of ``X``.
        previous_centroids : ndarray of shape (k, n_features), optional
            When given, a cluster that received no points keeps its previous
            centroid. When omitted, an empty cluster falls back to the zero
            vector (the historical behavior).
        """
        centroids = []
        for i in range(self.k):
            cluster_points = X[labels == i]
            if len(cluster_points) > 0:
                centroids.append(cluster_points.mean(axis=0))
            elif previous_centroids is not None:
                # Keep an empty cluster where it was instead of teleporting it
                # to the origin, which is an arbitrary point for most data.
                centroids.append(np.asarray(previous_centroids[i], dtype=float))
            else:
                centroids.append(np.zeros(X.shape[1]))
        return np.array(centroids)

    def fit(self, X):
        """Cluster ``X`` and store ``centroids`` and ``labels_`` on the instance.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-dimensional or ``k`` is not in
            ``[1, n_samples]``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if not 1 <= self.k <= len(X):
            raise ValueError("k must be between 1 and the number of samples")

        self.centroids = self.initialize_centroids(X)

        for _ in range(self.max_iters):
            labels = self.assign_clusters(X, self.centroids)
            new_centroids = self.update_centroids(X, labels, self.centroids)
            shift = np.linalg.norm(self.centroids - new_centroids)
            self.centroids = new_centroids
            if shift < self.tol:
                break

        # Label against the final centroids so labels_ always agrees with
        # predict(X), including when max_iters is 0 or is exhausted.
        self.labels_ = self.assign_clusters(X, self.centroids)
        return self

    def predict(self, X):
        """Return the index of the nearest fitted centroid for each row of ``X``."""
        return self.assign_clusters(np.asarray(X, dtype=float), self.centroids)


# --------------------------
# Load and preprocess data
# --------------------------
iris = load_iris()
X = iris.data

# Standardize every feature to zero mean / unit variance so that no single
# measurement dominates the Euclidean distances used by k-means.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# --------------------------
# Fit custom KMeans
# --------------------------
k = 3
custom_kmeans = CustomKMeans(k=k)
custom_kmeans.fit(X_scaled)
custom_labels = custom_kmeans.labels_

custom_silhouette = silhouette_score(X_scaled, custom_labels)
print("Custom KMeans Silhouette Score:", custom_silhouette)

# --------------------------
# Compare with sklearn KMeans
# --------------------------
# Reuse k so both models are always compared at the same number of clusters.
sk_kmeans = KMeans(n_clusters=k, random_state=42)
sk_labels = sk_kmeans.fit_predict(X_scaled)
sk_silhouette = silhouette_score(X_scaled, sk_labels)
print("Sklearn KMeans Silhouette Score:", sk_silhouette)

# --------------------------
# PCA Visualization
# --------------------------
# Project the scaled features onto their first two principal components for a
# 2-D scatter plot; the point colors are the custom model's cluster labels.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

plt.figure(figsize=(7, 5))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=custom_labels)
plt.title("Custom KMeans Clusters (PCA 2D)")
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.show()


