In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt

# Your custom functions
def euclidean_distance_manual(a, b):
    """Compute the Euclidean (L2) distance between two coordinate sequences.

    Parameters
    ----------
    a, b : iterables of numbers
        Coordinates of the two points. They are paired up with ``zip``, so
        if the lengths differ the extra trailing coordinates of the longer
        one are silently ignored.

    Returns
    -------
    The square root of the sum of squared coordinate differences
    (``0.0`` when there are no coordinate pairs).
    """
    total = 0
    for coord_a, coord_b in zip(a, b):
        total += (coord_a - coord_b) ** 2
    return total ** 0.5

def _assign_labels(X, centroids):
    """Return, for every row of X, the index of its nearest centroid."""
    # Broadcasting yields an (n_samples, k) table of squared Euclidean
    # distances. The square root is skipped because it does not change
    # which centroid is nearest.
    deltas = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    return np.argmin((deltas ** 2).sum(axis=2), axis=1)


def kmeans_from_scratch(X, k=3, max_iters=100, tol=1e-4, random_state=42):
    """Cluster the rows of X into k groups with Lloyd's K-Means algorithm.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data to cluster; converted to a float ndarray internally.
    k : int, default 3
        Number of clusters. Must satisfy ``1 <= k <= n_samples``.
    max_iters : int, default 100
        Maximum number of assign/update iterations.
    tol : float, default 1e-4
        Stop once the summed Euclidean shift of all centroids in one
        iteration falls below this value.
    random_state : int or None, default 42
        Seed for the private random generator that picks the initial
        centroids (k distinct rows of X). ``None`` gives a different
        initialisation on every call. The global NumPy random state is
        never modified.

    Returns
    -------
    labels : ndarray of shape (n_samples,)
        Index of the nearest returned centroid for each sample.
    centroids : ndarray of shape (k, n_features)
        Final cluster centres.
    sse : float
        Sum of squared Euclidean distances from each sample to the
        centroid it is assigned to.

    Raises
    ------
    ValueError
        If X is not 2-dimensional or k is outside ``[1, n_samples]``.

    Notes
    -----
    Prints ``Converged in N iterations.`` when the tolerance is reached
    before ``max_iters`` is exhausted.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples = X.shape[0]
    if not 1 <= k <= n_samples:
        raise ValueError(f"k must be between 1 and n_samples={n_samples}, got {k}")

    # A private RandomState draws the same initial indices as
    # np.random.seed(seed) followed by np.random.choice(...), but leaves the
    # global generator untouched for the rest of the notebook.
    rng = np.random.RandomState(random_state)
    centroids = X[rng.choice(n_samples, k, replace=False)]

    for iteration in range(max_iters):
        labels = _assign_labels(X, centroids)

        # Start from the previous centroids so that a cluster which lost all
        # of its points keeps its old position instead of turning into NaN.
        new_centroids = centroids.copy()
        for i in range(k):
            members = X[labels == i]
            if len(members) > 0:
                new_centroids[i] = members.mean(axis=0)

        shift = np.linalg.norm(new_centroids - centroids, axis=1).sum()
        # Adopt the update before testing for convergence so the returned
        # centroids are always the most recent cluster means.
        centroids = new_centroids
        if shift < tol:
            print(f"Converged in {iteration + 1} iterations.")
            break

    # Final assignment keeps labels and SSE consistent with the returned
    # centroids, and also covers max_iters <= 0 where the loop never runs.
    labels = _assign_labels(X, centroids)
    sse = float(((X - centroids[labels]) ** 2).sum())

    return labels, centroids, sse

# Load the Iris dataset that ships with scikit-learn
iris = load_iris()
X = iris.data  # Use only the feature data; the true labels are ignored for clustering

# Run the from-scratch K-Means implementation with k=3 clusters
labels, centroids, sse = kmeans_from_scratch(X, k=3)

# Report clustering quality: scikit-learn's silhouette score for the
# predicted labels, plus the SSE value returned by kmeans_from_scratch
score = silhouette_score(X, labels)
print(f"Silhouette Score: {score:.4f}")
print(f"Sum of Squared Errors (SSE): {sse:.2f}")




Converged in 6 iterations.
Silhouette Score: 0.5528
Sum of Squared Errors (SSE): 78.85
