In [1]:
import numpy as np

# Sample observations (25 rows, one (x, y) pair per row) and the two starting centroids
X = np.array(
    [
        (5.1, 3.5), (4.9, 3.0), (5.8, 2.7), (6.0, 3.0), (6.7, 3.1),
        (4.5, 2.3), (6.1, 2.8), (5.2, 3.2), (5.5, 2.6), (5.0, 2.0),
        (8.0, 0.5), (7.5, 0.8), (8.1, -0.1), (2.5, 3.5), (1.0, 3.0),
        (4.5, -1.0), (3.0, -0.5), (5.1, -0.2), (6.0, -1.5), (3.5, -0.1),
        (4.0, 0.0), (6.1, 0.5), (5.4, -0.5), (5.3, 0.3), (5.8, 0.6),
    ]
)
centroids = np.array(
    [
        (3.0, 3.0),
        (2.0, 2.0),
    ]
)

# KMeans function
def kmeans(X, centroids, max_iters=100, tol=1e-4):
    """
    Cluster the rows of X with the k-means assignment/update iteration.

    Parameters:
    - X: Data points, array-like of shape (n_samples, n_features)
    - centroids: Initial centroids, array-like of shape (k, n_features)
    - max_iters: Maximum number of assignment/update rounds
    - tol: Iteration stops once the norm of the centroid shift between two
      consecutive rounds is below this value

    Returns:
    - cluster_labels: For each row of X, the index of the nearest returned centroid
    - centroids: Final centroids
    """
    # Accept plain nested lists as well as arrays; the inputs are never mutated.
    X = np.asarray(X)
    centroids = np.asarray(centroids)
    k = len(centroids)

    def nearest(current):
        # Broadcast to (n_samples, k, n_features), then reduce the feature axis
        # to get every point-to-centroid Euclidean distance.
        distances = np.linalg.norm(X[:, np.newaxis] - current, axis=2)
        return np.argmin(distances, axis=1)

    for _ in range(max_iters):
        # Assignment step
        cluster_labels = nearest(centroids)

        # Update step: a cluster that received no points keeps its old centroid
        new_centroids = np.array([
            X[cluster_labels == i].mean(axis=0) if np.any(cluster_labels == i) else centroids[i]
            for i in range(k)
        ])

        # Converged: the labels computed above already match `centroids`
        if np.linalg.norm(new_centroids - centroids) < tol:
            break

        centroids = new_centroids
    else:
        # The loop ran out of iterations (or max_iters was 0), so the labels are
        # either stale relative to the last centroid update or were never
        # computed. Re-assign so the returned pair is consistent.
        cluster_labels = nearest(centroids)

    return cluster_labels, centroids

# Cluster the sample points starting from the initial centroids
cluster_labels, final_centroids = kmeans(X, centroids)

# Report the fitted centroids, then the cluster index of every point
print("Final Centroids:", final_centroids, sep="\n")
print("Cluster Assignments:", cluster_labels, sep="\n")

Final Centroids:
[[ 5.8         2.125     ]
 [ 4.2        -0.05555556]]
Cluster Assignments:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 1 1 0]


In [2]:
import numpy as np

# Observations to cluster: one [x, y] pair per row
X = np.asarray([
    [5.1, 3.5], [4.9, 3.0], [5.8, 2.7], [6.0, 3.0], [6.7, 3.1],       # rows 0-4
    [4.5, 2.3], [6.1, 2.8], [5.2, 3.2], [5.5, 2.6], [5.0, 2.0],       # rows 5-9
    [8.0, 0.5], [7.5, 0.8], [8.1, -0.1], [2.5, 3.5], [1.0, 3.0],      # rows 10-14
    [4.5, -1.0], [3.0, -0.5], [5.1, -0.2], [6.0, -1.5], [3.5, -0.1],  # rows 15-19
    [4.0, 0.0], [6.1, 0.5], [5.4, -0.5], [5.3, 0.3], [5.8, 0.6],      # rows 20-24
])

# Starting positions for the two centroids
centroids = np.asarray([
    [3.0, 3.0],
    [2.0, 2.0],
])

def kmeans(X, centroids, max_iters=100, tol=1e-4, verbose=True):
    """
    Implements the KMeans clustering algorithm from scratch.

    Parameters:
    - X: Data points, array-like of shape (n_samples, n_features)
    - centroids: Initial centroids, array-like of shape (k, n_features)
    - max_iters: Maximum number of iterations
    - tol: Convergence threshold on the norm of the centroid shift between
      two consecutive iterations
    - verbose: When True (the default), print the centroids after every
      iteration and a message once convergence is reached

    Returns:
    - cluster_labels: Cluster assignments for each data point, always
      computed against the returned centroids
    - centroids: Final centroids
    """
    # Accept plain nested lists as well as arrays; the inputs are never mutated.
    X = np.asarray(X)
    centroids = np.asarray(centroids)
    k = len(centroids)  # Number of clusters

    def nearest(current):
        # Broadcast to (n_samples, k, n_features), then reduce the feature axis
        # to get every point-to-centroid Euclidean distance.
        distances = np.linalg.norm(X[:, np.newaxis] - current, axis=2)
        return np.argmin(distances, axis=1)

    for iteration in range(max_iters):
        if verbose:
            print(f"Iteration {iteration + 1}:")

        # Assignment Step: Assign each point to the nearest centroid
        cluster_labels = nearest(centroids)

        # Update Step: Recalculate centroids; an empty cluster keeps its old centroid
        new_centroids = np.array([
            X[cluster_labels == i].mean(axis=0) if np.any(cluster_labels == i) else centroids[i]
            for i in range(k)
        ])

        if verbose:
            print(f"Centroids after iteration {iteration + 1}: {new_centroids}")

        # Convergence Check: the labels computed above already match `centroids`
        if np.linalg.norm(new_centroids - centroids) < tol:
            if verbose:
                print("Convergence reached.")
            break

        centroids = new_centroids
    else:
        # The loop ran out of iterations (or max_iters was 0), so the labels are
        # either stale relative to the last centroid update or were never
        # computed. Re-assign so the returned pair is consistent.
        cluster_labels = nearest(centroids)

    return cluster_labels, centroids

# Fit the clusters from the initial centroids
cluster_labels, final_centroids = kmeans(X, centroids)

# Show the results, each section preceded by a blank line
print("\nFinal Centroids:", final_centroids, sep="\n")
print("\nCluster Assignments:", cluster_labels, sep="\n")

Iteration 1:
Centroids after iteration 1: [[ 5.77058824  2.01764706]
 [ 4.0625     -0.1       ]]
Iteration 2:
Centroids after iteration 2: [[ 5.8         2.125     ]
 [ 4.2        -0.05555556]]
Iteration 3:
Centroids after iteration 3: [[ 5.8         2.125     ]
 [ 4.2        -0.05555556]]
Convergence reached.

Final Centroids:
[[ 5.8         2.125     ]
 [ 4.2        -0.05555556]]

Cluster Assignments:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 1 1 0]
