In [5]:
import numpy as np
from collections import defaultdict

# Toy 2-D dataset: one row per sample, columns are the two coordinates.
data_points = np.array(
    [[1.0, 2.0], [1.5, 1.8], [5.0, 8.0], [8.0, 8.0], [1.0, 0.6], [9.0, 11.0]]
)

# Starting centroids (identical to the first and third samples above).
centroids = np.array([[1.0, 2.0], [5.0, 8.0]])

# Cluster count is taken from the number of starting centroids.
K = len(centroids)

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two points.

    Args:
        point1: Coordinates of the first point (any numeric array-like).
        point2: Coordinates of the second point; must broadcast against
            ``point1``.

    Returns:
        The square root of the sum of squared coordinate differences.
    """
    # Convert to float before subtracting: this accepts plain lists/tuples
    # and prevents unsigned or narrow integer dtypes from wrapping around
    # during the subtraction or the squaring.
    delta = np.asarray(point1, dtype=float) - np.asarray(point2, dtype=float)
    return np.sqrt(np.sum(delta ** 2))

def mapper(data_points, centroids):
    """Group every data point under the index of its nearest centroid.

    Args:
        data_points: Iterable of points to assign.
        centroids: Iterable of current centroid positions.

    Returns:
        A ``defaultdict(list)`` keyed by the ``np.argmin`` index of the
        nearest centroid, holding the points assigned to it. Centroids
        that attract no point have no key.
    """
    clusters = defaultdict(list)

    for sample in data_points:
        # argmin returns the first minimum, so ties go to the lower index.
        nearest = np.argmin(
            [calculate_distance(sample, center) for center in centroids]
        )
        clusters[nearest].append(sample)

    return clusters

def reducer(mapped, num_clusters=None):
    """Compute the mean point of each cluster, ordered by cluster index.

    Args:
        mapped: Mapping from cluster index to the collection of points
            assigned to that cluster.
        num_clusters: Optional total number of clusters. When given, the
            result has exactly this many entries (indices
            ``0 .. num_clusters - 1``) and any index missing from
            ``mapped`` is reported as ``None``; keys outside that range
            are ignored. When omitted, only the indices present in
            ``mapped`` are reduced.

    Returns:
        A list of centroids in ascending cluster-index order. Each entry is
        the per-coordinate mean of the cluster's points, or ``None`` when
        the cluster has no points.
    """
    # Walk the indices in sorted order rather than dict insertion order, so
    # position i in the result always belongs to the i-th smallest index.
    if num_clusters is None:
        indices = sorted(mapped)
    else:
        indices = range(num_clusters)

    new_centroids = []
    for centroid_index in indices:
        # .get() avoids creating an entry when ``mapped`` is a defaultdict.
        points = mapped.get(centroid_index)
        if points is not None and len(points) > 0:
            new_centroids.append(np.mean(points, axis=0))
        else:
            new_centroids.append(None)  # Empty cluster: no mean exists

    return new_centroids

def run_kmeans(data_points, centroids, max_iters=10, tol=1e-4):
    """Iterate assignment (mapper) and update (reducer) steps of K-Means.

    Args:
        data_points: Array of points to cluster, one point per row.
        centroids: Array of initial centroids, one per row. The number of
            clusters is taken from its length.
        max_iters: Maximum number of update iterations.
        tol: Convergence threshold; iteration stops once every coordinate
            of every centroid moves by less than this amount.

    Returns:
        A ``(centroids, mapped)`` tuple. ``mapped`` is the mapper output
        computed against the returned ``centroids``. When the loop stops on
        convergence, the returned centroids are the ones from before the
        final sub-tolerance update.
    """
    # Use the centroids actually passed in, not a module-level constant.
    num_clusters = len(centroids)

    # Assign once up front so ``mapped`` is defined even if max_iters == 0.
    mapped = mapper(data_points, centroids)

    for _ in range(max_iters):
        # Hand the reducer every cluster index in ascending order, with an
        # empty list for clusters that received no points, so that entry i
        # of its result corresponds to centroid i.
        ordered = {index: mapped.get(index, []) for index in range(num_clusters)}
        new_centroids = reducer(ordered)

        if any(centroid is None for centroid in new_centroids):
            print("Warning: Not all clusters have points assigned.")
            # Re-seed each empty cluster in place with a random data point,
            # keeping the other centroids at their own indices.
            new_centroids = [
                data_points[np.random.choice(len(data_points))]
                if centroid is None
                else centroid
                for centroid in new_centroids
            ]

        new_centroids = np.array(new_centroids)

        # Check for convergence
        if np.all(np.abs(new_centroids - centroids) < tol):
            break

        centroids = new_centroids
        # Re-assign so the returned mapping always matches ``centroids``,
        # including when the iteration budget runs out.
        mapped = mapper(data_points, centroids)

    return centroids, mapped

# Cluster the sample data starting from the initial centroids
final_centroids, mapped_clusters = run_kmeans(data_points, centroids)

# Flatten the per-cluster point lists into (point, cluster index) pairs
cluster_assignments = []
for centroid_index, points in mapped_clusters.items():
    cluster_assignments.extend((member, centroid_index) for member in points)

# Report which cluster each point ended up in
print("Cluster Assignments:")
for point, cluster in cluster_assignments:
    print(f"Point {point} is assigned to Cluster {cluster}")

# Report the centroid positions returned by run_kmeans
print("\nFinal Centroid Values:")
for i, centroid in enumerate(final_centroids):
    print(f"Centroid {i}: {centroid}")

Cluster Assignments:
Point [1. 2.] is assigned to Cluster 0
Point [1.5 1.8] is assigned to Cluster 0
Point [1.  0.6] is assigned to Cluster 0
Point [5. 8.] is assigned to Cluster 1
Point [8. 8.] is assigned to Cluster 1
Point [ 9. 11.] is assigned to Cluster 1

Final Centroid Values:
Centroid 0: [1.16666667 1.46666667]
Centroid 1: [7.33333333 9.        ]
