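1. Initialization: 500 random integer data points in [0, 5000) are generated and sorted, and k = 3 centroids (initial_centroids) are initialized at the data mean and 100 units on either side of it.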
2. Distance Calculation: For each point, calculate its absolute distance from all centroids.
3. Cluster Assignment: Assign points to the nearest centroid.
4. Centroid Update: Compute the mean of points in each cluster as the new centroid.
5. Visualization: Each iteration plots the current state of clusters and centroids.
6. Convergence: Iterations stop when the centroids no longer change between two consecutive iterations, or when the maximum number of iterations (9) is reached.

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Step 1: Initialize data points and centroids
# 500 random integers drawn from [0, 5000), sorted ascending.
data = np.sort(np.random.randint(0, 5000, 500))
data_mean = data.mean()
# Seed the three centroids at the mean and 100 units on either side of it.
initial_centroids = [data_mean - 100, data_mean, data_mean + 100]  # Initial centroids

In [None]:
def calculate_distance(point, centroids):
    """Return the absolute distance from ``point`` to every centroid.

    Args:
        point: A single 1-D data value.
        centroids: Iterable of centroid positions.

    Returns:
        A list of ``abs(point - centroid)`` values, in the same order as
        ``centroids``.
    """
    distances = []
    for centroid in centroids:
        distances.append(abs(point - centroid))
    return distances

In [None]:
def assign_clusters(data, centroids):
    """Group every data point under the index of its nearest centroid.

    Args:
        data: Iterable of 1-D data values.
        centroids: Sequence of centroid positions.

    Returns:
        A dict mapping each centroid index (``0 .. len(centroids) - 1``) to
        the list of points assigned to it. Every index is present, even when
        no point was assigned to it.
    """
    clusters = {}
    for idx in range(len(centroids)):
        clusters[idx] = []

    for point in data:
        distances = calculate_distance(point, centroids)
        # On a tie the lowest centroid index wins (first minimum).
        nearest = min(range(len(distances)), key=distances.__getitem__)
        clusters[nearest].append(point)
    return clusters

In [None]:
def update_centroids(clusters, previous_centroids=None):
    """Compute the new centroid of each cluster as the mean of its points.

    Args:
        clusters: Mapping of cluster index to the list of points assigned to
            that cluster. Centroids are returned in the mapping's iteration
            order.
        previous_centroids: Optional sequence of the current centroids, in
            the same order as ``clusters``. When given, a cluster with no
            points keeps its previous centroid instead of being moved to 0.

    Returns:
        A list with one centroid per cluster. An empty cluster yields its
        previous centroid if ``previous_centroids`` was supplied, else ``0``.
    """
    new_centroids = []
    for position, points in enumerate(clusters.values()):
        if points:
            new_centroids.append(sum(points) / len(points))
        elif previous_centroids is not None:
            # Leave an empty cluster's centroid where it was rather than
            # teleporting it to the origin.
            new_centroids.append(previous_centroids[position])
        else:
            new_centroids.append(0)
    return new_centroids

In [None]:
# Visualization function
def visualize_iteration(data, centroids, clusters, iteration):
    """Plot the clusters and centroids of one k-means iteration on a 1-D axis.

    Args:
        data: The full data set. Not used for drawing (points are taken from
            ``clusters``); kept so existing callers keep working.
        centroids: Centroid positions, drawn as large black crosses.
        clusters: Mapping of cluster index to the list of points assigned to
            that cluster.
        iteration: Iteration number shown in the plot title.
    """
    colors = ['red', 'blue', 'green']  # Different colors for clusters
    plt.figure(figsize=(10, 6))

    # Plot data points; all points sit on y = 0 because the data is 1-D.
    for cluster_idx, points in clusters.items():
        plt.scatter(
            points,
            [0] * len(points),
            # Cycle through the palette so more than three clusters do not
            # raise an IndexError.
            color=colors[cluster_idx % len(colors)],
            label=f"Cluster {cluster_idx + 1}",
        )

    # Plot centroids
    plt.scatter(centroids, [0] * len(centroids), color='black', marker='x', s=200, label='Centroids')

    plt.title(f"Iteration {iteration}")
    plt.xlabel("Data Points")
    plt.yticks([])  # Hide y-axis for clarity
    plt.legend()
    plt.show()

In [None]:
# Run k-means, plotting every iteration, until the centroids stop moving
# or the iteration cap is hit.
centroids = list(initial_centroids)  # work on a copy of the starting centroids
for iteration in range(1, 10):  # at most 9 passes (arbitrary cap)
    # Assignment step followed by the update step.
    clusters = assign_clusters(data, centroids)
    new_centroids = update_centroids(clusters)

    # Show the assignment together with the centroids that produced it.
    visualize_iteration(data, centroids, clusters, iteration)

    # Not converged yet: adopt the new centroids and go again.
    converged = centroids == new_centroids
    if not converged:
        centroids = new_centroids
        continue

    print(f"Converged after {iteration} iterations!")
    break

# Report the final state.
print("Final Clusters:", clusters)
print("Final Centroids:", centroids)
