In [11]:
import numpy as np

def k_means(x, u, max_iter):
    for iteration in range(max_iter):
        print(f"Iteration {iteration + 1}:")
        # Step 1: Assign each data point to the nearest centroid
        clusters = [[] for _ in range(len(u))]
        for point in x:
            distances = [np.linalg.norm(point - centroid) for centroid in u]
            nearest_centroid_index = np.argmin(distances)
            clusters[nearest_centroid_index].append(point)
        
        # Step 2: Update centroids to the mean of their assigned points
        new_centroids = []
        for cluster in clusters:
            if len(cluster) > 0:
                new_centroid = np.mean(cluster, axis=0)
                new_centroids.append(new_centroid)
            else:
                # If a centroid has no points assigned to it, keep it the same
                new_centroids.append(u[len(new_centroids)])
        
        # Converting list of centroids to numpy array
        u = np.array(new_centroids)
        
        # Print current centroids and clusters
        print("Centroids:")
        print(u)
        print("Clusters:")
        for i, cluster in enumerate(clusters):
            print(f"Cluster {i + 1}: {cluster}")
        print("--------------------")

# Given input
x = np.array([[1,2],[1,1],[2,1],[5,1],[6,1],[6,2]])  # inputs
u = np.array([[1,3],[2,2]])  # centroids
max_iter = 5

# Run K-means algorithm
k_means(x, u, max_iter)


Iteration 1:
Centroids:
[[1.  2. ]
 [4.  1.2]]
Clusters:
Cluster 1: [array([1, 2])]
Cluster 2: [array([1, 1]), array([2, 1]), array([5, 1]), array([6, 1]), array([6, 2])]
--------------------
Iteration 2:
Centroids:
[[1.33333333 1.33333333]
 [5.66666667 1.33333333]]
Clusters:
Cluster 1: [array([1, 2]), array([1, 1]), array([2, 1])]
Cluster 2: [array([5, 1]), array([6, 1]), array([6, 2])]
--------------------
Iteration 3:
Centroids:
[[1.33333333 1.33333333]
 [5.66666667 1.33333333]]
Clusters:
Cluster 1: [array([1, 2]), array([1, 1]), array([2, 1])]
Cluster 2: [array([5, 1]), array([6, 1]), array([6, 2])]
--------------------
Iteration 4:
Centroids:
[[1.33333333 1.33333333]
 [5.66666667 1.33333333]]
Clusters:
Cluster 1: [array([1, 2]), array([1, 1]), array([2, 1])]
Cluster 2: [array([5, 1]), array([6, 1]), array([6, 2])]
--------------------
Iteration 5:
Centroids:
[[1.33333333 1.33333333]
 [5.66666667 1.33333333]]
Clusters:
Cluster 1: [array([1, 2]), array([1, 1]), array([2, 1])]
Cluste

In [13]:
import numpy as np
import plotly.graph_objs as go
from plotly.subplots import make_subplots

def k_means(x, u, max_iter):
    for iteration in range(max_iter):
        print(f"Iteration {iteration + 1}:")
        # Step 1: Assign each data point to the nearest centroid
        clusters = [[] for _ in range(len(u))]
        distances_to_centroids = [[] for _ in range(len(u))]  # For storing distances
        for point in x:
            distances = [np.linalg.norm(point - centroid) for centroid in u]
            nearest_centroid_index = np.argmin(distances)
            clusters[nearest_centroid_index].append(point)
            distances_to_centroids[nearest_centroid_index].append(distances[nearest_centroid_index])
        
        # Step 2: Update centroids to the mean of their assigned points
        new_centroids = []
        for cluster in clusters:
            if len(cluster) > 0:
                new_centroid = np.mean(cluster, axis=0)
                new_centroids.append(new_centroid)
            else:
                # If a centroid has no points assigned to it, keep it the same
                new_centroids.append(u[len(new_centroids)])
        
        # Converting list of centroids to numpy array
        u = np.array(new_centroids)
        
        # Print current centroids and clusters
        print("Centroids:")
        print(u)
        print("Clusters:")
        for i, cluster in enumerate(clusters):
            print(f"Cluster {i + 1}: {cluster}")
        print("--------------------")

        # Plotting
        plot_clusters(x, clusters, u, distances_to_centroids, iteration + 1)

def plot_clusters(x, clusters, centroids, distances_to_centroids, iteration):
    fig = make_subplots(rows=1, cols=1)
    colors = ['blue', 'red', 'green', 'purple', 'orange', 'yellow']  # Adjust as needed for more clusters
    for i, cluster in enumerate(clusters):
        cluster_x = [point[0] for point in cluster]
        cluster_y = [point[1] for point in cluster]
        fig.add_trace(go.Scatter(x=cluster_x, y=cluster_y, mode='markers', name=f'Cluster {i+1}', marker=dict(color=colors[i])), row=1, col=1)
        centroid_x = centroids[i][0]
        centroid_y = centroids[i][1]
        for point_x, point_y, distance in zip(cluster_x, cluster_y, distances_to_centroids[i]):
            fig.add_trace(go.Scatter(x=[centroid_x, point_x], y=[centroid_y, point_y], mode='lines', name='Distance', line=dict(color='gray', width=1, dash='dash')), row=1, col=1)
    centroid_x = [centroid[0] for centroid in centroids]
    centroid_y = [centroid[1] for centroid in centroids]
    fig.add_trace(go.Scatter(x=centroid_x, y=centroid_y, mode='markers', name='Centroids', marker=dict(color='black', size=10)), row=1, col=1)
    fig.update_layout(title=f"Iteration {iteration}", xaxis_title="X", yaxis_title="Y")
    fig.show()

# Given input
x = np.array([[1,2],[1,1],[2,1],[5,1],[6,1],[6,2]])  # inputs
u = np.array([[1,3],[2,2]])  # centroids
max_iter = 5

# Run K-means algorithm
k_means(x, u, max_iter)


Iteration 1:
Centroids:
[[1.  2. ]
 [4.  1.2]]
Clusters:
Cluster 1: [array([1, 2])]
Cluster 2: [array([1, 1]), array([2, 1]), array([5, 1]), array([6, 1]), array([6, 2])]
--------------------


Iteration 2:
Centroids:
[[1.33333333 1.33333333]
 [5.66666667 1.33333333]]
Clusters:
Cluster 1: [array([1, 2]), array([1, 1]), array([2, 1])]
Cluster 2: [array([5, 1]), array([6, 1]), array([6, 2])]
--------------------


Iteration 3:
Centroids:
[[1.33333333 1.33333333]
 [5.66666667 1.33333333]]
Clusters:
Cluster 1: [array([1, 2]), array([1, 1]), array([2, 1])]
Cluster 2: [array([5, 1]), array([6, 1]), array([6, 2])]
--------------------


Iteration 4:
Centroids:
[[1.33333333 1.33333333]
 [5.66666667 1.33333333]]
Clusters:
Cluster 1: [array([1, 2]), array([1, 1]), array([2, 1])]
Cluster 2: [array([5, 1]), array([6, 1]), array([6, 2])]
--------------------


Iteration 5:
Centroids:
[[1.33333333 1.33333333]
 [5.66666667 1.33333333]]
Clusters:
Cluster 1: [array([1, 2]), array([1, 1]), array([2, 1])]
Cluster 2: [array([5, 1]), array([6, 1]), array([6, 2])]
--------------------
