In [2]:
import numpy as np
import pandas as pd

# Sample observations for the clustering walkthrough: label -> (x, y).
data_points = dict(
    A1=(2, 10),
    A2=(2, 6),
    A3=(11, 11),
    A4=(6, 9),
    A5=(6, 4),
    A6=(1, 2),
    A7=(5, 10),
    A8=(4, 9),
    A9=(10, 12),
    A10=(7, 5),
    A11=(9, 11),
    A12=(4, 6),
    A13=(3, 10),
    A14=(3, 8),
    A15=(6, 11),
)

# Starting centroid positions: cluster label -> (x, y).
# These coincide with points A2, A7 and A15 respectively.
initial_centroids = dict(C1=(2, 6), C2=(5, 10), C3=(6, 11))

# Tabulate both mappings: one row per label, coordinates in "x"/"y" columns,
# rows in the same order as the mappings above.
data_df = pd.DataFrame(
    list(data_points.values()),
    index=list(data_points),
    columns=["x", "y"],
)
centroids_df = pd.DataFrame(
    list(initial_centroids.values()),
    index=list(initial_centroids),
    columns=["x", "y"],
)

print("Initial Data Points:", data_df, sep="\n")
print("\nInitial Cluster Centroids:", centroids_df, sep="\n")

def assign_clusters(data, centroids):
    """Label every point with the index label of its nearest centroid.

    Distances are Euclidean and are computed from the "x" and "y" columns
    only; any other columns in ``data`` are carried through unchanged.

    Args:
        data: DataFrame with numeric "x" and "y" columns, one row per point.
        centroids: DataFrame with numeric "x" and "y" columns, one row per
            centroid. Its index labels become the cluster labels.

    Returns:
        A copy of ``data`` with a "Cluster" column holding, for each row,
        the label of the closest centroid. On an exact tie the centroid
        that comes first in ``centroids.index`` wins. The frame passed in
        is not modified.
    """
    # One column of point-to-centroid distances per centroid label,
    # row-aligned with ``data``.
    distances = pd.DataFrame(
        {
            centroid: np.sqrt(
                (data["x"] - centroids.loc[centroid, "x"]) ** 2
                + (data["y"] - centroids.loc[centroid, "y"]) ** 2
            )
            for centroid in centroids.index
        }
    )

    # Label a copy so the caller's frame does not silently gain (or have
    # overwritten) a "Cluster" column as a side effect of this call.
    labelled = data.copy()
    labelled["Cluster"] = distances.idxmin(axis=1)
    return labelled

def update_centroids(data, previous_centroids=None):
    """Recompute each centroid as the mean of the points assigned to it.

    Args:
        data: DataFrame with a "Cluster" column of cluster labels plus the
            numeric coordinate columns to average.
        previous_centroids: Optional DataFrame of the centroids the points
            were assigned against, indexed by cluster label. When given, a
            cluster that received no points keeps its previous position
            instead of disappearing from the result.

    Returns:
        DataFrame with one row per cluster label (unnamed index) and one
        column per numeric column of ``data``, holding the per-cluster mean.
        Without ``previous_centroids``, clusters that own no points are
        absent from the result.
    """
    # numeric_only=True keeps the update working when the frame carries
    # non-numeric bookkeeping columns (e.g. text labels); averaging those
    # would otherwise raise on recent pandas versions.
    updated = data.groupby("Cluster").mean(numeric_only=True).rename_axis(None)
    if previous_centroids is None:
        return updated

    # A cluster that owns no points has no mean to report. Fill such rows
    # (and any NaN means) from the previous centroids so the number of
    # clusters does not shrink between iterations.
    shared = [col for col in updated.columns if col in previous_centroids.columns]
    merged = updated.combine_first(previous_centroids[shared])
    return merged.reindex(columns=updated.columns)

# Run one clustering step: label each point with its nearest starting
# centroid, then average every cluster's members to get the new centroids.
# A copy of data_df is handed over, so data_df itself never carries a
# "Cluster" column.
data_with_clusters = assign_clusters(data_df.copy(), centroids_df)
updated_centroids = update_centroids(data_with_clusters)

print("\nInitial Data with Assigned Clusters:", data_with_clusters, sep="\n")
print("\nUpdated Centroids After First Iteration:", updated_centroids, sep="\n")


Initial Data Points:
      x   y
A1    2  10
A2    2   6
A3   11  11
A4    6   9
A5    6   4
A6    1   2
A7    5  10
A8    4   9
A9   10  12
A10   7   5
A11   9  11
A12   4   6
A13   3  10
A14   3   8
A15   6  11

Initial Cluster Centroids:
    x   y
C1  2   6
C2  5  10
C3  6  11

Initial Data with Assigned Clusters:
      x   y Cluster
A1    2  10      C2
A2    2   6      C1
A3   11  11      C3
A4    6   9      C2
A5    6   4      C1
A6    1   2      C1
A7    5  10      C2
A8    4   9      C2
A9   10  12      C3
A10   7   5      C1
A11   9  11      C3
A12   4   6      C1
A13   3  10      C2
A14   3   8      C1
A15   6  11      C3

Updated Centroids After First Iteration:
           x          y
C1  3.833333   5.166667
C2  4.000000   9.600000
C3  9.000000  11.250000
