In [16]:
import math
import pandas as pd

In [17]:
class Point:
    """A labelled point on the 2-D plane.

    Attributes:
        name: Label shown when the point is printed.
        x: Horizontal coordinate.
        y: Vertical coordinate.
    """

    def __init__(self, name: str, x: int, y: int):
        """Store the label and both coordinates on the instance."""
        self.name, self.x, self.y = name, x, y

    def __repr__(self):
        """Render the point as ``<name>:(<x>, <y>)``."""
        return f"{self.name}:({self.x}, {self.y})"

def get_dist(p1: Point, p2: Point, rounded=True) -> float:
    """Return the Euclidean distance between two points.

    Args:
        p1: First point; only its ``x`` and ``y`` attributes are read.
        p2: Second point; only its ``x`` and ``y`` attributes are read.
        rounded: When truthy, the result is rounded to 3 decimal places.

    Returns:
        The distance as a float.
    """
    dx, dy = p1.x - p2.x, p1.y - p2.y
    dist = math.sqrt(dx ** 2 + dy ** 2)
    return round(dist, 3) if rounded else dist

In [18]:
# Input data: the eight sample points to cluster, given as (name, x, y).
point_ls = [
    Point(name, x, y)
    for name, x, y in (
        ('A1', 2, 10),
        ('A2', 2, 5),
        ('A3', 8, 4),
        ('A4', 5, 8),
        ('A5', 7, 5),
        ('A6', 6, 4),
        ('A7', 1, 2),
        ('A8', 4, 9),
    )
]

# Starting centroids; each one uses the coordinates of an existing sample point.
centroid_ls = [
    Point(name, x, y)
    for name, x, y in (
        ('C1', 2, 10),  # same coordinates as A1
        ('C2', 5, 8),  # same coordinates as A4
        ('C3', 1, 2),  # same coordinates as A7
    )
]

In [19]:
# Number of clusters, taken from the length of the starting centroid list.
K = len(centroid_ls)

In [20]:
def get_nearest_centroid(p: Point, c_ls: list) -> (Point, list):
    """Find the centroid closest to ``p`` and collect every distance.

    Args:
        p: The point being assigned.
        c_ls: Candidate centroids, visited in order.

    Returns:
        A ``(nearest, distances)`` pair. ``distances`` holds the
        ``get_dist`` result for each centroid in ``c_ls`` order.
        ``nearest`` is the first centroid with the smallest distance,
        or ``None`` when no distance is below infinity (for example
        when ``c_ls`` is empty).
    """
    closest = None
    shortest = math.inf
    distances = []
    for centroid in c_ls:
        distances.append(get_dist(p, centroid))
        # Strict comparison: on a tie the earlier centroid is kept.
        if distances[-1] < shortest:
            shortest, closest = distances[-1], centroid

    return closest, distances


def get_iteration_record(pt_ls: list, c_ls: list) -> list:
    """Compute one table row per point for a single assignment pass.

    Args:
        pt_ls: Points to assign.
        c_ls: Centroids to measure against.

    Returns:
        A list of ``(point_name, row)`` tuples in ``pt_ls`` order, where
        ``row`` is the list of distances from ``get_nearest_centroid``
        with the name of the nearest centroid appended as the last item.
    """
    def _row_for(point):
        # The distance list is extended in place with the winner's name.
        winner, cells = get_nearest_centroid(point, c_ls)
        cells.append(winner.name)
        return point.name, cells

    return [_row_for(point) for point in pt_ls]


def get_iteration_df(pt_ls: list, c_ls: list) -> pd.DataFrame:
    """Build the distance/assignment table for one assignment pass.

    The frame has one row per point (indexed by point name), one
    ``"<centroid name> dist"`` column per centroid, and a final
    ``'Cluster Assigned'`` column holding the nearest centroid's name.

    The frame is created with a single constructor call instead of being
    enlarged one row at a time, so pandas sees whole columns when it
    infers dtypes and no per-row re-allocation takes place.

    Args:
        pt_ls: Points to assign.
        c_ls: Centroids to measure against.

    Returns:
        The table described above; an empty frame that still carries the
        expected columns when there are no rows to add.
    """
    cols = [f"{c.name} dist" for c in c_ls]
    cols.append('Cluster Assigned')

    # Keyed by point name: a repeated name keeps its first position but takes
    # the latest row, which is what repeated ``df.loc[name] = row`` would do.
    rows_by_name = {}
    for name, row in get_iteration_record(pt_ls, c_ls):
        rows_by_name[name] = row

    if not rows_by_name:
        return pd.DataFrame(columns=cols)

    return pd.DataFrame(
        list(rows_by_name.values()),
        index=list(rows_by_name),
        columns=cols,
    )


def print_iteration_table(iter_name, iter_df: pd.DataFrame):
    """Print a heading line followed by the transposed iteration table.

    Args:
        iter_name: Label placed in front of ``" iteration : "``.
        iter_df: Table to show; its ``.T`` (transpose) is what gets printed.
    """
    heading = f"{iter_name} iteration : "
    print(heading)
    transposed = iter_df.T
    print(transposed)

In [21]:
def k_means(pt_ls: list, c_ls: list):
    """Run and print the first assignment pass for the given centroids.

    Only this single pass is performed: each point's distances and nearest
    centroid are tabulated and printed under the label ``'1st'``. The
    centroids are not recomputed and nothing is returned.

    Args:
        pt_ls: Points to assign.
        c_ls: Centroids to measure against.
    """
    print_iteration_table('1st', get_iteration_df(pt_ls, c_ls))

In [22]:
# Run the first assignment pass on the sample points and print its table.
k_means(point_ls, centroid_ls)

1st iteration : 
                     A1     A2     A3     A4     A5     A6     A7     A8
C1 dist             0.0    5.0  8.485  3.606  7.071  7.211  8.062  2.236
C2 dist           3.606  4.243    5.0    0.0  3.606  4.123  7.211  1.414
C3 dist           8.062  3.162   7.28  7.211  6.708  5.385    0.0  7.616
Cluster Assigned     C1     C3     C2     C2     C2     C2     C3     C2


In [26]:
# Rebuild the first-pass table directly and show it transposed
# (points as columns) as the cell's output.
iter_df_1 = get_iteration_df(point_ls, centroid_ls)
iter_df_1.T

Unnamed: 0,A1,A2,A3,A4,A5,A6,A7,A8
C1 dist,0.0,5.0,8.485,3.606,7.071,7.211,8.062,2.236
C2 dist,3.606,4.243,5.0,0.0,3.606,4.123,7.211,1.414
C3 dist,8.062,3.162,7.28,7.211,6.708,5.385,0.0,7.616
Cluster Assigned,C1,C3,C2,C2,C2,C2,C3,C2
