In [47]:
import pandas as pd
from sklearn.datasets import load_breast_cancer

In [48]:
from sklearn.preprocessing import MaxAbsScaler
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import euclidean_distances

In [49]:
data = load_breast_cancer()
X = data.data
y = data.target

In [50]:
X.shape

(569, 30)

In [51]:
scaler = MaxAbsScaler()
scaler.fit(X)
X_transform = scaler.transform(X)

In [52]:
kmeans_instance = KMeans(n_clusters=10, random_state=42)
kmeans_instance.fit(X_transform)

0,1,2
,n_clusters,10
,init,'k-means++'
,n_init,'auto'
,max_iter,300
,tol,0.0001
,verbose,0
,random_state,42
,copy_x,True
,algorithm,'lloyd'


In [53]:
kmeans_instance.cluster_centers_.shape

(10, 30)

In [54]:
kmeans_instance.cluster_centers_

array([[0.42221432, 0.48427246, 0.41768347, 0.18399307, 0.58675371,
        0.46657659, 0.53084973, 0.29070024, 0.64524854, 0.80736864,
        0.16728159, 0.3501649 , 0.16050955, 0.06588549, 0.36815862,
        0.61829148, 0.44163861, 0.59392562, 0.40505242, 0.50425603,
        0.35892219, 0.46586372, 0.34574487, 0.12595466, 0.53659778,
        0.31477631, 0.45760561, 0.45395189, 0.42157946, 0.51023829],
       [0.50396705, 0.45860115, 0.48545981, 0.25052218, 0.5600968 ,
        0.25045314, 0.13646467, 0.18247944, 0.55914413, 0.60957578,
        0.10097108, 0.19175251, 0.09297927, 0.04733933, 0.17102002,
        0.13609509, 0.06034418, 0.19171375, 0.21301125, 0.08839068,
        0.44123247, 0.48519776, 0.41397914, 0.18471552, 0.56010205,
        0.20474116, 0.18240012, 0.36325546, 0.42098178, 0.37215519],
       [0.44663438, 0.52681328, 0.42432104, 0.19710696, 0.49940643,
        0.1517533 , 0.04783996, 0.07334488, 0.52801358, 0.59613857,
        0.09961151, 0.28524978, 0.08781468, 0.

In [55]:
df = pd.DataFrame(data=X_transform, columns=data.feature_names)
df["label"] = y
df["cluster"] = kmeans_instance.labels_
df

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,label,cluster
0,0.639986,0.264257,0.651459,0.400240,0.724602,0.803706,0.703140,0.731113,0.795724,0.807779,...,0.734873,0.474612,0.728661,0.629112,0.568610,0.912027,0.693130,0.573012,0,7
1,0.731768,0.452393,0.705040,0.530188,0.518605,0.227678,0.203608,0.348757,0.596053,0.581589,...,0.632166,0.459803,0.556155,0.176371,0.192971,0.639175,0.414281,0.429012,0,6
2,0.700462,0.540988,0.689655,0.481008,0.670747,0.462942,0.462512,0.635686,0.680592,0.615661,...,0.607086,0.401740,0.648697,0.401229,0.359744,0.835052,0.544290,0.422072,0,6
3,0.406261,0.518839,0.411565,0.154378,0.872093,0.821946,0.565604,0.522863,0.854276,1.000000,...,0.393591,0.133451,0.942498,0.818809,0.548642,0.884880,1.000000,0.833735,0,3
4,0.721807,0.365071,0.716711,0.518593,0.613831,0.384482,0.463918,0.518390,0.595066,0.603756,...,0.605892,0.370240,0.617251,0.193762,0.319489,0.558419,0.356131,0.370024,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,0.766987,0.570010,0.753316,0.591363,0.679315,0.335553,0.571462,0.690358,0.567763,0.577073,...,0.661226,0.476493,0.633423,0.199716,0.328035,0.761512,0.310334,0.342892,0,6
565,0.716115,0.719196,0.696021,0.504198,0.598531,0.299363,0.337395,0.486630,0.576316,0.567837,...,0.617038,0.406911,0.523810,0.181664,0.256789,0.559450,0.387466,0.319855,0,6
566,0.590537,0.714868,0.574536,0.343103,0.517442,0.296178,0.216753,0.263519,0.523026,0.579639,...,0.504379,0.264222,0.511680,0.292439,0.271805,0.487285,0.334137,0.376867,0,8
567,0.732835,0.746690,0.743236,0.505798,0.720930,0.801969,0.823336,0.755467,0.788487,0.720033,...,0.734873,0.428068,0.741240,0.820510,0.749760,0.910653,0.615697,0.597590,0,7


In [56]:
distance_feature = []

for i in range(len(X)):

    row_distances = {}

    for cluster in df["cluster"].unique().tolist():

        centroide_values = kmeans_instance.cluster_centers_[cluster]

        distance_value = euclidean_distances(centroide_values.reshape(1, -1), X[i].reshape(1, -1))
        
        row_distances.update({f"cluster_{cluster}": distance_value[0][0]})
    distance_feature.append(row_distances)
    
df_distances = pd.DataFrame(distance_feature)
df_distances


Unnamed: 0,cluster_7,cluster_6,cluster_3,cluster_8,cluster_1,cluster_4,cluster_2,cluster_9,cluster_0,cluster_5
0,2269.161457,2269.224987,2269.484613,2269.500312,2269.559880,2269.692676,2269.632983,2269.690140,2269.650983,2268.796839
1,2372.941984,2373.006221,2373.272557,2373.286877,2373.345483,2373.483419,2373.420482,2373.480769,2373.440697,2372.576536
2,2101.344800,2101.411301,2101.681245,2101.696069,2101.755828,2101.896095,2101.831632,2101.893012,2101.851822,2100.974709
3,698.658107,698.732479,699.016530,699.035402,699.103346,699.258119,699.183297,699.251994,699.207546,698.275805
4,2052.152025,2052.220671,2052.495247,2052.509183,2052.568500,2052.711481,2052.646205,2052.708510,2052.666146,2051.779099
...,...,...,...,...,...,...,...,...,...,...
564,2523.397337,2523.464539,2523.734944,2523.749540,2523.808906,2523.948290,2523.884398,2523.945339,2523.903748,2523.024989
565,2153.455756,2153.522736,2153.793437,2153.808562,2153.868752,2154.009752,2153.944059,2154.006283,2153.964855,2153.085156
566,1424.815099,1424.884035,1425.159543,1425.175327,1425.237327,1425.383238,1425.314161,1425.379011,1425.336667,1424.441939
567,2231.005433,2231.072541,2231.342796,2231.358421,2231.419263,2231.560762,2231.495044,2231.557149,2231.515742,2230.635539


In [57]:
df = pd.concat([df, df_distances], axis=1)
df

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,cluster_7,cluster_6,cluster_3,cluster_8,cluster_1,cluster_4,cluster_2,cluster_9,cluster_0,cluster_5
0,0.639986,0.264257,0.651459,0.400240,0.724602,0.803706,0.703140,0.731113,0.795724,0.807779,...,2269.161457,2269.224987,2269.484613,2269.500312,2269.559880,2269.692676,2269.632983,2269.690140,2269.650983,2268.796839
1,0.731768,0.452393,0.705040,0.530188,0.518605,0.227678,0.203608,0.348757,0.596053,0.581589,...,2372.941984,2373.006221,2373.272557,2373.286877,2373.345483,2373.483419,2373.420482,2373.480769,2373.440697,2372.576536
2,0.700462,0.540988,0.689655,0.481008,0.670747,0.462942,0.462512,0.635686,0.680592,0.615661,...,2101.344800,2101.411301,2101.681245,2101.696069,2101.755828,2101.896095,2101.831632,2101.893012,2101.851822,2100.974709
3,0.406261,0.518839,0.411565,0.154378,0.872093,0.821946,0.565604,0.522863,0.854276,1.000000,...,698.658107,698.732479,699.016530,699.035402,699.103346,699.258119,699.183297,699.251994,699.207546,698.275805
4,0.721807,0.365071,0.716711,0.518593,0.613831,0.384482,0.463918,0.518390,0.595066,0.603756,...,2052.152025,2052.220671,2052.495247,2052.509183,2052.568500,2052.711481,2052.646205,2052.708510,2052.666146,2051.779099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,0.766987,0.570010,0.753316,0.591363,0.679315,0.335553,0.571462,0.690358,0.567763,0.577073,...,2523.397337,2523.464539,2523.734944,2523.749540,2523.808906,2523.948290,2523.884398,2523.945339,2523.903748,2523.024989
565,0.716115,0.719196,0.696021,0.504198,0.598531,0.299363,0.337395,0.486630,0.576316,0.567837,...,2153.455756,2153.522736,2153.793437,2153.808562,2153.868752,2154.009752,2153.944059,2154.006283,2153.964855,2153.085156
566,0.590537,0.714868,0.574536,0.343103,0.517442,0.296178,0.216753,0.263519,0.523026,0.579639,...,1424.815099,1424.884035,1425.159543,1425.175327,1425.237327,1425.383238,1425.314161,1425.379011,1425.336667,1424.441939
567,0.732835,0.746690,0.743236,0.505798,0.720930,0.801969,0.823336,0.755467,0.788487,0.720033,...,2231.005433,2231.072541,2231.342796,2231.358421,2231.419263,2231.560762,2231.495044,2231.557149,2231.515742,2230.635539
