In [38]:
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

In [39]:
# Synthetic data set: 200 samples drawn around 5 blob centres.
# make_blobs returns the samples (X) and, for each sample, the integer index
# of the blob it was generated from (y). The fixed random_state keeps the
# generated points identical from run to run.
X, y = make_blobs(
    n_samples=200,
    centers=5,
    random_state=1,
)

In [40]:
# Quick look at the raw samples before any clustering is fitted: first column
# of X on the horizontal axis, second column on the vertical axis.
# Axis labels are supplied explicitly; without them plotly express titles the
# axes with the bare argument names "x" and "y".
graphic = px.scatter(
    x=X[:, 0],
    y=X[:, 1],
    title="KMeans Clustering",
    labels={"x": "Feature 1", "y": "Feature 2"},
)
graphic.show()

In [41]:
# Fit k-means with five clusters on X.
# random_state pins the centroid initialisation so that re-running the
# notebook reproduces the same cluster numbering and centres; without it each
# run may number the clusters differently and the outputs below go stale.
# n_init is spelled out because its default differs between scikit-learn
# releases (10 restarts in older ones, "auto" in newer ones).
kmeans = KMeans(n_clusters=5, n_init=10, random_state=1)
kmeans.fit(X)


KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.



In [42]:
# Assign every sample in X to a cluster using the fitted model; the result is
# one integer cluster index per sample.
# The bare expression on the last line makes the notebook display the array.
labels = kmeans.predict(X)
labels

array([4, 2, 3, 2, 2, 3, 0, 3, 2, 4, 3, 3, 3, 1, 3, 4, 4, 1, 2, 2, 4, 3,
       0, 3, 0, 1, 2, 0, 0, 1, 2, 0, 4, 3, 2, 4, 1, 2, 1, 4, 1, 1, 0, 0,
       2, 0, 1, 3, 2, 0, 4, 3, 3, 4, 3, 1, 4, 3, 2, 2, 1, 3, 3, 3, 2, 0,
       4, 1, 0, 2, 1, 0, 2, 0, 0, 1, 2, 1, 4, 0, 1, 1, 3, 2, 3, 4, 2, 2,
       2, 1, 0, 3, 4, 0, 3, 1, 0, 4, 2, 1, 4, 2, 0, 1, 0, 2, 1, 2, 3, 4,
       3, 3, 2, 1, 3, 4, 1, 0, 4, 4, 2, 1, 1, 3, 4, 1, 2, 0, 2, 0, 4, 0,
       3, 3, 1, 1, 3, 4, 4, 3, 3, 3, 4, 4, 0, 2, 0, 4, 2, 3, 0, 3, 4, 0,
       3, 1, 1, 1, 4, 2, 0, 4, 1, 4, 0, 1, 1, 1, 3, 0, 1, 0, 0, 3, 4, 2,
       0, 3, 2, 4, 2, 4, 4, 2, 1, 0, 1, 4, 4, 3, 2, 0, 4, 1, 1, 0, 3, 0,
       2, 1])

In [43]:
# Coordinates of the fitted centroids: one row per cluster, one column per
# feature. Row i is the centre of the cluster numbered i in `labels`.
# The bare expression on the last line makes the notebook display the array.
centers = kmeans.cluster_centers_
centers

array([[-2.17069756,  1.02591979],
       [-9.85620522, -3.91021738],
       [-6.87958999, -8.11648104],
       [-5.90368078, -3.04489641],
       [-1.58338528,  4.50520457]])

In [44]:
# Samples, coloured by the cluster index assigned to each of them.
graphic1 = px.scatter(
    x=X[:, 0],
    y=X[:, 1],
    color=labels,
    title="KMeans Clustering",
    labels={"x": "Feature 1", "y": "Feature 2", "color": "Cluster"},
)

# Centroids, coloured by their own cluster index so each one matches the
# samples assigned to it. The indices are derived from `centers` instead of a
# literal 5, so the cell keeps working if the number of clusters changes.
graphic2 = px.scatter(
    x=centers[:, 0],
    y=centers[:, 1],
    color=list(range(len(centers))),
    title="KMeans Clustering",
)
# Drawn at the default size the centroids are indistinguishable from ordinary
# samples of the same colour, so give them a larger, outlined cross marker.
graphic2.update_traces(
    marker=dict(size=14, symbol="x", line=dict(width=2, color="black"))
)

# Overlay both traces in one figure. Passing graphic1's layout carries over the
# title, axis labels and colour axis; building the figure from the traces alone
# would silently drop all of them.
graphic3 = go.Figure(
    data=[graphic1.data[0], graphic2.data[0]],
    layout=graphic1.layout,
)
graphic3.show()