### KMeans w sklearn

Będziemy używać klasy `KMeans` z modułu `sklearn.cluster`.

In [None]:
# sklearn.cluster is the scikit-learn module that holds the clustering algorithms
import sklearn.cluster

# List every name the module exposes
cluster_names = dir(sklearn.cluster)
print(cluster_names)

#### Wygenerowanie losowego zbioru danych

In [None]:
import pandas as pd
from sklearn.datasets import make_blobs

# make_blobs returns a (samples, labels) pair; only the samples are used below
data, _blob_labels = make_blobs(
    n_samples=1000,
    centers=2,
    cluster_std=1.0,
    center_box=(-4.0, 4.0),
    random_state=42,
)
df = pd.DataFrame(data=data, columns=['x1', 'x2'])
df.head()

In [None]:
import plotly.express as px

# Scatter plot of the generated points, before any clustering
px.scatter(
    data_frame=df,
    x='x1',
    y='x2',
    title='Dane',
    width=950,
    height=500,
)

#### Wyznaczenie klastrów

In [None]:
from sklearn.cluster import KMeans

# k-means starts from randomly chosen centroids, so the seed is fixed: without
# it the cluster numbering (0/1) and the exact centroids can change from one
# run of the notebook to the next.
# n_init is passed explicitly because its default differs between
# scikit-learn versions.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)

In [None]:
# Inspect the attributes and methods available on the estimator
kmeans_members = dir(kmeans)
print(kmeans_members)

In [None]:
# Fit the model on the generated points
kmeans.fit(X=data)

In [None]:
# Assign every sample to one of the clusters
y_kmeans = kmeans.predict(X=data)

# Preview the first 10 cluster assignments
y_kmeans[:10]

In [None]:
# Attach the predicted cluster label to each row as a new column
df = df.assign(y_kmeans=y_kmeans)
df.head()

#### Wizualizacja klastrów

In [None]:
import plotly.graph_objects as go

# Cluster labels are categories, not quantities. Casting them to strings makes
# plotly draw them with discrete colours and legend entries; integer labels
# would get a continuous colour scale and a colorbar instead.
# assign() builds a copy, so df itself keeps its integer labels.
fig = px.scatter(
    df.assign(y_kmeans=df['y_kmeans'].astype(str)),
    x='x1',
    y='x2',
    color='y_kmeans',
    width=950,
    height=500,
    title='Algorytm K-średnich - 2 klastry',
)

# One marker trace per centroid, for however many clusters the model has
for idx, (center_x, center_y) in enumerate(kmeans.cluster_centers_, start=1):
    fig.add_trace(
        go.Scatter(
            x=[center_x],
            y=[center_y],
            name=f'centroid {idx}',
            mode='markers',
            marker_line_width=2,
        )
    )

fig

In [None]:
# Points that were not in the fitted data; each is assigned to a cluster
new_points = [[-1, 4], [1, -2], [0, 0]]
kmeans.predict(new_points)