# Clustering avec K-Means

In [1]:
from scipy.spatial.distance import euclidean
import numpy as np

from sklearn import datasets
from sklearn.cluster import KMeans

from ipywidgets import IntSlider, HBox, VBox, Button

from bqplot import (
    LogScale, LinearScale, OrdinalColorScale, ColorAxis,
    Axis, Scatter, Lines, CATEGORY10, Label, Figure, Tooltip
)

In [2]:
# Cluster-count value; none of the cells visible here read it (the slider is
# seeded from initial_k instead).
current_k = 5

In [3]:
# Number of blob centres to generate; also used as the slider's starting value.
initial_k = 5

In [4]:
# Total number of synthetic points to generate.
n_samples = 2_000

In [5]:
# Generate the synthetic dataset. Later cells index the result as blobs[0]
# to get the point coordinates.
blobs = datasets.make_blobs(
    n_samples=n_samples,
    centers=initial_k,
    random_state=20,  # fixed seed: the same points are produced on every run
)

In [6]:
# First element of the make_blobs result, i.e. the array that the plotting
# cells slice by column for x and y coordinates.
data_tab = blobs[0]

In [7]:
# Axis scales span the data range padded by 2 units on each side.
x_sc = LinearScale(min=blobs[0][:, 0].min() - 2, max=blobs[0][:, 0].max() + 2)
y_sc = LinearScale(min=blobs[0][:, 1].min() - 2, max=blobs[0][:, 1].max() + 2)

# Colour scale for cluster labels 0-9. The slider allows up to 10 clusters,
# so supply the full 10-colour palette: with only the first six colours the
# higher-numbered labels could not each get a distinct colour.
c_sc = OrdinalColorScale(domain=np.arange(10).tolist(), colors=CATEGORY10)

In [8]:
# Horizontal and vertical axes, each bound to its scale and drawn with solid
# grid lines.
ax_x = Axis(label="x", scale=x_sc, grid_lines="solid")
ax_y = Axis(
    label="y",
    scale=y_sc,
    orientation="vertical",
    side="left",
    grid_lines="solid",
)

In [9]:
# Scatter mark holding every data point. `color` starts as a placeholder and
# is replaced with per-point cluster labels by change_k.
scat = Scatter(
    x=blobs[0][:, 0],
    y=blobs[0][:, 1],
    color=[1],
    scales={"x": x_sc, "y": y_sc, "color": c_sc},
)

In [10]:
# Scatter mark for the cluster centres, drawn in black. It starts with a
# single placeholder point at the origin; change_k overwrites x and y.
scat_centroids = Scatter(
    x=[0],
    y=[0],
    colors=["black"],
    scales={"x": x_sc, "y": y_sc},
)

In [11]:
# Figure combining the data points and the centroid markers on shared axes.
fig = Figure(
    title="K-means",
    marks=[scat, scat_centroids],
    axes=[ax_x, ax_y],
    animation_duration=10,
)

In [12]:
# Slider for the number of clusters: integers 2 through 10, starting at
# initial_k.
k_slider = IntSlider(
    value=initial_k,
    min=2,
    max=10,
    step=1,
    description="Number of clusters",
)

In [13]:
def change_k(change):
    """Re-cluster the data and refresh the plot for the slider's current value.

    Fits K-Means on ``blobs[0]`` with ``k_slider.value`` clusters, colours the
    data points by their assigned label and moves the centroid markers to the
    fitted cluster centres.

    Args:
        change: Change notification supplied by the widget observer machinery.
            It is not read; the cluster count comes from ``k_slider.value``.
    """
    kmeans = KMeans(n_clusters=k_slider.value).fit(blobs[0])
    scat.color = kmeans.labels_

    centers = kmeans.cluster_centers_
    # x and y change length together whenever k changes. Batch both
    # assignments into one sync so the mark is never sent to the front end
    # with x and y arrays of different lengths.
    with scat_centroids.hold_sync():
        scat_centroids.x = centers[:, 0]
        scat_centroids.y = centers[:, 1]

In [14]:
# Re-run the clustering every time the slider value changes.
k_slider.observe(change_k, 'value')

# Observers only fire on changes, so run the callback once now. Otherwise the
# figure would show the placeholder colour and the dummy centroid at (0, 0)
# until the user first moves the slider.
change_k({'new': k_slider.value})

In [15]:
# Stack the slider row above the figure. As the last expression in the cell,
# the resulting container is what the notebook displays.
VBox(children=[HBox(children=[k_slider]), fig])

VBox(children=(HBox(children=(IntSlider(value=5, description='Number of clusters', max=10, min=2),)), Figure(a…