# K-means interactive

> Yang

In [22]:
%matplotlib qt
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.widgets import Button
from matplotlib.widgets import PolygonSelector
from sklearn.cluster import KMeans

def colors_from_lbs(lbs, colors=None):
    """Translate label indices into colours, cycling through a palette.

    Parameters
    ----------
    lbs : array-like of int
        Label indices. Each index is reduced modulo the palette size, so
        labels beyond the palette length wrap around.
    colors : sequence, optional
        Palette to pick from. When ``None``, a built-in list of 20 hex
        colour strings is used.

    Returns
    -------
    The palette entries selected by the wrapped labels (a NumPy array with
    one entry per label when ``lbs`` is a sequence).
    """
    default_palette = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
        '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
        '#3397dc', '#ff993e', '#3fca3f', '#df5152', '#a985ca',
        '#ad7165', '#e992ce', '#999999', '#dbdc3c', '#35d8e9',
    ]

    palette = np.array(default_palette if colors is None else colors)
    # Wrap indices so any integer label maps onto a valid palette slot.
    wrapped = np.array(lbs) % len(palette)
    return palette[wrapped]

# Synthetic dataset: two 2-D Gaussian blobs with identity covariance,
# centred at (-2, 0) and (2, 0), n_samples points each. The seeded
# RandomState makes the data reproducible.
rng = np.random.RandomState(0)
n_samples = 1000
cov = [[1, 0], [0, 1]]
X = np.concatenate([
    rng.multivariate_normal(mean=[-2, 0], cov=cov, size=n_samples), 
    rng.multivariate_normal(mean=[2, 0], cov=cov, size=n_samples)])

# Fit a 2-cluster K-means model on the data. The labels computed here are
# not read by the visible code below; plot_figure recomputes them from the
# centres it is given.
kmeans = KMeans(n_clusters=2, random_state=0, n_init="auto")
labels = kmeans.fit_predict(X)

#kmeans.cluster_centers_ = np.array([[1,2],[2,1]],dtype=kmeans.cluster_centers_.dtype)
#centers = kmeans.predict(X)
# Fitted centres; used for the initial plot and to seed the polygon selector.
centers = kmeans.cluster_centers_

# Two side-by-side axes: scatter plot on the left, per-cluster counts on the right.
fig, (ax_orig, ax_redim) = plt.subplots(1, 2, figsize=(12, 6))

def plot_figure(axe_list, X, centers):
    """Redraw the cluster scatter plot and the per-cluster sample counts.

    Parameters
    ----------
    axe_list : pair of matplotlib Axes
        ``(ax_orig, ax_redim)``. The first receives the scatter plot of the
        samples and centres, the second the bar chart of cluster sizes.
    X : ndarray
        Sample matrix; columns 0 and 1 are plotted as the two features.
    centers : array-like
        Cluster centres, one row per centre, columns matching ``X``.

    Notes
    -----
    This overwrites ``cluster_centers_`` on the module-level ``kmeans``
    estimator so that ``kmeans.predict`` assigns every sample to the nearest
    of the *given* centres rather than the fitted ones.
    """
    ax_orig, ax_redim = axe_list

    # Coerce once (as a fresh float64 copy) so that the estimator and the
    # column indexing below both work for lists/tuples as well as ndarrays.
    centers = np.array(centers, dtype=np.float64)
    n_centers = len(centers)

    kmeans.cluster_centers_ = centers
    labels = kmeans.predict(X)

    ax_orig.clear()
    ax_orig.scatter(X[:, 0], X[:, 1], alpha=0.3, label="samples",
                    c=colors_from_lbs(labels))
    ax_orig.scatter(centers[:, 0], centers[:, 1], s=50, c='black', edgecolors='r')
    ax_orig.set(
        aspect="auto",
        title="K-means",
        xlabel="first feature",
        ylabel="second feature",
    )

    ax_redim.clear()
    class_name = [f"class {i}" for i in range(n_centers)]
    # minlength keeps a zero-height bar for any centre that attracts no samples.
    counts = np.bincount(labels, minlength=n_centers)

    ax_redim.bar(class_name, counts, label=class_name)
    ax_redim.set(
        aspect="auto",
        title="Clustering results",
        # The x axis enumerates clusters, not a feature.
        xlabel="Cluster",
        ylabel="Number of samples",
    )

    # Redraw the figure that owns the axes we were handed instead of relying
    # on a module-level figure object.
    ax_orig.figure.canvas.draw_idle()

# Initial draw, using the centres obtained from the K-means fit.
plot_figure((ax_orig, ax_redim), X, centers)

def onselect(verts):
    """Selector callback: use the polygon's vertices as cluster centres.

    ``verts`` is the sequence of vertex coordinates handed over by the
    selector; both plots are redrawn with those points as the centres.
    """
    vertex_centers = np.array(verts)
    plot_figure(axe_list=(ax_orig, ax_redim), X=X, centers=vertex_centers)

# The selector is bound to a module-level name on purpose: matplotlib widgets
# stop responding once the last reference to them is dropped.
selector = PolygonSelector(
    ax_orig,
    onselect=onselect,
    props=dict(color='r', linestyle='', linewidth=3, alpha=0.6, label="Component"),
)
# Seed the polygon's vertices with the fitted centres so they can be dragged.
selector.verts = centers


# ax_redim.hist((X @ component.T - x_center @ component.T),50)
# ax_redim.set(
#     aspect="auto",
#     title="1-dimensional dataset after dimension reduction",
#     xlabel="Main feature",
#     ylabel="Number of samples",
# )
#_asp = np.diff(ax_orig.get_ylim())[0] / np.diff(ax_orig.get_xlim())[0]
#ax_redim.set_aspect(_asp)

# Tidy the subplot spacing, then display the figure window.
plt.tight_layout()
plt.show()


In [None]:


# pca = PCA(n_components=1).fit(X)
# component = pca.components_.reshape(-1)

# # print(pca.components_)
# # print(pca.explained_variance_)
# # print(list(zip(pca.components_, pca.explained_variance_)))

# # fig, (ax_orig, ax_redim) = plt.subplots(1, 2, figsize=(12, 6))
# # ax_orig.scatter(X[:, 0], X[:, 1], alpha=0.3, label="samples")
# # x_center = np.mean(X, axis=0)

# comp_vector = [component, x_center]

# ax_orig.set(
#     aspect="auto", 
#     title="2-dimensional dataset with principal components",
#     xlabel="first feature",
#     ylabel="second feature",
# )


