# K-means Clustering Demo

In [None]:
import pandas as pd
import altair as alt
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn import datasets

from model_samples.kmeans import NaiveKMeans


In [6]:

# Iris dataset: load the bundled scikit-learn sample and keep the raw
# feature matrix around for the later PCA / clustering cells.
iris = datasets.load_iris()
data = iris['data']

# Wrap the feature matrix in a DataFrame with explicit column names so
# the charts below can refer to columns by name.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
df = pd.DataFrame(data, columns=feature_columns)

# Plot 2D: both scatter plots share the same data and mark type, so
# build that part once and only vary the encoded columns and the title.
base_scatter = alt.Chart(df).mark_circle()

sepal_graph = base_scatter.encode(
    x='sepal_length:Q',
    y='sepal_width:Q',
).properties(title="Sepal")

petal_graph = base_scatter.encode(
    x='petal_length:Q',
    y='petal_width:Q',
).properties(title="Petal")

# Show the two charts side by side.
side_by_side = sepal_graph | petal_graph
side_by_side.show()



In [7]:


# 3D chart: pick the three feature columns used as the x / y / z axes.
x, y, z = df['petal_length'], df['sepal_width'], df['petal_width']

def plotly_3D(x, y, z, categories=None):
    """Display a 3D scatter plot of the given coordinates with Plotly.

    Args:
        x: Values plotted along the x axis.
        y: Values plotted along the y axis.
        z: Values plotted along the z axis.
        categories: Optional value forwarded to Plotly as the marker
            colour (``marker_color``). Defaults to ``None``.

    Returns:
        None. The figure is shown via ``fig.show()``.
    """
    # Thin, nearly transparent outline drawn around every marker.
    outline = dict(color='rgba(217, 217, 217, 0.14)', width=1)

    points = go.Scatter3d(
        x=x,
        y=y,
        z=z,
        mode='markers',
        marker=dict(size=5, line=outline),
        marker_color=categories,
    )

    fig = go.Figure(data=points)
    fig.update_layout(width=700, height=700)
    fig.show()

# Render the three selected feature columns without any colouring.
plotly_3D(x=x, y=y, z=z)



In [8]:

#%%
# PCA analysis: project the feature matrix onto three principal
# components so it can be shown in a single 3D scatter plot.

pca_decomposer = PCA(n_components=3)
pca_comps = pca_decomposer.fit_transform(data)

# Rows of the transpose are the component columns; unpack them as the
# x / y / z arguments.
first_comp, second_comp, third_comp = pca_comps.T
plotly_3D(first_comp, second_comp, third_comp)


In [9]:

# Clustering
data = iris['data']

# Fit the project's k-means implementation on the raw features.
clusterer = NaiveKMeans(n_clusters=2, n_max_iterations=1000)
clusterer.fit(data)

# Project the samples onto three principal components for plotting.
# PCA is already imported at the top of the notebook, so it is not
# re-imported here. The projection is recomputed in this cell so the
# cell does not depend on the earlier PCA cell having been run.
pca_decomposer = PCA(n_components=3)
pca_comps = pca_decomposer.fit_transform(data)

# Colour each projected point by the clusterer's `attributions_`
# (presumably one cluster label per sample -- confirm against NaiveKMeans).
plotly_3D(
    pca_comps[:, 0],
    pca_comps[:, 1],
    pca_comps[:, 2],
    categories=clusterer.attributions_,
)
