In [2]:
# import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import metrics
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import Birch
from sklearn.cluster import DBSCAN
from sklearn.cluster import MeanShift
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import MiniBatchKMeans

In [9]:
df = datasets.load_iris()

In [10]:
iris_features = pd.DataFrame(df.data, columns=df.feature_names)
iris_target = pd.DataFrame(df.target, columns=["class"])
iris = pd.concat([iris_features,iris_target],axis=1)

In [11]:
iris.sample(10)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
6,4.6,3.4,1.4,0.3,0
57,4.9,2.4,3.3,1.0,1
113,5.7,2.5,5.0,2.0,2
88,5.6,3.0,4.1,1.3,1
54,6.5,2.8,4.6,1.5,1
47,4.6,3.2,1.4,0.2,0
83,6.0,2.7,5.1,1.6,1
35,5.0,3.2,1.2,0.2,0
24,4.8,3.4,1.9,0.2,0
105,7.6,3.0,6.6,2.1,2


In [12]:
iris_df = iris.sample(frac=1).reset_index(drop=True)
iris_df.sample(10)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
14,5.7,3.8,1.7,0.3,0
4,7.3,2.9,6.3,1.8,2
135,5.6,3.0,4.5,1.5,1
137,5.4,3.4,1.7,0.2,0
69,6.3,2.5,5.0,1.9,2
80,4.7,3.2,1.3,0.2,0
124,6.1,3.0,4.6,1.4,1
42,5.1,3.5,1.4,0.2,0
129,7.6,3.0,6.6,2.1,2
54,5.6,2.7,4.2,1.3,1


In [13]:
iris_features = iris_df.drop('class', axis = 1)
iris_target = iris_df['class']

iris_features.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,4.6,3.2,1.4,0.2
1,5.9,3.0,4.2,1.5
2,6.4,2.7,5.3,1.9
3,5.7,3.0,4.2,1.2
4,7.3,2.9,6.3,1.8


In [14]:
iris_target.head()

0    0
1    1
2    2
3    1
4    2
Name: class, dtype: int64

In [18]:
def build_model(clustering_model, data, labels):

    model = clustering_model(data)

    print('home\tcomp1\tv-meas\tARI\tAMI\tsilhouette')
    print(50 * '-')

    print('%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f'
           %(metrics.homogeneity_score(labels, model.labels_),
             metrics.completeness_score(labels, model.labels_),
             metrics.v_measure_score(labels, model.labels_),
             metrics.adjusted_rand_score(labels, model.labels_),
             metrics.adjusted_mutual_info_score(labels, model.labels_),
             metrics.silhouette_score(data, model.labels_)))

In [19]:
def k_means(data, n_clusters=3, max_iter=1000):
    model = KMeans(n_clusters=n_clusters, max_iter=max_iter).fit(data)

    return model

In [20]:
build_model(k_means, iris_features, iris_target)

home	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.751	0.765	0.758	0.730	0.755	0.553


  super()._check_params_vs_input(X, default_n_init=10)


In [21]:
def agglomerative_fn(data, n_clusters=3):
    model = AgglomerativeClustering(n_clusters=n_clusters).fit(data)
    return model

In [22]:
build_model(agglomerative_fn, iris_features, iris_target)

home	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.761	0.780	0.770	0.731	0.767	0.554


In [23]:
def dbscan_fn(data, eps=0.45, min_samples=4):
    model = DBSCAN(eps=eps, min_samples=min_samples).fit(data)
    return model

In [24]:
build_model(dbscan_fn, iris_features, iris_target)

home	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.577	0.609	0.593	0.508	0.584	0.372


In [25]:
def mean_shift_fn(data, bandwidth=0.85):
    model = MeanShift(bandwidth=bandwidth).fit(data)
    return model

In [26]:
build_model(mean_shift_fn, iris_features, iris_target)

home	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.760	0.772	0.766	0.744	0.763	0.551


In [27]:
def birch_fn(data, n_clusters=3):
    model = Birch(n_clusters=n_clusters).fit(data)
    return model

In [28]:
build_model(birch_fn, iris_features, iris_target)

home	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.770	0.798	0.784	0.720	0.781	0.551


In [29]:
def affinity_propagation_fn(data, damping=0.6, max_iter=1000):
    model = AffinityPropagation(damping=damping, max_iter=max_iter).fit(data)
    return model

In [30]:
build_model(affinity_propagation_fn, iris_features, iris_target)

home	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.851	0.492	0.624	0.439	0.613	0.345


In [44]:
def mini_batch_kmeans_fn(data, n_clusters=3, max_iter=1000, batch_size=60):
    model = MiniBatchKMeans(n_clusters=n_clusters, max_iter=max_iter, batch_size=batch_size).fit(data)
    return model

In [45]:
build_model(mini_batch_kmeans_fn, iris_features, iris_target)

home	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.778	0.778	0.778	0.786	0.775	0.524


  super()._check_params_vs_input(X, default_n_init=3)
