In [61]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import metrics

from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.cluster import MeanShift
from sklearn.cluster import Birch
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import SpectralClustering

from sklearn import preprocessing

import warnings
warnings.filterwarnings('ignore')

In [26]:
# Read the iris CSV, skipping its first line and assigning the column
# names listed below instead.
iris_df = pd.read_csv(
    'datasets/iris.csv',
    skiprows=1,
    names=[
        'sepal-length',
        'sepal-width',
        'petal-length',
        'petal-width',
        'class',
    ],
)
iris_df.head()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [27]:
# Encoder used to turn the values of the 'class' column into integer codes.
label_encoding = preprocessing.LabelEncoder()

# NOTE: this overwrites the 'class' column of iris_df in place. Re-running the
# cell refits the encoder on the already-encoded column, so the encoder would
# then be fitted on integer codes rather than on the original class names.
iris_df['class']= label_encoding.fit_transform(iris_df['class'])

iris_df.head()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [28]:
# Feature matrix: every column of iris_df except the 'class' label column.
iris_features = iris_df.drop(columns='class')
iris_features.head()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [29]:
# Reference labels: the 'class' column of iris_df, passed to build_model as
# the ground truth the cluster assignments are scored against.
iris_labels = iris_df['class']

In [30]:
def build_model(clustering_model, data, labels):
    """Fit a clustering model and print a one-line table of quality scores.

    Parameters
    ----------
    clustering_model : callable
        Called as ``clustering_model(data)``; must return an object exposing
        the per-sample cluster assignments as ``labels_``.
    data : array-like
        Samples handed to ``clustering_model`` and to the silhouette score.
    labels : array-like
        Reference labels the cluster assignments are compared with.

    Prints
    ------
    A header row, a separator, and one row with homogeneity, completeness,
    V-measure, adjusted Rand index, adjusted mutual information and the
    silhouette coefficient, each formatted with three decimals.

    Notes
    -----
    The assignments in ``labels_`` are passed to the metrics unchanged.
    The silhouette coefficient is only defined when the number of distinct
    cluster labels is between 2 and ``n_samples - 1``; outside that range
    ``nan`` is printed instead of letting ``silhouette_score`` raise.
    """
    model = clustering_model(data)
    # Read the assignments once instead of on every metric call.
    cluster_labels = model.labels_

    print('homo\tcomp1\tv-meas\tARI\tAMI\tsilhouette')
    print(50 * '-')

    # silhouette_score raises ValueError for a single cluster or for one
    # cluster per sample, which would abort the report after the header.
    n_found = len(set(cluster_labels))
    if 1 < n_found < len(cluster_labels):
        silhouette = metrics.silhouette_score(data, cluster_labels)
    else:
        silhouette = float('nan')

    scores = (
        metrics.homogeneity_score(labels, cluster_labels),
        metrics.completeness_score(labels, cluster_labels),
        metrics.v_measure_score(labels, cluster_labels),
        metrics.adjusted_rand_score(labels, cluster_labels),
        metrics.adjusted_mutual_info_score(labels, cluster_labels),
        silhouette,
    )
    print('%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f' % scores)

In [31]:
def k_means(data, n_clusters = 3, max_iter = 1000, random_state = 42):
    """Fit K-means on ``data`` and return the fitted estimator.

    Parameters
    ----------
    data : array-like
        Samples passed to ``KMeans.fit``.
    n_clusters : int, default 3
        Number of clusters to form.
    max_iter : int, default 1000
        Iteration cap forwarded to ``KMeans``.
    random_state : int or None, default 42
        Seed forwarded to ``KMeans``. It is fixed by default so that
        re-running the notebook reproduces the same clustering; pass
        ``None`` for a fresh random initialisation on every call.

    Returns
    -------
    The object returned by ``KMeans(...).fit(data)``.
    """
    estimator = KMeans(
        n_clusters=n_clusters,
        max_iter=max_iter,
        random_state=random_state,
    )
    return estimator.fit(data)

In [32]:
# Score the K-means clustering of the feature columns against the class labels.
build_model(
    clustering_model=k_means,
    data=iris_features,
    labels=iris_labels,
)

homo	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.751	0.765	0.758	0.730	0.755	0.553


In [33]:
def agglomerative_fn(data, n_clusters = 3):
    """Fit agglomerative clustering on ``data``.

    ``n_clusters`` is forwarded to ``AgglomerativeClustering``; the value
    returned by its ``fit`` call is handed back to the caller.
    """
    clusterer = AgglomerativeClustering(n_clusters=n_clusters)
    fitted = clusterer.fit(data)
    return fitted

In [34]:
# Score the agglomerative clustering of the feature columns against the class labels.
build_model(
    clustering_model=agglomerative_fn,
    data=iris_features,
    labels=iris_labels,
)

homo	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.761	0.780	0.770	0.731	0.767	0.554


In [38]:
def dbscan_fn(data, eps = 0.45, min_samples = 4):
    """Fit DBSCAN on ``data`` and return the result of the ``fit`` call.

    ``eps`` and ``min_samples`` are passed to ``DBSCAN`` unchanged.
    """
    settings = {'eps': eps, 'min_samples': min_samples}
    return DBSCAN(**settings).fit(data)

In [39]:
# Score the DBSCAN clustering of the feature columns against the class labels.
build_model(
    clustering_model=dbscan_fn,
    data=iris_features,
    labels=iris_labels,
)

homo	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.577	0.609	0.593	0.508	0.584	0.372


In [40]:
def mean_shift_fn(data, bandwidth = 0.85):
    """Fit mean-shift clustering on ``data``.

    ``bandwidth`` is forwarded to ``MeanShift``; the value returned by its
    ``fit`` call is handed back to the caller.
    """
    clusterer = MeanShift(bandwidth=bandwidth)
    fitted = clusterer.fit(data)
    return fitted

In [41]:
# Score the mean-shift clustering of the feature columns against the class labels.
build_model(
    clustering_model=mean_shift_fn,
    data=iris_features,
    labels=iris_labels,
)

homo	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.760	0.772	0.766	0.744	0.763	0.551


In [47]:
def birch_fn(data, n_clusters = 3):
    """Fit Birch clustering on ``data`` and return the result of ``fit``.

    ``n_clusters`` is passed to ``Birch`` unchanged.
    """
    return Birch(n_clusters=n_clusters).fit(data)

In [48]:
# Score the Birch clustering of the feature columns against the class labels.
build_model(
    clustering_model=birch_fn,
    data=iris_features,
    labels=iris_labels,
)

homo	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.675	0.738	0.705	0.610	0.701	0.502


In [55]:
def affinity_propagation_fn(data, damping = 0.6,max_iter = 1000):
    """Fit affinity propagation on ``data`` and return the result of ``fit``.

    ``damping`` and ``max_iter`` are passed to ``AffinityPropagation``
    unchanged.
    """
    settings = {'damping': damping, 'max_iter': max_iter}
    clusterer = AffinityPropagation(**settings)
    return clusterer.fit(data)

In [56]:
# Score the affinity-propagation clustering of the feature columns against the class labels.
build_model(
    clustering_model=affinity_propagation_fn,
    data=iris_features,
    labels=iris_labels,
)

homo	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.851	0.492	0.624	0.439	0.613	0.345


In [59]:
def mini_batch_kmeans_fn(data, n_clusters = 3, max_iter = 1000, batch_size = 20, random_state = 42):
    """Fit mini-batch K-means on ``data`` and return the fitted estimator.

    Parameters
    ----------
    data : array-like
        Samples passed to ``MiniBatchKMeans.fit``.
    n_clusters : int, default 3
        Number of clusters to form.
    max_iter : int, default 1000
        Iteration cap forwarded to ``MiniBatchKMeans``.
    batch_size : int, default 20
        Mini-batch size; previously hard-coded to 20 inside the function.
    random_state : int or None, default 42
        Seed forwarded to ``MiniBatchKMeans``. It is fixed by default so
        that re-running the notebook reproduces the same clustering; pass
        ``None`` for a fresh random initialisation and batch sampling on
        every call.

    Returns
    -------
    The object returned by ``MiniBatchKMeans(...).fit(data)``.
    """
    estimator = MiniBatchKMeans(
        n_clusters=n_clusters,
        max_iter=max_iter,
        batch_size=batch_size,
        random_state=random_state,
    )
    return estimator.fit(data)

In [60]:
# Score the mini-batch K-means clustering of the feature columns against the class labels.
build_model(
    clustering_model=mini_batch_kmeans_fn,
    data=iris_features,
    labels=iris_labels,
)

homo	comp1	v-meas	ARI	AMI	silhouette
--------------------------------------------------
0.740	0.746	0.743	0.729	0.740	0.544
