In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import davies_bouldin_score
from sklearn.metrics import silhouette_score
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import calinski_harabasz_score
import pandas as pd
import numpy as np

In [7]:
#Importing wine dataset
from sklearn.datasets import load_wine

#Loading dataset
wine = load_wine()

#Creating dataframe
dfwine = pd.DataFrame(data = wine.data, columns = wine.feature_names)

#Getting true labels
true_labels = wine.target

#Displaying dataframe
dfwine

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0


In [10]:
#KMeans Clustering

from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters = 3, random_state = 20, n_init = "auto").fit(dfwine)

#Getting lables
kmeans_labels = kmeans.labels_

#Calculating silhouette score
silhouette_avg1 = silhouette_score(dfwine, kmeans_labels)

#Calculating DBI
dbi_score1 = davies_bouldin_score(dfwine, kmeans_labels)

#Calculating Rand Score
rand_score1 = adjusted_rand_score(true_labels, kmeans_labels)

#Calculating Calinski and Harabasz SCore
ch_score1 = calinski_harabasz_score(dfwine, kmeans_labels)


print("KMeans Clustering")
print("Silhouette Score: ", silhouette_avg1)
print("Davies Bouldin Index: ", dbi_score1)
print("Rand Score: ", rand_score1)
print("Calinski and Harabasz Score: ", ch_score1)

KMeans Clustering
Silhouette Score:  0.5711381937868838
Davies Bouldin Index:  0.5342431775436286
Rand Score:  0.37111371823084754
Calinski and Harabasz Score:  561.815657860671


In [9]:
#MeanShift Clustering

from sklearn.cluster import MeanShift

meanshift = MeanShift().fit(dfwine)

#Getting lables
meanshift_labels = meanshift.labels_

#Calculating silhouette score
silhouette_avg2 = silhouette_score(dfwine, meanshift_labels)

#Calculating DBI
dbi_score2 = davies_bouldin_score(dfwine, meanshift_labels)

#Calculating Rand Score
rand_score2 = adjusted_rand_score(true_labels, meanshift_labels)

#Calculating Calinski and Harabasz SCore
ch_score2 = calinski_harabasz_score(dfwine, meanshift_labels)


print("MeanShift Clustering")
print("Silhouette Score: ", silhouette_avg2)
print("Davies Bouldin Index: ", dbi_score2)
print("Rand Score: ", rand_score2)
print("Calinski and Harabasz Score: ", ch_score2)

MeanShift Clustering
Silhouette Score:  0.5024921259394886
Davies Bouldin Index:  0.5561495856687338
Rand Score:  0.39723664098601413
Calinski and Harabasz Score:  454.0589434652176


In [11]:
#Agglomerative Clustering

from sklearn.cluster import AgglomerativeClustering

AClustering = AgglomerativeClustering(n_clusters = 3).fit(dfwine)

#Getting lables
agg_labels = AClustering.labels_

#Calculating silhouette score
silhouette_avg3 = silhouette_score(dfwine, agg_labels)

#Calculating DBI
dbi_score3 = davies_bouldin_score(dfwine, agg_labels)

#Calculating Rand Score
rand_score3 = adjusted_rand_score(true_labels, agg_labels)

#Calculating Calinski and Harabasz SCore
ch_score3 = calinski_harabasz_score(dfwine, agg_labels)


print("Agglomerative Clustering")
print("Silhouette Score: ", silhouette_avg3)
print("Davies Bouldin Index: ", dbi_score3)
print("Rand Score: ", rand_score3)
print("Calinski and Harabasz Score: ", ch_score3)

Agglomerative Clustering
Silhouette Score:  0.5644796401732068
Davies Bouldin Index:  0.5357343073560251
Rand Score:  0.36840191587483156
Calinski and Harabasz Score:  552.851711505718


In [12]:
#Spectral Clustering

from sklearn.cluster import SpectralClustering

SClustering = SpectralClustering(n_clusters = 3, random_state = 20).fit(dfwine)

#Getting lables
spec_labels = SClustering.labels_

#Calculating silhouette score
silhouette_avg4 = silhouette_score(dfwine, spec_labels)

#Calculating DBI
dbi_score4 = davies_bouldin_score(dfwine, spec_labels)

#Calculating Rand Score
rand_score4 = adjusted_rand_score(true_labels, spec_labels)

#Calculating Calinski and Harabasz SCore
ch_score4 = calinski_harabasz_score(dfwine, spec_labels)


print("Spectral Clustering")
print("Silhouette Score: ", silhouette_avg4)
print("Davies Bouldin Index: ", dbi_score4)
print("Rand Score: ", rand_score4)
print("Calinski and Harabasz Score: ", ch_score4)



Spectral Clustering
Silhouette Score:  0.28034808703238107
Davies Bouldin Index:  0.44772255236450287
Rand Score:  0.000351000351000351
Calinski and Harabasz Score:  3.3691025466629982


  est = KMeans(
