In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.cluster import KMeans 
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer

In [None]:
from ucimlrepo import fetch_ucirepo 
from sklearn.datasets import load_wine, load_iris

# Load the Iris dataset and unpack its data / target arrays
iris = load_iris()
X_iris, y_iris = iris.data, iris.target

# Load the Wine dataset and unpack its data / target arrays
wine = load_wine()
X_wine, y_wine = wine.data, wine.target


In [None]:
# Min-max scale each dataset with its own scaler.
# A single shared scaler would be refit by the second fit_transform call,
# discarding the parameters learned on Iris; keeping one scaler per dataset
# lets each be reused later (e.g. inverse_transform on cluster centroids).
iris_scaler = MinMaxScaler()
X_iris_scaled = iris_scaler.fit_transform(X_iris)

wine_scaler = MinMaxScaler()
X_wine_scaled = wine_scaler.fit_transform(X_wine)

# Backward compatibility: this cell previously left `scaler` fitted on Wine.
scaler = wine_scaler


In [None]:
# KMeans clustering for Iris dataset (scaled)
# n_init is pinned explicitly: its default changed across scikit-learn
# releases (10 -> 'auto'), so leaving it implicit makes the clustering and
# the silhouette score depend on the installed version and triggers a
# FutureWarning on the releases in between.
kmeans_iris = KMeans(n_clusters=3, n_init=10, random_state=0)
# fit_predict fits the model and returns the labels of the training samples.
y_pred_iris = kmeans_iris.fit_predict(X_iris_scaled)
print("Silhouette score for Iris dataset is: ", silhouette_score(X_iris_scaled, y_pred_iris))

In [None]:
# KMeans clustering for Wine dataset (scaled)
# n_init is pinned explicitly: its default changed across scikit-learn
# releases (10 -> 'auto'), so leaving it implicit makes the clustering and
# the silhouette score depend on the installed version and triggers a
# FutureWarning on the releases in between.
kmeans_wine = KMeans(n_clusters=3, n_init=10, random_state=0)
# fit_predict fits the model and returns the labels of the training samples.
y_pred_wine = kmeans_wine.fit_predict(X_wine_scaled)
print("Silhouette score for Wine dataset is: ", silhouette_score(X_wine_scaled, y_pred_wine))

In [None]:
# KMeans clustering for Iris dataset (not scaled)
# n_init is pinned explicitly: its default changed across scikit-learn
# releases (10 -> 'auto'), so leaving it implicit makes the clustering and
# the silhouette score depend on the installed version.
kmeans_iris_unscaled = KMeans(n_clusters=3, n_init=10, random_state=0)
# fit_predict fits the model and returns the labels of the training samples.
y_pred_iris_unscaled = kmeans_iris_unscaled.fit_predict(X_iris)
# The message names the unscaled variant so this output can be told apart
# from the score printed for the scaled data.
print("Silhouette score for Iris dataset (not scaled) is: ", silhouette_score(X_iris, y_pred_iris_unscaled))

In [None]:
# KMeans clustering for Wine dataset (not scaled)
# n_init is pinned explicitly: its default changed across scikit-learn
# releases (10 -> 'auto'), so leaving it implicit makes the clustering and
# the silhouette score depend on the installed version.
kmeans_wine_unscaled = KMeans(n_clusters=3, n_init=10, random_state=0)
# fit_predict fits the model and returns the labels of the training samples.
y_pred_wine_unscaled = kmeans_wine_unscaled.fit_predict(X_wine)
# The message names the unscaled variant so this output can be told apart
# from the score printed for the scaled data.
print("Silhouette score for Wine dataset (not scaled) is: ", silhouette_score(X_wine, y_pred_wine_unscaled))

In [None]:
# Visualize the clusters in the Iris dataset (scaled), using only the first
# two feature columns as the x / y axes.
# KMeans cluster ids are arbitrary and are not aligned with the species in
# y_iris, so the legend names clusters by id instead of by species.
for cluster_id, color in enumerate(('purple', 'orange', 'green')):
    in_cluster = y_pred_iris == cluster_id
    plt.scatter(X_iris_scaled[in_cluster, 0], X_iris_scaled[in_cluster, 1],
                s=100, c=color, label=f'Cluster {cluster_id}')

# Plotting the centroids of the clusters (scaled)
plt.scatter(kmeans_iris.cluster_centers_[:, 0], kmeans_iris.cluster_centers_[:, 1],
            s=100, c='red', label='Centroids')

# Axis labels state which two features are shown and that they are scaled.
plt.xlabel(f'{iris.feature_names[0]} - min-max scaled')
plt.ylabel(f'{iris.feature_names[1]} - min-max scaled')
plt.legend()
plt.title('Clusters of Iris dataset (k = 3)')
plt.savefig('KMeans_Iris_scaled.png')
plt.show()

In [None]:
# Visualize the clusters in the Wine dataset (scaled), using only the first
# two feature columns as the x / y axes.
# One scatter call per cluster id; legend entries are Wine-1 .. Wine-3.
for cluster_id, color in enumerate(('purple', 'orange', 'green')):
    in_cluster = y_pred_wine == cluster_id
    plt.scatter(X_wine_scaled[in_cluster, 0], X_wine_scaled[in_cluster, 1],
                s=100, c=color, label=f'Wine-{cluster_id + 1}')

# Plotting the centroids of the clusters (scaled)
plt.scatter(kmeans_wine.cluster_centers_[:, 0], kmeans_wine.cluster_centers_[:, 1],
            s=100, c='red', label='Centroids')

# Axis labels state which two features are shown and that they are scaled.
plt.xlabel(f'{wine.feature_names[0]} - min-max scaled')
plt.ylabel(f'{wine.feature_names[1]} - min-max scaled')
plt.legend()
plt.title('Clusters of Wine dataset (k = 3)')
plt.savefig('KMeans_Wine_scaled.png')
plt.show()

In [None]:
# Visualize the clusters in the Iris dataset (not scaled), using only the
# first two feature columns as the x / y axes.
# KMeans cluster ids are arbitrary and are not aligned with the species in
# y_iris, so the legend names clusters by id instead of by species.
for cluster_id, color in enumerate(('purple', 'orange', 'green')):
    in_cluster = y_pred_iris_unscaled == cluster_id
    plt.scatter(X_iris[in_cluster, 0], X_iris[in_cluster, 1],
                s=100, c=color, label=f'Cluster {cluster_id}')

# Plotting the centroids of the clusters (not scaled)
plt.scatter(kmeans_iris_unscaled.cluster_centers_[:, 0], kmeans_iris_unscaled.cluster_centers_[:, 1],
            s=100, c='red', label='Centroids')

# Axis labels state which two features are shown.
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.legend()
plt.title('Clusters of Iris dataset (not scaled)')
plt.savefig('KMeans_Iris_unscaled.png')
plt.show()

In [None]:
# Visualize the clusters in the Wine dataset (not scaled), using only the
# first two feature columns as the x / y axes.
# One scatter call per cluster id; legend entries are Wine-1 .. Wine-3.
for cluster_id, color in enumerate(('purple', 'orange', 'green')):
    in_cluster = y_pred_wine_unscaled == cluster_id
    plt.scatter(X_wine[in_cluster, 0], X_wine[in_cluster, 1],
                s=100, c=color, label=f'Wine-{cluster_id + 1}')

# Plotting the centroids of the clusters (not scaled)
plt.scatter(kmeans_wine_unscaled.cluster_centers_[:, 0], kmeans_wine_unscaled.cluster_centers_[:, 1],
            s=100, c='red', label='Centroids')

# Axis labels state which two features are shown.
plt.xlabel(wine.feature_names[0])
plt.ylabel(wine.feature_names[1])
plt.legend()
plt.title('Clusters of Wine dataset (not scaled)')
plt.savefig('KMeans_Wine_unscaled.png')
plt.show()

In [None]:
# Implement the elbow method for the scaled Iris dataset:
# fit KMeans for k = 1..10 and record the within-cluster sum of squares
# (the fitted model's inertia_) for each k.
k_values = range(1, 11)
wcss = []
for k in k_values:
    # Use a loop-local model name. Rebinding `kmeans_iris` here would replace
    # the k=3 model used by the Iris cluster plot with the k=10 model, so
    # re-running that plot cell afterwards would draw the wrong centroids.
    # n_init is pinned because its default differs between scikit-learn
    # releases.
    elbow_model = KMeans(n_clusters=k, n_init=10, random_state=0)
    elbow_model.fit(X_iris_scaled)
    wcss.append(elbow_model.inertia_)

# Markers make the individual k values (and the elbow) easier to read off.
plt.plot(k_values, wcss, marker='o')
plt.title('The Elbow Method for Iris dataset (scaled)')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.savefig('Elbow_Iris_scaled.png')
plt.show()