<center><h1> Clustering model on IRIS dataset </h1></center>

<a id="1"></a>
## 1.Python Libraries

In [None]:
!pip install kneed

In [None]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import plotly.graph_objects as go
from sklearn.cluster import KMeans 
from sklearn.metrics import silhouette_score
from scipy.cluster.hierarchy import linkage,dendrogram
from sklearn.preprocessing import StandardScaler
import plotly.express as px
# Silence every warning so library notices do not clutter the cell outputs.
# NOTE(review): action='ignore' with no category hides all warnings, including ones that may matter.
import warnings
warnings.simplefilter(action='ignore')

<a id="2"></a>
## 2.Data Loading

In [None]:
# Print the full path of every file found under the Kaggle input directory
import os

input_root = '/kaggle/input'
for dirname, _, filenames in os.walk(input_root):
    for path in (os.path.join(dirname, filename) for filename in filenames):
        print(path)

In [None]:
# Read the iris CSV into a DataFrame
from sklearn import datasets  # NOTE(review): not referenced in the visible cells
iris = pd.read_csv("../input/iris-flower-dataset/IRIS.csv")

# Feature matrix used for clustering: the first four columns as a NumPy array
feature_positions = list(range(4))
features = iris.iloc[:, feature_positions]
x = features.values


In [None]:
# Preview the first rows with a cubehelix background gradient
preview_cmap = sns.cubehelix_palette(as_cmap=True)
iris.head().style.background_gradient(cmap=preview_cmap)

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes)
iris.info()

The dataset has five columns:

- 1 - sepal length in cm
- 2 - sepal width in cm
- 3 - petal length in cm
- 4 - petal width in cm
- 5 - class: the iris species of the flower (the `species` column)

In [None]:
# Summary statistics for the data; include='all' asks describe() to cover
# every column rather than only the numeric ones.

iris.describe(include='all')

In [None]:
# Feature matrix: every column except the last one, as a NumPy array.
# NOTE(review): the other visible cells use the lowercase `x`, not this `X` — confirm whether `X` is needed.
X = iris.iloc[:,:-1].values #Set our training data

# Last column only; kept for visualization because clustering does not require labels.
y = iris.iloc[:,-1].values #We'll use this just for visualization as clustering doesn't require labels

<a id="3"></a>
## 3.EDA

In [None]:
# Report the number of rows and columns in the feature matrix `x`
rows, col = x.shape
print(f"Row: {rows} \nColumns: {col}")

In [None]:
# Count the null (missing) values in each column
iris.isnull().sum()

In [None]:
# Count the number of unique values in each column
iris.nunique()

In [None]:
# Sepal length per species. y is set explicitly to the species column so the
# vertical axis really shows the flower name announced by its title; when y is
# omitted plotly plots the values against the row index instead.
fig = px.scatter(data_frame=iris, x='sepal_length', y='species', color='species')
fig.update_layout(width=800, height=600,
                  xaxis=dict(title='sepallength', color="#BF40BF"),
                  yaxis=dict(title="Flower Name", color="#BF40BF"))
fig.show()

In [None]:
# Sepal width per species. y is set explicitly to the species column so the
# vertical axis really shows the flower name announced by its title; when y is
# omitted plotly plots the values against the row index instead.
fig = px.scatter(data_frame=iris, x='sepal_width', y='species', color='species')
fig.update_layout(width=800, height=600,
                  xaxis=dict(title='sepalwidth', color="#BF40BF"),
                  yaxis=dict(title="Flower Name", color="#BF40BF"))
fig.show()

In [None]:
# Petal length per species. y is set explicitly to the species column so the
# vertical axis really shows the flower name announced by its title; when y is
# omitted plotly plots the values against the row index instead.
fig = px.scatter(data_frame=iris, x='petal_length', y='species', color='species')
fig.update_layout(width=800, height=600,
                  xaxis=dict(title='petallength', color="#BF40BF"),
                  yaxis=dict(title="Flower Name", color="#BF40BF"))
fig.show()

In [None]:
# Petal width per species. y is set explicitly to the species column so the
# vertical axis really shows the flower name announced by its title; when y is
# omitted plotly plots the values against the row index instead.
fig = px.scatter(data_frame=iris, x='petal_width', y='species', color='species')
fig.update_layout(width=800, height=600,
                  xaxis=dict(title='petalwidth', color="#BF40BF"),
                  yaxis=dict(title="Flower Name", color="#BF40BF"))
fig.show()

<a id="5"></a>
## 4.K-MEANS

In [None]:
# Shared keyword arguments that the KMeans(...) calls below unpack with **
kmeans_set = dict(init="random", n_init=10, max_iter=300, random_state=42)

In [None]:
# Standardize the feature matrix `x`; the scaled copy feeds the elbow and
# silhouette analysis in the cells that follow.
scaler= StandardScaler()
scaled_features= scaler.fit_transform(x)

In [None]:

# Elbow method: fit K-Means for k = 1..19 on the scaled features and record each model's inertia
List = []
for k in range(1, 20):
    kmeans = KMeans(n_clusters=k, **kmeans_set).fit(scaled_features)  # ** unpacks the settings dictionary
    List.append(kmeans.inertia_)

In [None]:
# Inertia against the number of clusters (elbow curve)
plt.style.use("fivethirtyeight")
inertia_fig, inertia_ax = plt.subplots()
inertia_ax.plot(range(1, 20), List)
inertia_ax.set_xticks(range(1, 20))
inertia_ax.set_xlabel('number of clusters')
inertia_ax.set_ylabel('inertia')
plt.show()

In [None]:
from kneed import KneeLocator

# Locate the elbow of the inertia curve, described to kneed as convex and decreasing
k1 = KneeLocator(
    x=range(1, 20),
    y=List,
    curve='convex',
    direction='decreasing',
)
k1.elbow

In [None]:
# Elbow curve again, this time marking the number of clusters picked by KneeLocator
plt.style.use("fivethirtyeight")
plt.plot(range(1,20),List)
plt.xticks(range(1,20))
plt.xlabel('number of clusters')
plt.ylabel('inertia')  # the plotted values are the inertias collected in `List`
# Only draw the marker when an elbow was actually found (elbow is None otherwise)
if k1.elbow is not None:
    plt.axvline(x=k1.elbow, color='b', label=f'elbow (k={k1.elbow})', ls='--')
    plt.legend()  # without a legend the axvline label would never be displayed
plt.show()

In [None]:
# Silhouette analysis: score the K-Means labelling of the scaled features for k = 2..19
silhouette_coefficients = []
for k in range(2, 20):  # starts at 2: a single cluster is excluded from the comparison
    kmeans = KMeans(n_clusters=k, **kmeans_set).fit(scaled_features)
    silhouette_coefficients.append(silhouette_score(scaled_features, kmeans.labels_))

In [None]:
# Silhouette coefficient against the number of clusters
plt.style.use("fivethirtyeight")
silhouette_fig, silhouette_ax = plt.subplots()
silhouette_ax.plot(range(2, 20), silhouette_coefficients)
silhouette_ax.set_xticks(range(2, 20))
silhouette_ax.set_xlabel('number of clusters')
silhouette_ax.set_ylabel('silhouette coefficients')
plt.show()

In [None]:
# Fit the final 3-cluster model and get a cluster index for every sample.
# NOTE(review): this fits on the raw features `x`, whereas the elbow and silhouette
# analysis used `scaled_features` — confirm which feature space is intended.
kmeans= KMeans(n_clusters=3, **kmeans_set)
y_kmeans= kmeans.fit_predict(x)

In [None]:

centroids = kmeans.cluster_centers_
print(centroids)

# Visualising the clusters on the first two feature columns.
# K-Means cluster indices are arbitrary, so each group is labelled by its cluster
# number instead of a hard-coded species name that may not match it.
cluster_colours = ['purple', 'orange', 'green']
for cluster_id, colour in enumerate(cluster_colours):
    members = x[y_kmeans == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s=100, c=colour, label=f'Cluster {cluster_id}')

# Plotting the centroids of the clusters (labelled so they appear in the legend)
plt.scatter(centroids[:, 0], centroids[:, 1], c='red', s=50, label='Centroids')
plt.xlabel('feature 0 (sepal length)')  # assumes the CSV column order listed in the notebook
plt.ylabel('feature 1 (sepal width)')
plt.legend()
plt.show()

In [None]:
# 3d scatterplot using matplotlib: first three feature columns, coloured by K-Means cluster

fig = plt.figure(figsize = (15,15))
ax = fig.add_subplot(111, projection='3d')

# Draw on the 3-D axes with three coordinates per point; when only two are
# passed, every point is placed on the z = 0 plane and the plot is effectively 2-D.
# Cluster indices are arbitrary, so groups are labelled by cluster number.
cluster_colours = ['purple', 'orange', 'green']
for cluster_id, colour in enumerate(cluster_colours):
    members = x[y_kmeans == cluster_id]
    ax.scatter(members[:, 0], members[:, 1], members[:, 2],
               s=100, c=colour, label=f'Cluster {cluster_id}')

#Plotting the centroids of the clusters
centres = kmeans.cluster_centers_
ax.scatter(centres[:, 0], centres[:, 1], centres[:, 2], s=100, c='red', label='Centroids')

ax.set_xlabel('feature 0 (sepal length)')  # assumes the CSV column order listed in the notebook
ax.set_ylabel('feature 1 (sepal width)')
ax.set_zlabel('feature 2 (petal length)')
ax.legend()  # the labels above are only shown once a legend is drawn
plt.show()

In [None]:
# Fit a 3-cluster K-Means model on the raw features `x`.
# NOTE(review): same settings and data as the earlier fit_predict cell — this refit looks redundant.
kmeans= KMeans(n_clusters=3, **kmeans_set).fit(x)


In [None]:
# Cluster index assigned to each sample by the fitted model
cluster=kmeans.labels_

In [None]:
cluster  # display the labels; with n_clusters=3 the values are 0, 1 or 2

In [None]:
# Attach each row's cluster index as a new column, then preview the first 12 rows
iris["cluster_no"]=cluster
iris.head(12)

In [None]:
# Preview the last rows of the DataFrame
iris.tail()