In [73]:
#importing Libraries
import numpy as np
import random
import pandas as pd
from pandas import plotting

import plotly.offline as py
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)  

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
import xgboost as xgb
from lightgbm import LGBMClassifier
import lightgbm as  lgb
from xgboost.sklearn import XGBClassifier

from sklearn.metrics import  accuracy_score
from sklearn.metrics import confusion_matrix


import warnings
warnings.filterwarnings('ignore')

In [74]:
data=pd.read_csv("IRIS.csv")

In [72]:
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [16]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
sepal_length    150 non-null float64
sepal_width     150 non-null float64
petal_length    150 non-null float64
petal_width     150 non-null float64
species         150 non-null object
dtypes: float64(4), object(1)
memory usage: 5.9+ KB


In [31]:
print("We have {} different types of species in this dataset".format(data.species.nunique()))
print(data.species.unique())

We have 3 different types of species in this dataset
['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


In [32]:
data.species.value_counts()

Iris-setosa        50
Iris-virginica     50
Iris-versicolor    50
Name: species, dtype: int64

In [33]:
data.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [75]:
fig = go.Figure()

species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
for specie in species:
    fig.add_trace(go.Violin(x=data['species'][data['species']==specie],
                                y=data['petal_length'][data['species'] == specie],
                                name=specie,
                                box_visible=True,
                                meanline_visible=True))
fig.update_yaxes(title_text='petal_length')
py.offline.iplot(fig)

In [60]:
fig = go.Figure()

species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
for specie in species:
    fig.add_trace(go.Violin(x=data['species'][data['species']==specie],
                                y=data['petal_width'][data['species'] == specie],
                                name=specie,
                                box_visible=True,
                                meanline_visible=True))
fig.update_yaxes(title_text='petal_width')
py.offline.iplot(fig)

In [63]:
fig = go.Figure()

species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
for specie in species:
    fig.add_trace(go.Violin(x=data['species'][data['species']==specie],
                                y=data['sepal_length'][data['species'] == specie],
                                name=specie,
                                box_visible=True,
                                meanline_visible=True))
fig.update_yaxes(title_text='sepal_length')
py.offline.iplot(fig)

In [62]:
fig = go.Figure()

species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
for specie in species:
    fig.add_trace(go.Violin(x=data['species'][data['species']==specie],
                                y=data['sepal_width'][data['species'] == specie],
                                name=specie,
                                box_visible=True,
                                meanline_visible=True))
fig.update_yaxes(title_text='sepal_width')
py.offline.iplot(fig)

In [69]:
fig = go.Figure()

species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
for specie in species:
    fig.add_trace(go.Scatter(x=data['petal_length'][data['species']==specie],
                                y=data['petal_width'][data['species'] == specie],
                                name=specie,
                             mode='markers'
                                ))
fig.update_xaxes(title_text='petal_length')
fig.update_yaxes(title_text='petal_width')
py.offline.iplot(fig)

In [71]:
fig = go.Figure()

species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
for specie in species:
    fig.add_trace(go.Scatter(x=data['sepal_length'][data['species']==specie],
                                y=data['sepal_width'][data['species'] == specie],
                                name=specie,
                             mode='markers'
                                ))
fig.update_xaxes(title_text='sepal_length')
fig.update_yaxes(title_text='sepal_width')
py.offline.iplot(fig)

In [20]:
train, test = train_test_split(data, test_size = 0.3)# in this our main data is split into train and test
# the attribute test_size=0.3 splits the data into 70% and 30% ratio. train=70% and test=30%
print(train.shape)
print(test.shape)

(105, 5)
(45, 5)


In [25]:
X_train = train[['sepal_length','sepal_width','petal_length','petal_width']]# taking the training data features
y_train=train.species# output of our training data
X_test= test[['sepal_length','sepal_width','petal_length','petal_width']] # taking test data features
y_test =test.species   #output value of test data

In [26]:
LG=LogisticRegression()
LG.fit(X_train,y_train)
LG_prediction=LG.predict(X_test)
print("the accuracy of the Logistic Regression model is :",accuracy_score(LG_prediction,y_test))
confusion_matrix(LG_prediction,y_test)

the accuracy of the Logistic Regression model is : 0.9555555555555556


array([[13,  0,  0],
       [ 0, 14,  0],
       [ 0,  2, 16]])

In [27]:
SVC=SVC()
SVC.fit(X_train,y_train)
SVC_prediction=SVC.predict(X_test)
print("the accuracy of the Support Vector Machine Classifier model is :",accuracy_score(y_test,SVC_prediction))
confusion_matrix(SVC_prediction,y_test)

the accuracy of the Support Vector Machine Classifier model is : 0.9777777777777777


array([[13,  0,  0],
       [ 0, 16,  1],
       [ 0,  0, 15]])

In [28]:
DTC=DecisionTreeClassifier(max_leaf_nodes=3)
DTC.fit(X_train,y_train)
DTC_prediction=DTC.predict(X_test)
print('The accuracy of the Decision Tree Classifier model is',accuracy_score(DTC_prediction,y_test))
confusion_matrix(DTC_prediction,y_test)

The accuracy of the Decision Tree Classifier model is 0.9555555555555556


array([[13,  0,  0],
       [ 0, 16,  2],
       [ 0,  0, 14]])

In [29]:
KNN=KNeighborsClassifier(n_neighbors=3)
KNN.fit(X_train,y_train)
KNN_prediction=KNN.predict(X_test)
print('The accuracy of the KNeighborsClassifier model is',accuracy_score(KNN_prediction,y_test))
confusion_matrix(KNN_prediction,y_test)

The accuracy of the KNeighborsClassifier model is 0.9555555555555556


array([[13,  0,  0],
       [ 0, 15,  1],
       [ 0,  1, 15]])

In [30]:
GNB=GaussianNB()
GNB.fit(X_train,y_train)
GNB_prediction=GNB.predict(X_test)
print('The accuracy of the GaussionNB model is',accuracy_score(GNB_prediction,y_test))
confusion_matrix(GNB_prediction,y_test)

The accuracy of the GaussionNB model is 0.9555555555555556


array([[13,  0,  0],
       [ 0, 16,  2],
       [ 0,  0, 14]])

In [31]:
RFC=RandomForestClassifier()
RFC.fit(X_train,y_train)
RFC_prediction=RFC.predict(X_test)
print('The accuracy of the RandomForestClassifier model is',accuracy_score(RFC_prediction,y_test))
confusion_matrix(RFC_prediction,y_test)

The accuracy of the RandomForestClassifier model is 0.9555555555555556


array([[13,  0,  0],
       [ 0, 16,  2],
       [ 0,  0, 14]])

In [32]:
XGB=XGBClassifier()
XGB.fit(X_train,y_train)
XGB_prediction=XGB.predict(X_test)
print('The accuracy of the XGBClassifier model is',accuracy_score(XGB_prediction,y_test))
confusion_matrix(XGB_prediction,y_test)

The accuracy of the XGBClassifier model is 0.9777777777777777


array([[13,  0,  0],
       [ 0, 16,  1],
       [ 0,  0, 15]])

In [35]:
LGB=LGBMClassifier()
LGB.fit(X_train,y_train)
LGB_prediction=LGB.predict(X_test)
print('The accuracy of the LGBMClassifier model is',accuracy_score(LGB_prediction,y_test))
confusion_matrix(LGB_prediction,y_test)

The accuracy of the LGBMClassifier model is 0.9555555555555556


array([[13,  0,  0],
       [ 0, 15,  1],
       [ 0,  1, 15]])