# Iris dataset experiment (classification)

In here, we are experimenting some of Classification algorithms built-in scikit-learn using iris dataset

Purposes: for comparing accuracy of each algorithm

In [1]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

In [2]:
data = pd.read_csv("iris.csv")
data

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


## Data Preprocessing

In [3]:
# this is exactly the same iris dataset
# but the label has been mapping by sklearn
data = load_iris()

In [4]:
X = data.data
y = data.target

In [5]:
X

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [6]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=1)

In [8]:
print(X_train.shape)

(120, 4)


## Logistic Regression

In [9]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(max_iter=1000)

In [10]:
lr.fit(X_train, y_train)

LogisticRegression(max_iter=1000)

In [11]:
y_predict = lr.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       2, 0, 2, 1, 0, 0, 1, 2])

In [12]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.9666666666666667

## Decision Tree

In [13]:
from sklearn import tree
dtc = tree.DecisionTreeClassifier()

In [14]:
dtc.fit(X_train, y_train)

DecisionTreeClassifier()

In [15]:
y_predict = dtc.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       2, 0, 2, 1, 0, 0, 1, 2])

In [16]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.9666666666666667

## Random Forest

In [17]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()

In [18]:
rfc.fit(X_train, y_train)

RandomForestClassifier()

In [19]:
y_predict = rfc.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       2, 0, 2, 1, 0, 0, 1, 2])

In [20]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.9666666666666667

## K-Nearest Neighbor

In [21]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()

In [22]:
knn.fit(X_train, y_train)

KNeighborsClassifier()

In [23]:
y_predict = knn.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 0, 2, 1, 0, 0, 1, 2])

In [24]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

1.0

## Naive Bayes Classifier

<b>Gaussian Naive Bayes</b>

In [25]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()

In [26]:
gnb.fit(X_train, y_train)

GaussianNB()

In [27]:
y_predict = gnb.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       2, 0, 2, 1, 0, 0, 1, 2])

In [28]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.9666666666666667

<b>Multinomial Naive Bayes</b>

In [29]:
from sklearn.naive_bayes import MultinomialNB
mnb = MultinomialNB()

In [30]:
mnb.fit(X_train, y_train)

MultinomialNB()

In [31]:
y_predict = mnb.predict(X_test)
y_predict

array([0, 2, 2, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2,
       2, 0, 2, 2, 0, 0, 2, 2])

In [32]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.5666666666666667

<b>Bernoulli Naive Bayes</b>

In [33]:
from sklearn.naive_bayes import BernoulliNB
bnb = BernoulliNB()

In [34]:
bnb.fit(X_train, y_train)

BernoulliNB()

In [35]:
y_predict = bnb.predict(X_test)
y_predict

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2])

In [36]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.2

<b>Categorical Naive Bayes</b>

In [37]:
from sklearn.naive_bayes import CategoricalNB
cat_nb = CategoricalNB()

In [38]:
cat_nb.fit(X_train, y_train)

CategoricalNB()

In [39]:
y_predict = cat_nb.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       2, 0, 2, 1, 0, 0, 1, 2])

In [40]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.9666666666666667

## Support Vector Machine

<b>Linear Kernel</b>

In [41]:
from sklearn.svm import SVC
linear_svm = SVC(kernel='linear')

In [42]:
linear_svm.fit(X_train, y_train)

SVC(kernel='linear')

In [43]:
y_predict = linear_svm.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 0, 2, 1, 0, 0, 1, 2])

In [44]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

1.0

<b>RBF Kernel</b>

In [45]:
from sklearn.svm import SVC
rbf_svm = SVC(kernel='rbf')

In [46]:
rbf_svm.fit(X_train, y_train)

SVC()

In [47]:
y_predict = rbf_svm.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       2, 0, 2, 1, 0, 0, 1, 2])

In [48]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.9666666666666667

<b>Polynomial Kernel</b>

In [49]:
from sklearn.svm import SVC
polynom_svm = SVC(kernel='poly')

In [50]:
polynom_svm.fit(X_train, y_train)

SVC(kernel='poly')

In [51]:
y_predict = polynom_svm.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       2, 0, 2, 1, 0, 0, 1, 2])

In [52]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.9666666666666667

<b>Sigmoid Kernel</b>

In [53]:
from sklearn.svm import SVC
sigmoid_svm = SVC(kernel='sigmoid')

In [54]:
sigmoid_svm.fit(X_train, y_train)

SVC(kernel='sigmoid')

In [55]:
y_predict = sigmoid_svm.predict(X_test)
y_predict

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2])

In [56]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.2

## Multilayer Perceptron (Artificial Neural Network)

<b>Activation Function: ReLU, Optimizer: adam</b>

In [57]:
from sklearn.neural_network import MLPClassifier
MLP = MLPClassifier(activation='relu', solver='adam', learning_rate='constant', batch_size='auto', max_iter=5000)

In [58]:
MLP.fit(X_train, y_train)

MLPClassifier(max_iter=5000)

In [59]:
y_predict = MLP.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 0, 2, 1, 0, 0, 1, 2])

In [60]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

1.0

<b>Activation Function: ReLU, Optimizer: SGD</b>

In [61]:
from sklearn.neural_network import MLPClassifier
MLP = MLPClassifier(activation='relu', solver='sgd', learning_rate='constant', batch_size='auto', max_iter=5000)

In [62]:
MLP.fit(X_train, y_train)

MLPClassifier(max_iter=5000, solver='sgd')

In [63]:
y_predict = MLP.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 2,
       1, 0, 2, 1, 0, 0, 1, 2])

In [64]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.9666666666666667

<b>Activation Function: tanh, Optimizer: adam</b>

In [65]:
from sklearn.neural_network import MLPClassifier
MLP = MLPClassifier(activation='tanh', solver='adam', learning_rate='constant', batch_size='auto', max_iter=5000)

In [66]:
MLP.fit(X_train, y_train)

MLPClassifier(activation='tanh', max_iter=5000)

In [67]:
y_predict = MLP.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 0, 2, 1, 0, 0, 1, 2])

In [68]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

1.0

<b>Activation Function: tanh, Optimizer: SGD</b>

In [69]:
from sklearn.neural_network import MLPClassifier
MLP = MLPClassifier(activation='tanh', solver='sgd', learning_rate='constant', batch_size='auto', max_iter=5000)

In [70]:
MLP.fit(X_train, y_train)

MLPClassifier(activation='tanh', max_iter=5000, solver='sgd')

In [71]:
y_predict = MLP.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 2,
       1, 0, 2, 1, 0, 0, 1, 2])

In [72]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.9666666666666667

<b>Activation Function: Sigmoid, Optimizer: adam</b>

In [73]:
from sklearn.neural_network import MLPClassifier
MLP = MLPClassifier(activation='logistic', solver='adam', learning_rate='constant', batch_size='auto', max_iter=5000)

In [74]:
MLP.fit(X_train, y_train)

MLPClassifier(activation='logistic', max_iter=5000)

In [75]:
y_predict = MLP.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 0, 2, 1, 0, 0, 1, 2])

In [76]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

1.0

<b>Activation Function: Sigmoid, Optimizer: SGD</b>

In [77]:
from sklearn.neural_network import MLPClassifier
MLP = MLPClassifier(activation='logistic', solver='sgd', learning_rate='constant', batch_size='auto', max_iter=5000)

In [78]:
MLP.fit(X_train, y_train)

MLPClassifier(activation='logistic', max_iter=5000, solver='sgd')

In [79]:
y_predict = MLP.predict(X_test)
y_predict

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       2, 0, 2, 1, 0, 0, 1, 2])

In [80]:
accuracy = accuracy_score(y_predict, y_test)
accuracy

0.9666666666666667