# MACHINE LEARNING WITH IRIS DATA SET

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings 
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides any deprecation or convergence warnings the
# plotting and modelling libraries below may raise -- consider narrowing it.
warnings.filterwarnings("ignore")

In [2]:
# Print the names of the files present in the dataset's input directory.
import os 
print(os.listdir("../input/iris"))

### UNDERSTANDING THE DATA

In [3]:
# Load the Iris CSV into a DataFrame and preview its first 15 rows.
iris = pd.read_csv(filepath_or_buffer="../input/iris/Iris.csv")
iris.head(n=15)

In [4]:
# Summary statistics for the numeric columns: count, mean, standard deviation,
# min, the 25% / 50% (median) / 75% quartiles, and max. The DataFrame itself
# is not modified.
iris.describe()

In [5]:
# Plain scatter plots of the raw measurements:
# sepal length vs. sepal width, then petal length vs. petal width.
iris.plot(x='SepalLengthCm', y='SepalWidthCm', kind='scatter')
iris.plot(x='PetalLengthCm', y='PetalWidthCm', kind='scatter')

In [6]:
# Joint plot: a bivariate scatter of petal length vs. petal width together
# with the univariate distribution of each variable on the margins.
# `height` is used instead of `size`: seaborn renamed the keyword in v0.9 and
# later removed the old name.
sns.jointplot(x='PetalLengthCm', y='PetalWidthCm', data=iris, height=5)

In [7]:
# Scatter plots coloured by species: petal dimensions first, then sepal
# dimensions. `height` replaces the `size` keyword that seaborn renamed in
# v0.9 and later removed.
(
    sns.FacetGrid(iris, hue='Species', height=5)
    .map(plt.scatter, 'PetalLengthCm', 'PetalWidthCm')
    .add_legend()
)
(
    sns.FacetGrid(iris, hue='Species', height=5)
    .map(plt.scatter, 'SepalLengthCm', 'SepalWidthCm')
    .add_legend()
)

In [8]:
# Box plot of petal length for each species: shows the median, the lower (25%)
# and upper (75%) quartiles, and the spread of the values.
sns.boxplot(data=iris, x='Species', y='PetalLengthCm')

In [9]:
# Kernel density estimate of petal length per species -- a smoothed
# alternative to a histogram of the observations.
# `height` replaces the `size` keyword that seaborn renamed in v0.9 and later
# removed.
(
    sns.FacetGrid(iris, hue='Species', height=5)
    .map(sns.kdeplot, 'PetalLengthCm')
    .add_legend()
)

In [10]:
# Pairwise relationships between all measurement columns, coloured by species.
# The Id column is dropped because it is only a row identifier.
# `height` replaces the `size` keyword that seaborn renamed in v0.9 and later
# removed.
sns.pairplot(iris.drop("Id", axis=1), hue='Species', height=3)

### DIVIDING THE DATA FOR TRAINING AND TESTING

In [11]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import train_test_split

In [18]:
# Build the feature matrix and label vector as NumPy arrays.
# Assumes the CSV layout is: Id, measurement columns..., Species -- so the
# slice 1:-1 keeps only the measurements and -1 selects the label column.
x, y = iris.iloc[:, 1:-1].values, iris.iloc[:, -1].values

In [19]:
# Hold out 30% of the rows for testing.
# random_state makes the split (and every score below) reproducible between
# runs; stratify=y keeps the class proportions the same in both partitions so
# no species is under-represented in the test set.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42, stratify=y
)

### TRAINING THE MODEL

In [20]:
# Logistic regression: fit on the training split, predict the test split.
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

# scikit-learn metrics take (y_true, y_pred). Passing them the other way round
# transposes the confusion matrix and swaps precision with recall.
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print('Accuracy : ', accuracy_score(y_test, y_pred))

In [22]:
# k-nearest-neighbours classification using the 8 closest training samples.
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=8)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

# scikit-learn metrics take (y_true, y_pred). Passing them the other way round
# transposes the confusion matrix and swaps precision with recall.
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print('Accuracy : ', accuracy_score(y_test, y_pred))

In [23]:
# Support vector classification with scikit-learn's default settings.
from sklearn.svm import SVC
classifier = SVC()
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

# scikit-learn metrics take (y_true, y_pred). Passing them the other way round
# transposes the confusion matrix and swaps precision with recall.
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print('Accuracy : ', accuracy_score(y_test, y_pred))

In [24]:
# Decision tree classification with scikit-learn's default settings.
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier()
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

# scikit-learn metrics take (y_true, y_pred). Passing them the other way round
# transposes the confusion matrix and swaps precision with recall.
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print('Accuracy : ', accuracy_score(y_test, y_pred))

In [25]:
# Gaussian naive Bayes classification.
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

# scikit-learn metrics take (y_true, y_pred). Passing them the other way round
# transposes the confusion matrix and swaps precision with recall.
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print('Accuracy : ', accuracy_score(y_test, y_pred))

In [29]:
# Multi-layer perceptron (neural network) classification with default settings.
from sklearn.neural_network import MLPClassifier
classifier = MLPClassifier()
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

# scikit-learn metrics take (y_true, y_pred). Passing them the other way round
# transposes the confusion matrix and swaps precision with recall.
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print('Accuracy : ', accuracy_score(y_test, y_pred))

In [26]:
# Extreme gradient boosting (XGBoost) classification.
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

# Recent XGBoost releases require class labels to be integers 0..n_classes-1
# and reject string labels, so encode the species names for training and map
# the predictions back to names afterwards.
label_encoder = LabelEncoder()
classifier = XGBClassifier()
classifier.fit(x_train, label_encoder.fit_transform(y_train))
y_pred = label_encoder.inverse_transform(classifier.predict(x_test))

# scikit-learn metrics take (y_true, y_pred). Passing them the other way round
# transposes the confusion matrix and swaps precision with recall.
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print('Accuracy : ', accuracy_score(y_test, y_pred))