### Reading the File using pandas


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as  plt


df = pd.read_csv('iris.csv' )

df.columns = ['No.', 'sepal_length', 'sepal_width' , 'petal_length', 'petal_width', 'species']

#df.rename(columns = {"setosa" : 1,"versicolor" : 2,"virginica" : 3} , inplace= True)
                

print(df.head())


print(df.shape)

   No.  sepal_length  sepal_width  petal_length  petal_width species
0    1           5.1          3.5           1.4          0.2  setosa
1    2           4.9          3.0           1.4          0.2  setosa
2    3           4.7          3.2           1.3          0.2  setosa
3    4           4.6          3.1           1.5          0.2  setosa
4    5           5.0          3.6           1.4          0.2  setosa
(150, 6)


In [2]:
df.columns = df.columns.str.replace("_", " ")
df.columns=df.columns.str.capitalize()
df.columns

Index(['No.', 'Sepal length', 'Sepal width', 'Petal length', 'Petal width',
       'Species'],
      dtype='object')

In [3]:
df = df.drop(columns = ['No.'])

df.head()

Unnamed: 0,Sepal length,Sepal width,Petal length,Petal width,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [4]:
in_clear=print(df.describe())

       Sepal length  Sepal width  Petal length  Petal width
count    150.000000   150.000000    150.000000   150.000000
mean       5.843333     3.057333      3.758000     1.199333
std        0.828066     0.435866      1.765298     0.762238
min        4.300000     2.000000      1.000000     0.100000
25%        5.100000     2.800000      1.600000     0.300000
50%        5.800000     3.000000      4.350000     1.300000
75%        6.400000     3.300000      5.100000     1.800000
max        7.900000     4.400000      6.900000     2.500000


### Checking for Null values

In [16]:
df.isnull().any().any()

False

### Checking for Duplicates 

In [17]:
df.columns.duplicated()

array([False, False, False, False, False])

In [18]:
df.Species.unique()

array([1, 2, 3], dtype=int64)

### Renaming Species 

In [19]:
df =df.replace("setosa" , 1)
df =df.replace("versicolor" , 2)
df =df.replace("virginica" , 3)

df.Species.unique()

array([1, 2, 3], dtype=int64)

### Splitting Data into Train and Test sets 

In [20]:
# splitting data into train and test data sets 

from sklearn.model_selection import train_test_split
from sklearn import metrics # for checking the model accuracy


train, test = train_test_split(df, test_size=0.3) 


print(train.shape , test.shape)

(105, 5) (45, 5)


In [21]:
train_x = train[['Sepal length','Sepal width','Petal length','Petal width']]
train_y = train.Species

test_x = test[['Sepal length','Sepal width','Petal length','Petal width']]
test_y = test.Species #output



## Applying Mechine Learning Algorithms 
### KNN Algorithm

In [22]:
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=20) 
model.fit(train_x, train_y)
prediction = model.predict(test_x)
print('The accuracy of KNN is: ', metrics.accuracy_score(prediction, test_y))


result = model.predict([[4,3,1,0.2]])
print(result)

The accuracy of KNN is:  0.9333333333333333
[1]


### Support Vector Machine 

In [23]:
from sklearn import svm 
model = svm.SVC() 

model.fit(train_x, train_y)

prediction = model.predict(test_x)
print('The accuracy of the SVM is: ', metrics.accuracy_score(prediction, test_y)) 

result = model.predict([[4,3,7,1]])
print(result)


The accuracy of the SVM is:  0.9555555555555556
[3]




### LogisticRegression

In [24]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(train_x, train_y)
prediction = model.predict(test_x)
print('The accuracy of Logistic Regression is: ', metrics.accuracy_score(prediction, test_y))

result = model.predict([[5,3,4,1]])
print(result)

The accuracy of Logistic Regression is:  0.9333333333333333
[2]




### DecisionTreeClassifier

In [15]:
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier()
model.fit(train_x, train_y)
prediction = model.predict(test_x)
print('The accuracy of Decision Tree is: ', metrics.accuracy_score(prediction, test_y))

result4 = model.predict([[5,3,2,1]])
print(result4)

The accuracy of Decision Tree is:  0.9555555555555556
[1]
