## Import Libraries and Read Dataset

In [1]:
import pandas as pd
import numpy as np
df=pd.read_csv("Iris.csv")
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Preprocessing

In [2]:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
df.Species=le.fit_transform(df.Species)
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


## Splitting of 80% Train and 20% test

In [3]:
from sklearn.model_selection import train_test_split
train,test=train_test_split(df,test_size=0.2,random_state=1)
print(train.shape)
print(test.shape)

(120, 5)
(30, 5)


## Splitting of 60% Train Fold and 20% Train Fold from 80% Train

In [4]:
k_train,k_valid=train_test_split(train,test_size=0.2,random_state=1)
print(k_train.shape)
print(k_valid.shape)

(96, 5)
(24, 5)


## Building the KNN classifier

In [5]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
clf=KNeighborsClassifier(n_neighbors=7)

## Splitting for X and Y for KFold Cross Validation

In [6]:
x=k_train.iloc[:,0:4]
y=k_train.iloc[:,4]   
print(x.shape)
print(y.shape)

(96, 4)
(96,)


## Iterative Learning
## K Fold Cross Validation without using KFold function

In [7]:
k=5
skill=[]
models=["model"+str(i) for i in range(0,k)]
print(models)
for i in range(0,k):
    x_train_fold,x_test_fold,y_train_fold,y_test_fold=train_test_split(x,y,test_size=0.2,shuffle=True)
    print(list(x_train_fold.index))
    print(list(y_test_fold.index))
    print("======")
    models[i]=clf.fit(x_train_fold,y_train_fold)
    skill.append(accuracy_score(y_test_fold, models[i].predict(x_test_fold))*100)
print(skill)

['model0', 'model1', 'model2', 'model3', 'model4']
[74, 128, 17, 60, 144, 133, 2, 76, 103, 118, 30, 26, 114, 36, 112, 10, 136, 137, 58, 79, 12, 48, 122, 49, 87, 89, 54, 6, 115, 61, 32, 65, 107, 18, 70, 80, 149, 142, 3, 53, 59, 110, 20, 126, 55, 34, 119, 37, 96, 24, 117, 50, 46, 83, 39, 9, 91, 105, 106, 132, 108, 124, 64, 52, 121, 109, 138, 63, 85, 67, 22, 11, 41, 104, 127, 8]
[28, 145, 62, 123, 88, 93, 47, 134, 95, 97, 86, 139, 27, 25, 15, 135, 82, 45, 147, 140]
[15, 41, 6, 91, 145, 136, 76, 37, 108, 61, 80, 134, 34, 2, 22, 119, 103, 133, 135, 140, 149, 137, 88, 139, 9, 138, 11, 110, 60, 50, 65, 93, 39, 123, 121, 105, 112, 28, 128, 10, 83, 17, 124, 45, 82, 3, 126, 95, 74, 58, 106, 47, 79, 142, 97, 52, 59, 144, 63, 86, 104, 118, 53, 24, 114, 89, 70, 109, 26, 64, 122, 32, 147, 67, 107, 117]
[46, 18, 85, 62, 36, 87, 12, 30, 132, 115, 96, 54, 20, 25, 49, 48, 55, 27, 8, 127]
[124, 123, 136, 65, 48, 118, 27, 55, 70, 62, 8, 64, 149, 80, 58, 133, 107, 132, 114, 119, 87, 137, 30, 89, 2, 18, 10,

In [8]:
test

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
14,5.8,4.0,1.2,0.2,0
98,5.1,2.5,3.0,1.1,1
75,6.6,3.0,4.4,1.4,1
16,5.4,3.9,1.3,0.4,0
131,7.9,3.8,6.4,2.0,2
56,6.3,3.3,4.7,1.6,1
141,6.9,3.1,5.1,2.3,2
44,5.1,3.8,1.9,0.4,0
29,4.7,3.2,1.6,0.2,0
120,6.9,3.2,5.7,2.3,2


## Evaluating the model

In [12]:
x_test=test.iloc[:,0:4]
y_test=test.iloc[:,4]
y_pred=models[1].predict(x_test)
print(accuracy_score(y_test,y_pred)*100)

96.66666666666667


## Predicting for Unseen Example

In [14]:
x_new=[[6,3,4,2]]
y_new=models[1].predict(x_new)
y_new

array([1])