In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd

In [2]:
# read data from csv file
def read_csv(path):
    """Load a CSV file and separate the feature columns from the label column.

    Parameters
    ----------
    path : str or path-like
        Location of a CSV file that contains a ``species`` column.

    Returns
    -------
    tuple
        ``(x, y)``: ``x`` is the loaded DataFrame without the ``species``
        column, ``y`` is the ``species`` column as a Series.
    """
    target_column = "species"
    frame = pd.read_csv(path)
    # Everything except the target column is treated as a feature.
    features = frame.drop(columns=[target_column])
    labels = frame[target_column]
    return features, labels

In [3]:
######
#Demo.
######
# Read the dataset from the CSV file and keep the features and labels as x and y.
# The path is written as a raw string so that the Windows backslashes are taken
# literally instead of being parsed as (invalid) escape sequences like "\B" or "\M".
x, y = read_csv(r"D:\BTech\Machine Learning\KNN_Irish_Dataset\iris-flower-dataset\IRIS.csv")
print("Features :\n", x)
print("\n\nLables:", y)

Features :
      sepal_length  sepal_width  petal_length  petal_width
0             5.1          3.5           1.4          0.2
1             4.9          3.0           1.4          0.2
2             4.7          3.2           1.3          0.2
3             4.6          3.1           1.5          0.2
4             5.0          3.6           1.4          0.2
..            ...          ...           ...          ...
145           6.7          3.0           5.2          2.3
146           6.3          2.5           5.0          1.9
147           6.5          3.0           5.2          2.0
148           6.2          3.4           5.4          2.3
149           5.9          3.0           5.1          1.8

[150 rows x 4 columns]


Lables: 0         Iris-setosa
1         Iris-setosa
2         Iris-setosa
3         Iris-setosa
4         Iris-setosa
            ...      
145    Iris-virginica
146    Iris-virginica
147    Iris-virginica
148    Iris-virginica
149    Iris-virginica
Name: species, L

In [4]:
# split the data into training and testing sets
def split(x, y, test_size=0.2, random_state=0):
    """Split features and labels into training and testing subsets.

    Parameters
    ----------
    x : array-like
        Feature rows.
    y : array-like
        Labels aligned with ``x``.
    test_size : float or int, optional
        Forwarded to ``train_test_split``; defaults to ``0.2``, the value
        that was previously hard-coded.
    random_state : int or None, optional
        Forwarded to ``train_test_split``; defaults to ``0``, the value that
        was previously hard-coded, so repeated runs give the same split.

    Returns
    -------
    tuple
        ``(xtrain, xtest, ytrain, ytest)``.
    """
    xtrain, xtest, ytrain, ytest = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )
    return xtrain, xtest, ytrain, ytest

In [5]:
######
#Demo.
######
# Split the loaded data into train/test subsets and show all four pieces.
xtrain, xtest, ytrain, ytest = split(x=x, y=y)
print(
    "xtrain data :\n", xtrain,
    "\n\nytrain data :\n", ytrain,
    "\n\nxtest data :\n", xtest,
    "\n\nytest data :\n", ytest,
)

xtrain data :
      sepal_length  sepal_width  petal_length  petal_width
137           6.4          3.1           5.5          1.8
84            5.4          3.0           4.5          1.5
27            5.2          3.5           1.5          0.2
127           6.1          3.0           4.9          1.8
132           6.4          2.8           5.6          2.2
..            ...          ...           ...          ...
9             4.9          3.1           1.5          0.1
103           6.3          2.9           5.6          1.8
67            5.8          2.7           4.1          1.0
117           7.7          3.8           6.7          2.2
47            4.6          3.2           1.4          0.2

[120 rows x 4 columns] 

ytrain data :
 137     Iris-virginica
84     Iris-versicolor
27         Iris-setosa
127     Iris-virginica
132     Iris-virginica
            ...       
9          Iris-setosa
103     Iris-virginica
67     Iris-versicolor
117     Iris-virginica
47         Iris-se

In [6]:
# training the model
def train(xtrain, ytrain, key = 6):
    """Fit a k-nearest-neighbours classifier on the training data.

    Parameters
    ----------
    xtrain : array-like
        Training feature rows.
    ytrain : array-like
        Training labels aligned with ``xtrain``.
    key : int, optional
        Passed to ``KNeighborsClassifier`` as ``n_neighbors``; defaults to 6.

    Returns
    -------
    The object returned by ``KNeighborsClassifier(...).fit(xtrain, ytrain)``.
    """
    # The value returned by fit() is what callers receive as the trained model.
    return KNeighborsClassifier(n_neighbors=key).fit(xtrain, ytrain)

In [7]:
######
#Demo.
######
# Fit the classifier using train()'s default neighbour count and show the estimator.
model = train(xtrain=xtrain, ytrain=ytrain)
print("model object :", model)

model object : KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=6, p=2,
                     weights='uniform')


In [8]:
# predict the label of test data
def predict(knn, xtest):
    """Predict labels for ``xtest`` and print the inputs and predictions.

    Parameters
    ----------
    knn : object
        Anything exposing a ``predict(xtest)`` method, e.g. the model
        returned by ``train``.
    xtest : array-like
        Feature rows to classify.

    Returns
    -------
    Whatever ``knn.predict(xtest)`` returns.
    """
    predicted = knn.predict(xtest)
    # Print the test rows first, then the labels predicted for them.
    for caption, shown in (
        ("\nGiven test data :\n", xtest),
        ("\nPredicted lable :\n", predicted),
    ):
        print(caption, shown)
    return predicted

In [9]:
######
#Demo.
######
# Predict labels for the held-out features; predict() also prints both.
ylabel = predict(knn=model, xtest=xtest)


Given test data :
      sepal_length  sepal_width  petal_length  petal_width
114           5.8          2.8           5.1          2.4
62            6.0          2.2           4.0          1.0
33            5.5          4.2           1.4          0.2
107           7.3          2.9           6.3          1.8
7             5.0          3.4           1.5          0.2
100           6.3          3.3           6.0          2.5
40            5.0          3.5           1.3          0.3
86            6.7          3.1           4.7          1.5
76            6.8          2.8           4.8          1.4
71            6.1          2.8           4.0          1.3
134           6.1          2.6           5.6          1.4
51            6.4          3.2           4.5          1.5
73            6.1          2.8           4.7          1.2
54            6.5          2.8           4.6          1.5
63            6.1          2.9           4.7          1.4
37            4.9          3.1           1.5        

In [10]:
# check the accuracy of model
def accuracy(orignal_label, predicted_label):
    """Print the accuracy as a percentage and return it as a fraction.

    Parameters
    ----------
    orignal_label : array-like
        The true labels.
    predicted_label : array-like
        The labels predicted by the model, aligned with ``orignal_label``.

    Returns
    -------
    The value computed by ``accuracy_score`` (the printed figure divided by
    100), so callers can reuse the score instead of only reading it from
    the printed output.
    """
    # sample_weight is left at its default (None), which is what was passed before.
    acc = accuracy_score(orignal_label, predicted_label)
    print("\nAccuracy :\n", acc * 100)
    return acc

In [11]:
######
#Demo.
######
# Compare the true test labels with the predicted ones.
accuracy(orignal_label=ytest, predicted_label=ylabel)


Accuracy :
 100.0


In [12]:
if __name__ == "__main__":
    # End-to-end run: load the data, split it, train, predict, and score.
    # Raw string so the Windows backslashes are not parsed as escape sequences.
    dataset_path = r"D:\BTech\Machine Learning\KNN_Irish_Dataset\iris-flower-dataset\IRIS.csv"
    X, Y = read_csv(dataset_path)
    xtrain, xtest, ytrain, ytest = split(X, Y)
    # Keep the classifier returned by train(); previously the result was discarded
    # and predict() silently relied on a `model` variable left over from an
    # earlier cell, which fails on a fresh kernel.
    model = train(xtrain, ytrain)
    label = predict(model, xtest)
    accuracy(ytest, label)


Given test data :
      sepal_length  sepal_width  petal_length  petal_width
114           5.8          2.8           5.1          2.4
62            6.0          2.2           4.0          1.0
33            5.5          4.2           1.4          0.2
107           7.3          2.9           6.3          1.8
7             5.0          3.4           1.5          0.2
100           6.3          3.3           6.0          2.5
40            5.0          3.5           1.3          0.3
86            6.7          3.1           4.7          1.5
76            6.8          2.8           4.8          1.4
71            6.1          2.8           4.0          1.3
134           6.1          2.6           5.6          1.4
51            6.4          3.2           4.5          1.5
73            6.1          2.8           4.7          1.2
54            6.5          2.8           4.6          1.5
63            6.1          2.9           4.7          1.4
37            4.9          3.1           1.5        