In [1]:
#Importing important Libraries
import numpy as np
import pandas as pd
import math
import random

In [2]:
#Function for reading and converting file to a float array
def file_to_numpy(file):
    """Load a header-less CSV and return its contents as a NumPy array.

    Parameters
    ----------
    file : path or file-like object
        Forwarded unchanged to ``pandas.read_csv``; the first line is read
        as data because ``header=None`` is passed.

    Returns
    -------
    numpy.ndarray
        One entry per CSV row, as produced by ``DataFrame.to_numpy()``.
    """
    return pd.read_csv(file, header=None).to_numpy()

In [3]:
#Function for calculating the Euclidean distance between two rows
def E_Distance(row1, row2):
    """Return the Euclidean distance between two rows.

    Only the first ``len(row1)`` positions are compared, so any extra
    trailing elements of ``row2`` are ignored. If ``row2`` is shorter than
    ``row1`` the indexing into ``row2`` fails.

    Parameters
    ----------
    row1, row2 : indexable sequences of numbers

    Returns
    -------
    float
        Square root of the summed squared coordinate differences.
    """
    squared_total = sum(
        (row1[axis] - row2[axis]) ** 2 for axis in range(len(row1))
    )
    return math.sqrt(squared_total)

In [4]:
#Function for returning the n nearest neighbors
def KNN(train, test_row, n):
    """Return the ``n`` training rows closest to ``test_row``.

    The last element of every training row is left out of the distance
    computation (``classify_KNN`` reads it as the class).

    Parameters
    ----------
    train : iterable of rows
    test_row : sequence of numbers compared against each training row
    n : int
        Number of neighbours to keep.

    Returns
    -------
    list of (train_row, distance) tuples
        Ordered by increasing distance; rows at equal distance keep their
        order from ``train``.
    """
    scored = [
        (candidate, E_Distance(candidate[:-1], test_row))
        for candidate in train
    ]
    ranked = sorted(scored, key=lambda pair: pair[-1])
    return ranked[:n]

In [5]:
#Function for predicting the class with the help of n nearest neighbors
def classify_KNN(train,test,n):
    """Predict the class of one row by majority vote of its ``n`` nearest neighbours.

    Parameters
    ----------
    train : iterable of rows whose last element is the class
    test : a single row to classify
    n : int
        Number of neighbours that vote.

    Returns
    -------
    The most frequent class among the neighbours. On a tie, the class that
    appears first in nearest-to-farthest order wins.
    """
    votes = [neighbour[0][-1] for neighbour in KNN(train, test, n)]
    return max(votes, key=votes.count)

In [6]:
#Function for dividing the dataset into R folds
def datasetsplit(train, R):
    """Randomly partition ``train`` into ``R`` folds of equal size.

    Rows are drawn without replacement using the ``random`` module. Each
    fold holds ``int(len(train) / R)`` rows, so when the length is not a
    multiple of ``R`` the leftover rows are not placed in any fold.

    Parameters
    ----------
    train : sequence of rows (not modified; a list copy is consumed)
    R : int
        Number of folds.

    Returns
    -------
    list of lists
        ``R`` folds, each a list of rows.
    """
    remaining = list(train)
    per_fold = int(len(train) / R)
    folds = []
    for _ in range(R):
        folds.append([
            remaining.pop(random.randrange(len(remaining)))
            for _ in range(per_fold)
        ])
    return folds

In [7]:
#Function for calculating the error rate of the predicted values
def error_rate(predicted_classes, correct_set):
    """Return the percentage of predictions that differ from the true class.

    Parameters
    ----------
    predicted_classes : sequence of predicted classes
    correct_set : sequence of rows whose last element is the true class;
        row ``i`` is compared with prediction ``i``.

    Returns
    -------
    float
        Mismatch count divided by ``len(correct_set)``, times 100.
    """
    misses = sum(
        1
        for position, guess in enumerate(predicted_classes)
        if guess != correct_set[position][-1]
    )
    return misses / len(correct_set) * 100

In [8]:
#Function for predicting multiple rows in a dataset
def Predict_test(train,test,k):
    """Classify every row of ``test`` with ``k`` nearest neighbours.

    Parameters
    ----------
    train : iterable of rows whose last element is the class
    test : iterable of rows to classify
    k : int
        Number of neighbours passed on to ``classify_KNN``.

    Returns
    -------
    list
        One predicted class per row of ``test``, in the same order.
    """
    return [classify_KNN(train, sample, k) for sample in test]

In [9]:
#Function for converting the data into 2 dimensional data by calculating the area of the petal and the sepal
def convert_to_2d(dataset):
    """Collapse each row's four measurements into two products, keeping the label.

    Parameters
    ----------
    dataset : iterable of rows with at least five elements; elements 0-3
        are numeric and element 4 is carried over unchanged.

    Returns
    -------
    list of tuples
        ``(row[0] * row[1], row[2] * row[3], row[4])`` for every input row.
    """
    return [
        (record[0] * record[1], record[2] * record[3], record[4])
        for record in dataset
    ]

In [10]:
#Function for R fold cross validation to calculate the optimum value of K
def R_fold_validation(train, R, k_range):
    """Choose the K with the lowest mean error under R-fold cross validation.

    The data is split into ``R`` random folds. Each fold in turn is held
    out, the remaining folds are merged into a training set, and the
    held-out fold is classified once for every K in ``1..k_range``. The
    error rates are then averaged per K across the folds.

    Parameters
    ----------
    train : sequence of rows whose last element is the class
    R : int
        Number of folds.
    k_range : int
        Largest K to evaluate; candidates are ``1, 2, ..., k_range``.

    Returns
    -------
    int
        The K (1-based) with the smallest mean error rate. When several K
        share the minimum, the smallest of them is returned.
    """
    folds = datasetsplit(train, R)

    # errors[f][k - 1] is the error rate on held-out fold f using k neighbours.
    errors = []
    for held_out_index, held_out in enumerate(folds):
        training_rows = [
            row
            for fold_index, fold in enumerate(folds)
            if fold_index != held_out_index
            for row in fold
        ]
        errors.append([
            error_rate(Predict_test(training_rows, held_out, k), held_out)
            for k in range(1, k_range + 1)
        ])

    # Total across folds first and divide once, so every fold carries the
    # same weight in the mean.
    mean = [
        sum(fold_errors[i] for fold_errors in errors) / R
        for i in range(k_range)
    ]

    return mean.index(min(mean)) + 1

In [11]:
#Each CSV row consists of the length and breadth of the petal and sepal, followed by the type of the flower
train_data = convert_to_2d(file_to_numpy('train.csv'))
test_data = convert_to_2d(file_to_numpy('test.csv'))

#Search settings: candidate values of K are 1..K_range, each scored with N_FOLDS-fold cross validation
K_range = 20
N_FOLDS = 4
#NOTE(review): R is not passed to the cross validation below (N_FOLDS is); kept only in case another cell reads it
R = int(len(train_data)/K_range)

#The folds are drawn with the `random` module, so K may differ between runs unless a seed is set beforehand
K = R_fold_validation(train_data, N_FOLDS, K_range)
predict = Predict_test(train_data,test_data,K)

In [12]:
#Printing all expected and predicted values with the final accuracy rate
print("Optimum value of K for the dataset: ", K)
#Keep the fractional part of the error rate: truncating it before subtracting would overstate the accuracy
print("Accuracy: ", round(100 - error_rate(predict,test_data), 2),"%")
for expected_row, predicted_class in zip(test_data, predict):
    print("Expected: ",expected_row[-1],", Got: ", predicted_class)

Optimum value of K for the dataset:  1
Accuracy:  100 %
Expected:  Iris-setosa , Got:  Iris-setosa
Expected:  Iris-setosa , Got:  Iris-setosa
Expected:  Iris-setosa , Got:  Iris-setosa
Expected:  Iris-setosa , Got:  Iris-setosa
Expected:  Iris-setosa , Got:  Iris-setosa
Expected:  Iris-setosa , Got:  Iris-setosa
Expected:  Iris-setosa , Got:  Iris-setosa
Expected:  Iris-setosa , Got:  Iris-setosa
Expected:  Iris-setosa , Got:  Iris-setosa
Expected:  Iris-setosa , Got:  Iris-setosa
Expected:  Iris-versicolor , Got:  Iris-versicolor
Expected:  Iris-versicolor , Got:  Iris-versicolor
Expected:  Iris-versicolor , Got:  Iris-versicolor
Expected:  Iris-versicolor , Got:  Iris-versicolor
Expected:  Iris-versicolor , Got:  Iris-versicolor
Expected:  Iris-versicolor , Got:  Iris-versicolor
Expected:  Iris-versicolor , Got:  Iris-versicolor
Expected:  Iris-versicolor , Got:  Iris-versicolor
Expected:  Iris-versicolor , Got:  Iris-versicolor
Expected:  Iris-versicolor , Got:  Iris-versicolor
