### Classifying Iris Data With KNN

In [1]:
#Required Libraries
import csv
import math
import random
import operator

In [2]:
#Loading the dataset 
def loadDataset(filename, split, trainingSet=None, testSet=None):
    '''
    Load a CSV dataset and randomly split its rows into training and test sets.

    The first row of the file is treated as a header and skipped. In every
    remaining non-empty row the first four columns are converted to float;
    any further columns (such as the class label) are left as strings.

    Parameters:
        filename    -- path of the CSV file to read.
        split       -- each row goes to the training set when
                       random.random() < split, otherwise to the test set.
        trainingSet -- optional list that training rows are appended to;
                       a new list is created when omitted.
        testSet     -- optional list that test rows are appended to;
                       a new list is created when omitted.

    Returns:
        The tuple (trainingSet, testSet). Lists supplied by the caller are
        filled in place and returned as the same objects.

    Raises:
        ValueError -- one of the first four values of a row is not numeric.
        IndexError -- a non-empty row has fewer than four columns.
    '''
    # Create fresh lists per call: a mutable default argument would be shared
    # between calls and silently accumulate rows from earlier loads.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # newline='' lets the csv module handle line endings itself.
    with open(filename, 'r', newline='') as csvfile:
        dataset = list(csv.reader(csvfile))

    # dataset[0] is the header row.
    for row in dataset[1:]:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            continue
        for j in range(4):
            row[j] = float(row[j])
        if random.random() < split:
            trainingSet.append(row)
        else:
            testSet.append(row)

    return trainingSet, testSet

In [3]:
#Distance Calculation
def euclideanDistance(point1, point2, length):
    '''
        Compute the Euclidean distance between two points, using only
        their first `length` coordinates (any later elements are ignored).
    '''
    squared_total = 0
    for axis in range(length):
        delta = point1[axis] - point2[axis]
        squared_total = squared_total + delta ** 2
    return math.sqrt(squared_total)

In [4]:
def getNeighbors(trainingSet, testInstance, k):
    '''
        Return the k training instances nearest to the given test instance.

        Distances are Euclidean and use every element of an instance except
        the last one. The result is ordered from nearest to farthest;
        instances at equal distance keep their training-set order.

        If k is larger than the training set, all training instances are
        returned instead of raising an IndexError. A k of zero or less
        yields an empty list.
    '''
    # The final element of an instance is left out of the distance.
    length = len(testInstance) - 1

    distances = [
        (trainInstance, euclideanDistance(testInstance, trainInstance, length))
        for trainInstance in trainingSet
    ]
    # list.sort is stable, so ties are resolved by training-set order.
    distances.sort(key=operator.itemgetter(1))

    # Slicing tolerates k > len(distances); max() stops a negative k from
    # being read as "all but the last |k| items".
    return [instance for instance, _ in distances[:max(k, 0)]]

In [5]:
def getResponse(neighbors):
    '''
        Pick the predicted class by majority vote among the neighbors.

        Each neighbor votes with its last element. When several classes
        share the highest count, the one that was seen first wins.
    '''
    tally = {}
    for item in neighbors:
        label = item[-1]
        tally[label] = tally.get(label, 0) + 1

    # A stable sort (also with reverse=True) keeps first-seen order on ties.
    ranked = list(tally.items())
    ranked.sort(key=operator.itemgetter(1), reverse=True)
    winner = ranked[0]
    return winner[0]

In [6]:
def getAccuracy(testSet, predictions):
    '''
        Return the classification accuracy as a percentage (0.0 - 100.0).

        A prediction counts as correct when it equals the last element of
        the test instance at the same position. An empty test set gives
        0.0 rather than a ZeroDivisionError.

        Raises IndexError if predictions is shorter than testSet.
    '''
    total = len(testSet)
    if total == 0:
        # No instances to score; avoid dividing by zero.
        return 0.0

    correct = sum(
        1 for i in range(total) if testSet[i][-1] == predictions[i]
    )
    return (correct / float(total)) * 100.0

In [7]:
def main(filename='iris.csv', split=0.80, k=3, seed=None):
    '''
        Run the KNN pipeline: load and split the data, classify every test
        instance by its k nearest training instances, and print the sizes
        of both sets and the resulting accuracy.

        Parameters:
            filename -- CSV file to load (default 'iris.csv').
            split    -- value passed to loadDataset to control the
                        train/test split (default 0.80).
            k        -- number of neighbors that vote on each prediction
                        (default 3).
            seed     -- optional seed for the random module; set it to get
                        the same train/test split on every run. With the
                        default None the split differs between runs.
    '''
    if seed is not None:
        random.seed(seed)

    trainingSet = []
    testSet = []

    loadDataset(filename, split, trainingSet, testSet)
    print('Length of training set: ',len(trainingSet))
    print('Length of testing set: ',len(testSet))

    predictions = []
    for testInstance in testSet:
        neighbors = getNeighbors(trainingSet, testInstance, k)
        predictions.append(getResponse(neighbors))

    accuracy = getAccuracy(testSet, predictions)

    print('Accuracy: ', accuracy)


main()

Length of training set:  117
Length of testing set:  33
Accuracy:  96.96969696969697
