In [40]:
import csv
import random
import math
import operator

In [41]:
def loadDataset(filename, split, trainingSet=None, testSet=None, numFeatures=4):
    """Read a CSV file and randomly partition its rows into two lists.

    The first ``numFeatures`` columns of every non-empty row are converted
    to ``float``; any remaining columns are left as the strings the CSV
    reader produced. A row is appended to ``trainingSet`` when
    ``random.random() < split`` and to ``testSet`` otherwise.

    Args:
        filename: Path of the CSV file to read.
        split: Threshold compared against ``random.random()``; larger values
            send more rows to the training list.
        trainingSet: List that receives training rows (mutated in place).
            A fresh list is created when omitted.
        testSet: List that receives test rows (mutated in place). A fresh
            list is created when omitted.
        numFeatures: Number of leading columns to convert to ``float``.

    Returns:
        The ``(trainingSet, testSet)`` pair, i.e. the same list objects that
        were passed in (or the newly created ones).

    Raises:
        ValueError: If one of the leading columns cannot be parsed as float.
        IndexError: If a non-empty row has fewer than ``numFeatures`` columns.
    """
    # Avoid mutable default arguments: a default list would be shared by
    # every call that omits the parameter.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # newline="" lets the csv module do its own newline handling.
    with open(filename, "r", newline="") as csv_file:
        for row in csv.reader(csv_file):
            # Blank lines (such as a trailing one at end of file) come back as
            # empty lists; skip them instead of blindly dropping the last row.
            if not row:
                continue
            for col in range(numFeatures):
                row[col] = float(row[col])
            if random.random() < split:
                trainingSet.append(row)
            else:
                testSet.append(row)
    return trainingSet, testSet

In [42]:
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance between two instances.

    Only the first ``length`` positions of each instance are compared, so
    trailing entries (for example a class label) can be excluded by the
    caller.

    Args:
        instance1: Indexable sequence of numeric values.
        instance2: Indexable sequence of numeric values.
        length: Number of leading positions to include in the distance.

    Returns:
        The square root of the summed squared differences, as a float.
    """
    squared_sum = 0
    for idx in range(length):
        delta = instance1[idx] - instance2[idx]
        squared_sum = squared_sum + delta ** 2
    return math.sqrt(squared_sum)

In [43]:
def getNeighbours(trainingSet, testInstance, k):
    """Return the ``k`` training instances closest to ``testInstance``.

    Distance is computed with ``euclideanDistance`` over every position
    except the last one of ``testInstance`` (``len(testInstance) - 1``
    positions).

    Args:
        trainingSet: List of training instances.
        testInstance: The instance to find neighbours for.
        k: Number of neighbours to return.

    Returns:
        A list of at most ``k`` training instances ordered from nearest to
        farthest. Instances at equal distance keep their ``trainingSet``
        order because ``list.sort`` is stable.
    """
    length = len(testInstance) - 1
    distances = [
        (trainInstance, euclideanDistance(trainInstance, testInstance, length))
        for trainInstance in trainingSet
    ]
    distances.sort(key=operator.itemgetter(1))
    # Return the instances themselves for the k smallest distances. The
    # previous `distances[:k][0]` handed back a single (instance, distance)
    # tuple, so only the nearest neighbour ever took part in the vote.
    return [instance for instance, _ in distances[:k]]


def getResponse(neighbours):
    """Return the majority class among ``neighbours``.

    Each neighbour votes with its last element. When several classes share
    the highest vote count, the class that appears first in ``neighbours``
    wins (the sort is stable and dicts preserve insertion order).

    Args:
        neighbours: List of instances, as returned by ``getNeighbours``.

    Returns:
        The winning class value.

    Raises:
        IndexError: If ``neighbours`` is empty.
    """
    classVotes = {}
    # Count every neighbour; the previous `range(len(neighbours) - 1)`
    # silently left the last one out.
    for neighbour in neighbours:
        response = neighbour[-1]
        classVotes[response] = classVotes.get(response, 0) + 1
    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True)
    return sortedVotes[0][0]

In [51]:
def getAccuracy(testSet, predictions):
    """Return the percentage of test instances whose class was predicted.

    An instance counts as correct when its last element equals the
    prediction at the same index.

    Args:
        testSet: List of instances; the last element of each is the actual
            class.
        predictions: Predicted classes, aligned by index with ``testSet``.

    Returns:
        Accuracy as a float in the range 0.0-100.0. An empty ``testSet``
        yields 0.0 instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    total = len(testSet)
    if total == 0:
        # Nothing to score; the random split can leave the test set empty.
        return 0.0
    correct = sum(
        1 for idx, instance in enumerate(testSet)
        if instance[-1] == predictions[idx]
    )
    return (correct / float(total)) * 100.0

In [57]:
def main(filename='iris.data', split=0.5, k=3, seed=None):
    """Load a dataset, classify every test instance and print the accuracy.

    The rows of ``filename`` are partitioned by ``loadDataset``; each test
    instance is then classified via ``getNeighbours`` and ``getResponse``,
    and the overall score from ``getAccuracy`` is printed.

    Args:
        filename: CSV file passed to ``loadDataset``.
        split: Train/test threshold passed to ``loadDataset``.
        k: Neighbour count passed to ``getNeighbours``.
        seed: When not ``None``, seeds the ``random`` module first so the
            train/test partition is the same on every run. The default
            leaves the generator untouched.
    """
    if seed is not None:
        random.seed(seed)

    trainingSet = []
    testSet = []
    loadDataset(filename, split, trainingSet, testSet)
    print('Train Set:' + repr(len(trainingSet)))
    print('Test Set:' + repr(len(testSet)))

    predictions = []
    for testInstance in testSet:
        neighbours = getNeighbours(trainingSet, testInstance, k)
        result = getResponse(neighbours)
        predictions.append(result)
        print('Predicted:' + repr(result) + ' Actual:' + repr(testInstance[-1]))

    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy:' + repr(accuracy) + '%')

In [58]:
# Run the whole pipeline. The train/test partition is drawn with
# random.random() and no seed is set in the cells shown here, so the set
# sizes and the accuracy printed below can differ from run to run.
main()

Train Set:73
Test Set:77
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual:'Iris-setosa'
Predicted:'Iris-setosa' Actual