# **kNN from scratch**

Okapova Akerke

15BD02047

okapova.akerke@gmail.com

http://github.com/akerukesha

An example of the k-nearest neighbors (kNN) algorithm implemented from scratch.

In [6]:
import csv
import random
import math
import operator

In [7]:
def load_dataset(filename, split, seed=None):
    """Read a CSV file, shuffle its rows and split them into two lists.

    Args:
        filename: Path of the CSV file to read.
        split: Fraction of the rows assigned to the training set. The
            training set receives the first ``int(split * n)`` shuffled
            rows and the test set receives the remainder.
        seed: Optional seed for a reproducible shuffle. When ``None``
            (the default) the module-level ``random`` generator is used.

    Returns:
        A ``(training_set, test_set)`` tuple of lists of rows. Every row is
        a list of strings exactly as produced by ``csv.reader``.

    Side effects:
        Prints the number of rows that were loaded.
    """
    # The csv module asks for files to be opened with newline=''.
    with open(filename, 'r', newline='') as csvfile:
        # csv.reader yields [] for blank lines (e.g. a trailing empty line at
        # the end of the file). Such rows have no features and no label, so
        # they are dropped instead of being passed on to the classifier.
        dataset = [row for row in csv.reader(csvfile) if row]

    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(dataset)
    print(len(dataset))

    s = int(split * len(dataset))
    return dataset[:s], dataset[s:]

In [45]:
# Load the data set, sending 67% of the shuffled rows to the training set
# and the remaining rows to the test set.
split = 0.67
training_set, test_set = load_dataset('iris.txt', split)
# Show the first five training rows as a quick sanity check.
training_set[:5]

150


[['5.4', '3.9', '1.3', '0.4', 'Iris-setosa'],
 ['5.3', '3.7', '1.5', '0.2', 'Iris-setosa'],
 ['5.0', '3.4', '1.6', '0.4', 'Iris-setosa'],
 ['4.5', '2.3', '1.3', '0.3', 'Iris-setosa'],
 ['6.5', '3.0', '5.5', '1.8', 'Iris-virginica']]

In [14]:
def euclidean_distance(instance1, instance2, length):
    """Compute the Euclidean distance over the first ``length`` components.

    Each component is converted with ``float`` before use, so the instances
    may hold numbers or numeric strings. Elements at index ``length`` and
    beyond are ignored.
    """
    squared_diffs = (
        (float(instance1[i]) - float(instance2[i])) ** 2
        for i in range(length)
    )
    return math.sqrt(sum(squared_diffs))

In [15]:
def get_neighbors(training_set, test_instance, k):
    """Return the training rows closest to ``test_instance``.

    Args:
        training_set: Iterable of rows to search.
        test_instance: Row to find neighbours for. Its last element is
            excluded from the distance computation.
        k: Maximum number of neighbours to return.

    Returns:
        A list of at most ``k`` rows from ``training_set``, ordered from the
        nearest to the farthest. When ``k`` is larger than the training set
        every training row is returned; when ``k`` is zero or negative the
        list is empty.
    """
    # Only the leading components take part in the distance; the final
    # element of the test row is left out.
    length = len(test_instance) - 1

    distances = [
        (train_instance, euclidean_distance(test_instance, train_instance, length))
        for train_instance in training_set
    ]
    distances.sort(key=operator.itemgetter(1))

    # A slice clamps to the rows that exist, so an oversized k no longer
    # raises IndexError; max() keeps a negative k from slicing off the end.
    return [train_instance for train_instance, _ in distances[:max(k, 0)]]

In [16]:
def get_response(neighbors):
    """Return the label that occurs most often among ``neighbors``.

    The label of a neighbour is its last element. When several labels share
    the highest count, the one that was seen first wins because the sort is
    stable.
    """
    tally = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        tally[label] = tally.get(label, 0) + 1

    ranked = list(tally.items())
    ranked.sort(key=operator.itemgetter(1), reverse=True)
    winner, _ = ranked[0]
    return winner

In [17]:
def get_accuracy(results):
    """Return the percentage of results whose first two entries are equal.

    Args:
        results: Sequence of pairs; each pair is compared as
            ``pair[0] == pair[1]``.

    Returns:
        The share of matching pairs as a float between 0.0 and 100.0.
        An empty ``results`` yields 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    if len(results) == 0:
        return 0.0
    correct = sum(1 for result in results if result[0] == result[1])
    return (float(correct) / float(len(results))) * 100.0

In [46]:
def main(split=0.67, k=10, show=10, filename='iris.txt'):
    """Classify the test rows of a data set with kNN and print a report.

    Args:
        split: Fraction of the rows passed to ``load_dataset`` as the
            training share.
        k: Number of neighbours requested for every test row.
        show: How many results to list under "Sample results".
        filename: CSV file to load; defaults to ``'iris.txt'``.

    The report contains the accuracy, the first ``show`` results and every
    result whose predicted label differs from the actual one. Nothing is
    returned.
    """
    training_set, test_set = load_dataset(filename, split)

    # Each result is [actual label, predicted label]; the actual label is
    # the last element of the test row.
    results = []
    for instance in test_set:
        neighbors = get_neighbors(training_set, instance, k)
        results.append([instance[-1], get_response(neighbors)])

    accuracy = get_accuracy(results)
    print("Accuracy: {0}%".format(accuracy))

    # Shared by both listings below so the two cannot drift apart.
    line = "Actual flower: \"{0}\", predicted flower: \"{1}\""

    print("\nSample results:")
    for actual, predicted in results[:show]:
        print(line.format(actual, predicted))

    print("\nWrong results:")
    for actual, predicted in results:
        if actual != predicted:
            print(line.format(actual, predicted))

In [47]:
# Run the experiment with the defaults declared by main (split=0.67, k=10, show=10).
main()

150
Accuracy: 94.0%

Sample results:
Actual flower: "Iris-versicolor", predicted flower: "Iris-versicolor"
Actual flower: "Iris-versicolor", predicted flower: "Iris-versicolor"
Actual flower: "Iris-versicolor", predicted flower: "Iris-versicolor"
Actual flower: "Iris-virginica", predicted flower: "Iris-virginica"
Actual flower: "Iris-versicolor", predicted flower: "Iris-versicolor"
Actual flower: "Iris-versicolor", predicted flower: "Iris-versicolor"
Actual flower: "Iris-setosa", predicted flower: "Iris-setosa"
Actual flower: "Iris-setosa", predicted flower: "Iris-setosa"
Actual flower: "Iris-versicolor", predicted flower: "Iris-versicolor"
Actual flower: "Iris-virginica", predicted flower: "Iris-versicolor"

Wrong results:
Actual flower: "Iris-virginica", predicted flower: "Iris-versicolor"
Actual flower: "Iris-virginica", predicted flower: "Iris-versicolor"
Actual flower: "Iris-versicolor", predicted flower: "Iris-virginica"


In [48]:
# Run the experiment again with 20 neighbours instead of the default 10.
main(k=20)

150
Accuracy: 92.0%

Sample results:
Actual flower: "Iris-versicolor", predicted flower: "Iris-versicolor"
Actual flower: "Iris-versicolor", predicted flower: "Iris-virginica"
Actual flower: "Iris-setosa", predicted flower: "Iris-setosa"
Actual flower: "Iris-versicolor", predicted flower: "Iris-virginica"
Actual flower: "Iris-setosa", predicted flower: "Iris-setosa"
Actual flower: "Iris-setosa", predicted flower: "Iris-setosa"
Actual flower: "Iris-virginica", predicted flower: "Iris-virginica"
Actual flower: "Iris-versicolor", predicted flower: "Iris-versicolor"
Actual flower: "Iris-virginica", predicted flower: "Iris-virginica"
Actual flower: "Iris-setosa", predicted flower: "Iris-setosa"

Wrong results:
Actual flower: "Iris-versicolor", predicted flower: "Iris-virginica"
Actual flower: "Iris-versicolor", predicted flower: "Iris-virginica"
Actual flower: "Iris-versicolor", predicted flower: "Iris-virginica"
Actual flower: "Iris-versicolor", predicted flower: "Iris-virginica"
