# Implementation of _kNN_ using `numpy`

In [1]:
from __future__ import division
import numpy as np

from collections import Counter

def euclidean_distance(a, b):
    '''
    INPUT two numpy arrays of the same shape
    OUTPUT float

    Return the norm of the element-wise difference a - b, as computed by
    np.linalg.norm with its default settings.
    '''
    delta = a - b
    return np.linalg.norm(delta)

def cosine_distance(a, b):
    '''
    INPUT two 1-D numpy arrays of the same length
    OUTPUT float in [0, 2]

    Return the cosine distance between a and b, i.e. 1 - cos(angle between
    them): 0 for vectors pointing the same way, 1 for orthogonal vectors and
    2 for opposite vectors.

    If either vector has zero norm the division below is 0/0 and the result
    is nan.
    '''
    a_mod = np.linalg.norm(a)
    b_mod = np.linalg.norm(b)

    similarity = a.dot(b)/a_mod/b_mod

    # A distance must shrink as the vectors get more alike; the raw cosine
    # similarity grows instead, so it is flipped here.
    return 1 - similarity


class KNearestNeighbors(object):
    '''
    k-nearest-neighbours classifier that predicts by majority vote.

    k:        number of nearest training points that vote (at least 1)
    distance: callable taking two 1-D arrays and returning a number, where
              a smaller value means the points are closer
    '''

    def __init__(self, k=5, distance=euclidean_distance):
        # With k < 1 no neighbour would be left to vote in classify().
        if k < 1:
            raise ValueError("k must be at least 1, got {0!r}".format(k))
        self.k = k
        self.distance = distance

    def fit(self, X, y):
        '''
        INPUT X: 2-D array-like, one training point per row
              y: 1-D array-like, one label per row of X

        Store the training data; all the work happens at prediction time.
        Raises ValueError if X and y have different lengths.
        '''
        # Convert up front so plain lists work with the fancy indexing
        # done in classify().
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                "X and y must have the same length, got {0} and {1}".format(
                    len(X), len(y)))
        self.X_train = X
        self.y_train = y

    def predict(self, X_test):
        '''
        INPUT 2-D numpy array, one test point per row
        OUTPUT 1-D numpy array with one predicted label per row

        Classify every row of X_test against the stored training data.
        Raises ValueError if X_test is not 2-dimensional.
        '''
        X_test = np.asarray(X_test)
        if X_test.ndim != 2:
            raise ValueError(
                "X_test must be 2-dimensional, got {0} dimension(s)".format(
                    X_test.ndim))

        labels = [self.classify(x) for x in X_test]

        # Every prediction is drawn from y_train, so its dtype always fits.
        # np.apply_along_axis instead sizes its output from the first
        # prediction (truncating longer string labels) and rejects an
        # empty X_test.
        return np.array(labels, dtype=self.y_train.dtype)

    def classify(self, x):
        '''
        INPUT 1-D numpy array (a single point)
        OUTPUT the predicted label, taken from y_train

        Find the distance of the input point from all the points in the
        data matrix and return the most common label among the k nearest.
        When several labels share the top vote count, the one belonging to
        the nearest of those neighbours wins.
        Raises ValueError if there is no neighbour to vote.
        '''
        x = np.asarray(x)
        dist = np.array([self.distance(x_i, x) for x_i in self.X_train])

        # mergesort is stable: equidistant training points keep their
        # original order, so the chosen neighbours are reproducible.
        nearest = dist.argsort(kind='mergesort')[:self.k]
        yk = self.y_train[nearest]
        if len(yk) == 0:
            raise ValueError(
                "no neighbours to vote: training set is empty or k < 1")

        votes = Counter(yk)
        top = max(votes.values())

        # yk is ordered nearest-first, so the first label that reaches the
        # top count gives a deterministic tie-break.
        for label in yk:
            if votes[label] == top:
                return label

### Use case

In [2]:
from sklearn import datasets

# Load the iris dataset and unpack it into the feature matrix X and the
# label vector y.
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [3]:
# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.cross_validation module was removed in 0.20. Fall back to
# the old location only for installs that predate model_selection.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Hold out 20% of the samples for testing. No random_state is passed, so the
# split (and therefore the reported scores) can differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [4]:
# Build a 5-nearest-neighbour classifier using Euclidean distance, give it
# the training split, then label every held-out sample.
knn = KNearestNeighbors(k=5, distance=euclidean_distance)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [5]:
def error_rate(y_test, y_pred):
    '''
    INPUT two equal-length sequences: true labels and predicted labels
    OUTPUT float in [0, 1]

    Return the fraction of positions where the prediction differs from the
    true label.

    Raises ValueError if the two sequences differ in length, and
    ZeroDivisionError if they are empty.
    '''
    # zip() would silently drop the extra items of the longer sequence
    # while the divisor stayed len(y_pred), giving a wrong rate.
    if len(y_test) != len(y_pred):
        raise ValueError(
            "y_test and y_pred must have the same length, got {0} and {1}".format(
                len(y_test), len(y_pred)))

    mismatches = sum(1 for yt, yp in zip(y_test, y_pred) if yt != yp)

    # float() guarantees true division on Python 2 even where
    # `from __future__ import division` is not in effect.
    return mismatches / float(len(y_pred))

def accuracy(y_test, y_pred):
    '''
    INPUT true labels and predicted labels
    OUTPUT the complement of the error rate

    Return 1 minus error_rate(y_test, y_pred).
    '''
    misses = error_rate(y_test, y_pred)
    return 1 - misses

In [6]:
# A single-argument print(...) works both as a Python 2 print statement and
# as the Python 3 print function. The two spaces after the colon match what
# the original `print "label: ", value` statements emitted.
print("Error rate:  {0}".format(error_rate(y_test, y_pred)))
print("Accuracy:  {0}".format(accuracy(y_test, y_pred)))

Error rate:  0.0333333333333
Accuracy:  0.966666666667
