In [3]:
import numpy as np
from scipy.stats import mode
from scipy.spatial.distance import euclidean

In [14]:
class KNN:
    '''
    K-nearest-neighbours, a lazy supervised machine learning algorithm.

    The user specifies the number of neighbors to find. ``fit`` only stores
    the data; all the work happens in ``predict``, which produces one
    leave-one-out prediction per stored data point: each point is predicted
    from the targets of its K nearest *other* points.
    '''


    def __init__(self, K=2, learning='classification'):
        '''
        K = number of neighbors
        learning = 'classification' or 'regression'
        '''
        assert learning == 'classification' or learning == 'regression', 'need to set learning.'
        self.K_ = K
        self.type_ = learning
        self._nrows = None      # number of stored data points (set in _calc_distance)
        self._indices = None    # (n, K) neighbor indices from the last predict call
        self._X = None          # data stored by fit


    def _calc_distance(self):
        '''Calculate the Euclidean distance between every pair of stored points.
        Input:
            none (reads the data stored by fit)
        Output:
            (n, n) matrix where entry [i, j] is the distance between point i
            and point j; the diagonal is np.inf so that a point is never its
            own nearest neighbor
        '''
        self._nrows = len(self._X)
        points = np.asarray(self._X, dtype=float)
        if points.ndim != 2:
            # Treat each row as one flat feature vector (1-D input -> one feature).
            points = points.reshape(self._nrows, -1)
        # Broadcast to all pairwise differences: shape (n, n, n_features).
        deltas = points[:, np.newaxis, :] - points[np.newaxis, :, :]
        distances = np.sqrt((deltas ** 2).sum(axis=-1))
        np.fill_diagonal(distances, np.inf)
        return distances


    def _sort_indices(self, X):
        '''Sort distance matrix indices by shortest distance.
        Input:
            X = numpy distance matrix from self._calc_distance
        Output:
            indices sorted by min distance (one row per data point)
        '''
        return np.argsort(X, axis=1)


    def fit(self, X):
        '''Store the data; no computation happens here.
        Input:
            X = array-like of data points, one row per point
        Raises:
            AssertionError if K is smaller than 1 or not smaller than the
            number of data points'''
        X = np.asarray(X)
        assert self.K_ >= 1, 'K must be at least 1.'
        assert self.K_ < X.shape[0], 'You chose too many neighbors!'
        self._X = X


    def predict(self, y):
        '''Predict a target for every stored data point from its K nearest
        other points.
        Input:
            y = array-like of targets, one per row of the fitted data
        Output:
            classification: (n, 1) array holding the most common neighbor label
            regression: (n,) array holding the mean of the neighbor targets
        '''
        y = np.asarray(y)
        distances = self._calc_distance()
        order = self._sort_indices(distances)
        # The np.inf on the diagonal sorts each point itself into the last
        # column; drop that column, then keep the K closest neighbors.
        self._indices = order[:, :-1][:, :self.K_]
        neighbor_targets = y[self._indices]
        if self.type_ == 'classification':
            votes = np.asarray(mode(neighbor_targets, axis=1)[0])
            if votes.ndim == 1:
                # Newer SciPy drops the reduced axis; keep the (n, 1) column shape.
                votes = votes[:, np.newaxis]
            return votes
        # One mean per data point (not just the first row's mean).
        return np.mean(neighbor_targets, axis=1)

## Generate Data

In [4]:
# Four 2-D sample points, one per row.
X = np.array([[0, 0],
              [1, 2],
              [4, 9],
              [5, 10]])
X

array([[ 0,  0],
       [ 1,  2],
       [ 4,  9],
       [ 5, 10]])

In [5]:
# One class label per row of X.
y = np.asarray([1, 1, 1, 0])
y

array([1, 1, 1, 0])

## Modeling

In [29]:
# Three neighbors, majority-vote (classification) mode.
knn = KNN(K=3, learning='classification')

In [30]:
# fit only stores X (after checking K is smaller than the number of rows).
knn.fit(X)
# predict takes the labels and returns, for each row of X, the most common
# label among its K nearest other rows. With K=3 and four rows, every row
# is voted on by all three of the others.
knn.predict(y)

array([[1],
       [1],
       [1],
       [1]])