# K Nearest Neighbours (KNN)

In [1]:
# Imports 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# The bare 'seaborn' style was renamed to 'seaborn-v0_8' in Matplotlib 3.6 and
# later removed; use the new name when present, else fall back to the old one.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

from scipy import stats
from sklearn.datasets import make_classification, make_blobs
from sklearn.model_selection import train_test_split

## Distance computation

In [2]:
def euclidean_distance(x1, x2):
    '''Computes Euclidean distance between x1 and x2

    The inputs are broadcast against each other, so either argument may be a
    single point (1-D) or a matrix holding one point per row (2-D).

    Args:
        x1: Point 1, or a matrix of points (one per row)
        x2: Point 2, or a matrix of points (one per row)

    Returns:
        dist: Euclidean distance; a scalar for two single points, otherwise
              one distance per row
    '''
    diff = np.asarray(x1) - np.asarray(x2)
    # Reduce over the last (feature) axis so that plain 1-D points work as
    # well as row matrices; the square root turns the sum of squares into
    # the actual Euclidean distance.
    dist = np.sqrt(np.sum(diff**2, axis = -1))
    return dist

def manhattan_distance(x1, x2):
    '''Computes Manhattan distance between x1 and x2

    The inputs are broadcast against each other, so either argument may be a
    single point (1-D) or a matrix holding one point per row (2-D).

    Args:
        x1: Point 1, or a matrix of points (one per row)
        x2: Point 2, or a matrix of points (one per row)

    Returns:
        dist: Manhattan distance; a scalar for two single points, otherwise
              one distance per row
    '''
    diff = np.asarray(x1) - np.asarray(x2)
    # Reduce over the last (feature) axis so that plain 1-D points work as
    # well as row matrices.
    dist = np.sum(np.abs(diff), axis = -1)
    return dist

## KNN Implementation

In [4]:
class KNN:
    '''K nearest neighbours model for classification or regression.

    `fit` only stores the training data; each prediction computes the
    distance from the query to every stored example and aggregates the
    targets of the k closest ones.
    '''

    def __init__(self, k, distance_metric = euclidean_distance, task_type = "Classification"):
        '''Configures the model
        Args:
            k: Number of neighbours used for each prediction
            distance_metric: Callable taking (X, new_example) and returning
                one distance per row of X
            task_type: "Classification" selects majority voting; any other
                value selects regression (mean of the neighbours' targets)
        '''
        self._k = k
        self._distance_metric = distance_metric
        self._task_type = task_type

    def fit(self, X, y):
        ''' Stores training data for KNN
        Args:
            X: Feature matrix
            y: labels

        Returns:
            None
        '''
        self._X = X
        self._y = y

    def predict(self, new_example):
        '''Predict the label for a new example supplied
        Args:
            new_example: Feature vector of the example to predict

        Returns:
            label: Predicted label (majority class, or mean target for
                   regression) for new_example
            knn_indices: Indices of K nearest neighbours

        Raises:
            ValueError: If k is smaller than 1 or larger than the number of
                        training examples
        '''
        distance_vector = self._distance_metric(self._X, new_example)

        n_train = len(distance_vector)
        if not 1 <= self._k <= n_train:
            raise ValueError(
                f"k must be between 1 and the number of training examples "
                f"({n_train}), got {self._k}")

        # Partitioning around position k - 1 places the k smallest distances
        # in the first k slots and stays in bounds when k == n_train.
        knn_indices = np.argpartition(distance_vector, self._k - 1)[:self._k]

        knn = self._y[knn_indices]

        if self._task_type == 'Classification':
            # Majority vote. np.unique returns sorted values, so ties resolve
            # to the smallest label.
            values, counts = np.unique(knn, return_counts=True)
            label = values[np.argmax(counts)]
        else:
            label = knn.mean()

        return label, knn_indices

    def eval(self, X_test, y_test):
        '''Evaluates KNN
        Args:
            X_test: feature matrix
            y_test: labels

        Returns:
            Misclassification error (fraction of wrong predictions) for
            classification, root mean squared error for regression
        '''
        y_pred = np.zeros(y_test.shape)
        for i in range(y_test.shape[0]):
            y_pred[i], _ = self.predict(X_test[i, :])

        if self._task_type == 'Classification':
            # Fraction of examples whose predicted label differs from the truth.
            error = np.mean(y_test != y_pred, axis = 0)
        else:
            error_vector = y_pred - y_test
            error = np.sqrt((error_vector.T @ error_vector) / error_vector.ravel().shape[0])

        return error