# K-Nearest Neighbors 



In [33]:
from scipy.spatial.distance import euclidean
import numpy as np
def distance(a, b, c=2, verbose = True ):
    """Compute the Minkowski distance of order ``c`` between two points.

    Parameters
    ----------
    a, b : array-like
        Coordinates of the two points. Both are converted with ``np.array``
        and subtracted element-wise, so their shapes must be compatible.
    c : int or float, default 2
        Order of the distance. ``c == 1`` is the Manhattan distance and
        ``c == 2`` the Euclidean distance; every other order is reported
        as a Minkowski distance.
    verbose : bool, default True
        When truthy, print the labelled distance before returning it.

    Returns
    -------
    The distance value. It is returned for every order, regardless of
    ``verbose``.
    """
    x = np.array(a)
    y = np.array(b)
    # Sum of |x_i - y_i| ** c over the coordinates, then the c-th root.
    summation = sum(np.power(abs(x - y), c))
    mink = np.power(summation, 1 / c)

    if verbose:
        if c == 1:
            label = 'Manhattan'
        elif c == 2:
            label = 'Euclidean'
        else:
            # Covers fractional orders such as 1.5 as well as c > 2; these
            # previously matched no branch and fell through without a return.
            label = 'Minkowski'
        print(f'{label} Distance:{mink}')

    return mink

## Create the `KNN` class

In [3]:
# Define the KNN class with two empty methods - fit and predict
class KNN:
    """Skeleton of a k-nearest-neighbours classifier.

    ``fit`` and ``predict`` are placeholders that do nothing and return
    ``None``. They are declared as proper instance methods (taking
    ``self``) so that calling them on an instance does not raise a
    ``TypeError``.
    """

    def __init__(self):
        pass

    def fit(self, X_train, y_train):
        """Placeholder: accepts training vectors and labels, does nothing."""
        pass

    def predict(self, X_test, k=3):
        """Placeholder: accepts points to classify and ``k``, does nothing."""
        pass




The inputs for the `fit()` method should be:

* `self`: since this will be an instance method inside the `KNN` class 
* `X_train`: an array, each row represents a _vector_ for a given point in space  
* `y_train`: the corresponding labels for each vector in `X_train`. The label at `y_train[0]` is the label that corresponds to the vector at `X_train[0]`, and so on  


In [5]:
def fit(self, X_train, y_train):
    """Store the training data on the instance.

    No computation happens here: the vectors and labels are only kept so
    that later calls can compare new points against them.

    Parameters
    ----------
    X_train : sequence of vectors
        Each item is the vector for one training point.
    y_train : sequence of labels
        ``y_train[i]`` is the label of ``X_train[i]``.

    Returns
    -------
    self
        The same instance, so the result of ``fit`` can be assigned or
        chained.

    Raises
    ------
    ValueError
        If ``X_train`` and ``y_train`` do not have the same length.
    """
    if len(X_train) != len(y_train):
        raise ValueError(
            f"X_train and y_train must have the same length, "
            f"got {len(X_train)} and {len(y_train)}"
        )
    self.X_train = X_train
    self.y_train = y_train
    return self
    
    
# Rebind the class attribute KNN.fit to the fit function written above,
# replacing the placeholder method declared in the class body
KNN.fit = fit

In [35]:
def _get_distances(self, x):
    """
    * Take in two arguments: `self` and `x`
    * Create an empty array, `distances`, to hold all the distances you're going to calculate
    * Enumerate through every item in `self.X_train`. For each item: 
        * Use the `euclidean()` function to get the distance between x and the current point from `X_train` 
        * Create a tuple containing the index and the distance (in that order!) and append it to the `distances` array 
    * Return the `distances` array when a distance has been generated for all items in `self.X_train` 
    """
    distances = []
    for ind, val in enumerate(self.X_train):
        dist_to_i = euclidean(x, val)
        distances.append((ind, dist_to_i))
    return distances
    

# Attach the function above to the KNN class so it can be called as self._get_distances(...)
KNN._get_distances = _get_distances

In [34]:
def _get_k_nearest(self, dists, k):
    """* Take three arguments:
    * `self`
    * `dists`: an array of tuples containing (index, distance), which will be output from the `_get_distances()` method. 
    * `k`: the number of nearest neighbors you want to return
    * Sort the `dists` array by distances values, which are the second element in each tuple
    * Return the first `k` tuples from the sorted array """
    sorted_dists = sorted(dists, key = lambda x: x[1])
    return sorted_dists[:k]

# Attach the function above to the KNN class so it can be called as self._get_k_nearest(...)
KNN._get_k_nearest = _get_k_nearest

In [37]:
def _get_label_prediction(self, k_nearest):
    """
    * Create a list containing the labels from `self.y_train` for each index in `k_nearest` (remember, each item in `k_nearest` is a tuple, and the index is stored as the first item in each tuple)
    * Get the total counts for each label (use `np.bincount()` and pass in the label array created in the previous step)
    * Get the index of the label with the highest overall count in counts (use `np.argmax()` for this, and pass in the counts created in the previous step) 
    """
    labels = []
    for i in k_nearest:
        labels.append(self.y_train[i[0]])
    total_counts = np.bincount(labels)
    highest_count = np.argmax(total_counts)
    return highest_count   
    

# Attach the function above to the KNN class so it can be called as self._get_label_prediction(...)
KNN._get_label_prediction = _get_label_prediction

In [38]:
def predict(self, X_test, k=3):
    """Classify each point in ``X_test`` by a vote of its nearest neighbours.

    Parameters
    ----------
    X_test : iterable of vectors
        The points to classify.
    k : int, default 3
        Number of nearest training points that vote on each label.

    Returns
    -------
    list
        One predicted label per item of ``X_test``, in the same order.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1. A negative ``k`` would otherwise be
        used as a slice bound and silently drop the farthest neighbours
        instead of keeping the nearest ones.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    preds = []
    for point in X_test:
        # Named `dists` rather than `distance` so the local does not shadow
        # the module-level distance() function.
        dists = self._get_distances(point)
        nearest = self._get_k_nearest(dists, k)
        preds.append(self._get_label_prediction(nearest))
    return preds
    

# Rebind the class attribute KNN.predict to the predict function written above,
# replacing the placeholder method declared in the class body
KNN.predict = predict


## Test the KNN classifier



In [39]:
# Import the necessary functions
import sklearn.datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the iris dataset; `data` and `target` are the two arrays handed to
# train_test_split below as the inputs and their labels
iris = sklearn.datasets.load_iris()
data = iris.data
target = iris.target

In [17]:
# Split into training and test portions; random_state=0 fixes the shuffle so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state = 0)

In [20]:
# Instantiate and fit KNN
# Predictions below are made through `knn`; `model` is not referenced again in the cells shown here
knn = KNN()
model = knn.fit(X_train, y_train)

In [30]:
# Generate one predicted label per test point; k is not passed, so predict uses its default
preds = knn.predict(X_test)

In [32]:
# Compare the predicted labels with the held-out true labels
print("Testing Accuracy: {}".format(accuracy_score(y_test, preds)))
# Expected Output: Testing Accuracy: 0.9736842105263158

Testing Accuracy: 0.9736842105263158
