# KNearestNeighbor class 
- `fit()` method
- `predict()` method

In [23]:
from collections import Counter
from math import sqrt

In [41]:
class KNearestNeighbor:
    """
    k-nearest-neighbours classifier over plain Python rows.

    Every training row is a sequence of numeric features whose LAST element
    is the class label. A prediction is the most common label among the
    `n_neighbors` training rows closest (by euclidean distance) to the query.
    """

    def __init__(self, n_neighbors):
        """
        Params:

            n_neighbors: number of nearest training rows that vote on a
            prediction (int)
        """
        self.n_neighbors = n_neighbors
        self.train = None

    def __euclidean_distance(self, row1, row2):
        """
        The square root of the sum of the squared differences between two vectors.
        The smaller the value, the more similar two records will be.
        Value of 0 indicates no difference.

        euclidean distance = sqrt(sum i to N (x1_i - x2_i)^2)

        Params:

            row1: the observation being classified; it may or may not carry
            a trailing label

            row2: a training row; its last element is the label and is
            excluded from the distance

        Raises:

            ValueError: if row1 does not have as many features as row2
        """
        # The training row defines how many features exist; its label is dropped.
        features = row2[:-1]
        n_features = len(features)

        # Accept the observation with or without a trailing label, nothing else.
        if len(row1) not in (n_features, n_features + 1):
            raise ValueError(
                "observation has %d values but training rows have %d features"
                % (len(row1), n_features)
            )

        # zip() stops after the last feature, so a trailing label on row1 is ignored.
        # Start the sum at 0.0 so the result is always a float.
        return sqrt(sum(((a - b) ** 2 for a, b in zip(row1, features)), 0.0))

    def fit(self, train):
        """
        Fits model to training data.

        Params:

            train: a dataset; an indexable sequence of rows with the label
            stored at the end of each row
        """
        self.train = train

    def __get_neighbors(self, train, new_obs, k):
        """
        Locates most similar neighbors via euclidean distance.

        Params:

            train: a dataset

            new_obs: a new observation; observation for which neighbors are to be found

            k: k-neighbors; the number of neighbors to be found (int)

        Returns:

            list of (row index, distance) tuples, closest first. Holds at
            most k entries, fewer when the dataset has fewer than k rows.
        """
        distances = [
            (index, self.__euclidean_distance(new_obs, row))
            for index, row in enumerate(train)
        ]

        # Sort once, after all distances are known. The sort is stable, so rows
        # at equal distance keep their dataset order.
        distances.sort(key=lambda tup: tup[1])

        # Slicing (rather than indexing) tolerates k larger than the dataset.
        return distances[:max(k, 0)]

    def predict(self, new_obs):
        """
        Predicts a class label on a new observation from provided training data.

        The label is chosen by majority vote among the `n_neighbors` closest
        training rows. When several labels share the top count, the one whose
        nearest member is closest to the observation wins.

        Params:

            new_obs: a new observation; observation for which neighbors are to be found

        Returns:

            the predicted label (taken from the end of the winning training rows)

        Raises:

            ValueError: if the model has not been fitted, the training data
            is empty, or n_neighbors is smaller than 1
        """
        if self.train is None:
            raise ValueError("model is not fitted; call fit() before predict()")
        if self.n_neighbors < 1:
            raise ValueError("n_neighbors must be at least 1")

        # Compile list of neighbors
        neighbors = self.__get_neighbors(self.train, new_obs, self.n_neighbors)
        if not neighbors:
            raise ValueError("training data is empty")

        # Tally labels in nearest-first order. Assumes label is at end of record.
        votes = Counter(self.train[index][-1] for index, _ in neighbors)

        # most_common() keeps first-encountered order among equal counts, so a
        # tie is resolved in favour of the nearer neighbor's label.
        return votes.most_common(1)[0][0]

In [2]:
# Toy training set: each row is [feature_1, feature_2, label], label being 0 or 1.
# The label sits in the last position, which is where predict() reads it from.
dataset = [
    [2.7810836,2.550537003,0],
    [1.465489372,2.362125076,0],
    [3.396561688,4.400293529,0],
    [1.38807019,1.850220317,0],
    [3.06407232,3.005305973,0],
    [7.627531214,2.759262235,1],
    [5.332441248,2.088626775,1],
    [6.922596716,1.77106367,1],
    [8.675418651,-0.242068655,1],
    [7.673756466,3.508563011,1]
]

In [42]:
# Create the classifier, configured with three neighbors.
nn = KNearestNeighbor(n_neighbors=3)

In [43]:
# Store the labelled rows on the model; fit() only keeps a reference to them.
nn.fit(dataset)

In [44]:
# Query that duplicates the ninth training row, trailing label included.
new = [8.675418651,-0.242068655,1]
nn.predict(new)

1

In [45]:
# Feature-only copies of the ten training rows (labels removed).
samp = [
    [2.7810836,2.550537003],
    [1.465489372,2.362125076],
    [3.396561688,4.400293529],
    [1.38807019,1.850220317],
    [3.06407232,3.005305973],
    [7.627531214,2.759262235],
    [5.332441248,2.088626775],
    [6.922596716,1.77106367],
    [8.675418651,-0.242068655],
    [7.673756466,3.508563011]
]

# Classify every sample with the fitted model, keeping the input order.
predictions = [nn.predict(obs) for obs in samp]

predictions

[0, 0, 0, 0, 0, 1, 1, 1, 1, 1]