In [None]:
import numpy as np
import pandas as pd

In [None]:
class knn_classifier:
    '''
    Brute-force k-nearest-neighbours classifier for binary problems.

    Uses uniform weights and euclidean distance. A training label equal
    to 1 counts as a vote for class 1; any other label counts as a vote
    for class 0.

    If the votes of the selected neighbours are tied, the farthest
    neighbour is excluded and the votes are counted again. This applies
    to any tie, including the case where fewer than k training points
    are available and the neighbour list ends up with an even length.
    '''
    def __init__(self,
                 n_neighbors = 5,
                 weights = 'uniform'):
        '''
        n_neighbors: how many nearest training points take part in the
            vote (expected to be a positive integer).
        weights: accepted but not used; voting is always uniform.
        '''
        self.dataset = None
        self.labels = None
        self.k = n_neighbors

    @staticmethod
    def euclidian_distance(point_a:pd.Series, point_b:pd.Series):
        '''
        Return the euclidean (L2) distance between two points.
        '''
        return np.linalg.norm(point_a - point_b)

    def fit(self,X_train, y_train):
        '''
        Store the training features and labels; no computation happens
        here, all the work is done in predict.

        Rows of X_train and entries of y_train are matched by position
        (both are accessed with .iloc).
        '''
        self.dataset = X_train
        self.labels = y_train

    def decide_predict(self, value_key_list):
        '''
        Majority vote among the given neighbours.

        value_key_list: sequence of (distance, position) pairs sorted by
            ascending distance, where position is the row position in
            the training set (an int or a numeric string).

        Returns 1 if the label 1 wins the vote, otherwise 0. An empty
        neighbour list yields 0.
        '''
        # +1 for every neighbour labelled 1, -1 for any other label
        votes = [1 if self.labels.iloc[int(position)] == 1 else -1
                 for _, position in value_key_list]

        # On a tie drop the farthest remaining neighbour (the last one)
        # and count again. Checking the actual votes instead of self.k
        # also covers lists shorter than k, and the loop stops on an
        # empty list instead of recursing forever.
        while votes and sum(votes) == 0:
            votes.pop()

        return 1 if sum(votes) > 0 else 0

    def predict(self,X_predict:pd.DataFrame):
        '''
        For each row in X_predict, compute the distance to every row of
        the training set, keep the k closest ones and let them vote.

        Equidistant training points are ordered by their position in
        the training set.

        Returns a pd.Series with one prediction (0 or 1) per row of
        X_predict, in the same order.
        '''
        n_train = self.dataset.shape[0]
        y_pred = []
        for i in range(X_predict.shape[0]):  #iterating through the rows of the df to be predicted
            query = X_predict.iloc[i]  #extracted once per query row, not once per training row

            #list in the format [(closest_distance1, iloc1), (closest_distance2, iloc2)...]
            #integer positions make distance ties sort numerically (2 before 10)
            neighbours = sorted(
                (self.euclidian_distance(query, self.dataset.iloc[j]), j)
                for j in range(n_train)
            )[:self.k]
            y_pred.append(self.decide_predict(neighbours))

        return pd.Series(y_pred)



