In [1]:
import numpy as np

In [2]:
class KNNClassifier:
    """
    k-nearest-neighbours classifier using Euclidean distance.

    A query sample is assigned the most common label among the ``k``
    training samples that lie closest to it.

    Parameters
    ----------
    k : int, default=5
        Number of neighbours that vote on the label of a query sample.
    """

    def __init__(self, k=5):
        self.k = k
        # Start as None so that predict() can detect a missing fit() call
        # and raise ValueError instead of failing with AttributeError.
        self.X = None
        self.y = None

    def fit(self, X, y):
        """
        Memorise the training set (no computation happens at fit time).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Label of each training sample; labels must be convertible to int.
        """
        self.X = np.asarray(X)
        self.y = np.asarray(y)

    @staticmethod
    def get_majority_class(y):
        """
        Return the most frequent label in ``y`` as an int.

        Ties are resolved in favour of the largest label value.

        Raises
        ------
        ValueError
            If ``y`` is empty.
        """
        labels = np.asarray(y).ravel()
        if labels.size == 0:
            raise ValueError("Cannot determine the majority class of an empty label set")
        classes, counts = np.unique(labels, return_counts=True)
        # np.unique returns the classes in ascending order; searching the
        # reversed counts makes a tie resolve to the largest label.
        winner = len(counts) - 1 - int(np.argmax(counts[::-1]))
        return int(classes[winner])

    def _predict_sample(self, x):
        """Predict the label of a single sample ``x`` of shape (n_features,)."""
        x = np.asarray(x).reshape(1, -1)
        # Squared Euclidean distance to every training sample. Broadcasting
        # replaces an explicit copy of x per training row, and skipping the
        # square root does not change the neighbour ranking.
        distances = np.sum((np.asarray(self.X) - x) ** 2, axis=1)
        # A stable sort keeps equidistant neighbours in training-set order.
        nearest = np.argsort(distances, kind="stable")[:self.k]
        # Only the neighbours' labels take part in the vote; the distances
        # themselves must not be counted as candidate classes.
        return self.get_majority_class(np.asarray(self.y)[nearest])

    def predict(self, X):
        """
        Predict a label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_queries, n_features)
            Samples to classify.

        Returns
        -------
        numpy.ndarray of shape (n_queries,)
            Predicted integer labels.

        Raises
        ------
        ValueError
            If ``fit`` has not been called yet.
        """
        if self.X is None:
            raise ValueError("Please call fit method")

        return np.array([self._predict_sample(x) for x in np.asarray(X)])

# Test: compare with scikit-learn's KNeighborsClassifier on the breast cancer dataset

In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Load the breast cancer dataset bundled with scikit-learn and separate the
# feature matrix from the target vector.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=43
)

# Report the shape of each part of the split.
print('X_train.shape:', X_train.shape)
print('X_test.shape:', X_test.shape)
print('y_train.shape:', y_train.shape)
print('y_test.shape:', y_test.shape)

('X_train.shape:', (455, 30))
('X_test.shape:', (114, 30))
('y_train.shape:', (455,))
('y_test.shape:', (114,))


In [5]:
# Both classifiers must use the same number of neighbours for the accuracy
# comparison to be meaningful, so the value is defined once and shared.
N_NEIGHBORS = 5
model_knn = KNNClassifier(k=N_NEIGHBORS)
model_knn_sk = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)

In [6]:
# model
# Custom implementation: memorise the training split, then label the
# held-out samples.
model_knn.fit(X_train, y_train)
y_predicted = model_knn.predict(X_test)

# scikit-learn reference model on the same split; its fit() returns the
# estimator itself, so fitting and predicting can be chained.
y_predicted_sk = model_knn_sk.fit(X_train, y_train).predict(X_test)

In [7]:
# Accuracy is the fraction of test labels predicted correctly: first for the
# custom classifier, then for the scikit-learn reference.
print(np.mean(y_test == y_predicted))
print(np.mean(y_test == y_predicted_sk))

0.956140350877193
0.956140350877193
