In [121]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from collections import Counter
from sklearn.metrics import accuracy_score

In [115]:
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Parameters
    ----------
    k : int
        Number of nearest training samples that vote on each prediction.
        Must be a positive integer.

    Raises
    ------
    ValueError
        If ``k`` is not a positive integer.
    """

    def __init__(self, k):
        # bool is a subclass of int, so it has to be rejected explicitly.
        # A non-positive k would either leave no voters (k == 0) or make the
        # slice in __predict silently drop neighbours from the far end (k < 0).
        if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = int(k)

    def fit(self, X, y):
        """Store the training samples and their labels.

        Parameters
        ----------
        X : array-like
            Training features, one sample per row. A 1-D input is treated as
            n samples of a single feature.
        y : array-like
            Class label for each row of ``X``.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` have different lengths.
        """
        # Convert to float so that list / DataFrame inputs work and so that
        # unsigned-integer features cannot wrap around when subtracted.
        features = np.asarray(X, dtype=float)
        if features.ndim == 1:
            features = features.reshape(-1, 1)
        # Converting the labels guarantees positional (not label-based) indexing.
        labels = np.asarray(y)

        if features.shape[0] == 0:
            raise ValueError("cannot fit on an empty training set")
        if labels.ndim == 0 or features.shape[0] != labels.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        self.X_train = features
        self.y_train = labels

    def predict(self, X):
        """Predict a class label for every sample in ``X``.

        Parameters
        ----------
        X : array-like
            Samples to classify, one per row.

        Returns
        -------
        list
            One predicted label per row of ``X``, in input order.
        """
        samples = np.asarray(X, dtype=float)
        return [self.__predict(sample) for sample in samples]

    def __predict(self, x):
        """Return the majority label among the ``k`` training points nearest to ``x``."""
        # One vectorised call gives the distance from x to every training row.
        distances = self.__get_distance(self.X_train, x)

        # A stable sort makes equidistant neighbours resolve deterministically
        # in favour of the lower training index. The slice clamps naturally
        # when k exceeds the number of training samples.
        nearest = np.argsort(distances, kind="stable")[:self.k]
        labels = [self.y_train[index] for index in nearest]

        # Counter keeps insertion order among equal counts, so a tied vote goes
        # to the class whose first member is closest to x.
        return Counter(labels).most_common(1)[0][0]

    def __get_distance(self, x1, x2):
        """Calculate the Euclidean distance between points along the last axis.

        The inputs are broadcast against each other, so ``x1`` may be a matrix
        of points (one per row) and ``x2`` a single point.
        """
        difference = np.atleast_1d(
            np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
        )
        return np.sqrt(np.sum(difference ** 2, axis=-1))
        

In [116]:
# Load the Iris dataset and separate the feature matrix from the class labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

In [117]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

In [120]:
# Fit a 3-nearest-neighbour classifier on the training split, then label the held-out samples.
model = KNN(3)
model.fit(X=X_train, y=Y_train)
predictions = model.predict(X=X_test)

In [122]:
# Fraction of held-out samples whose predicted class matches the true class.
accuracy_score(y_true=Y_test, y_pred=predictions)

0.9666666666666667