In [1]:
from collections import Counter

import numpy as np


class KNearestNeighborsClassifier:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Each query point receives the most frequent label among the ``K``
    training points closest to it.  When several labels are tied, the label
    that appears first among the neighbours (ordered by increasing distance)
    wins.

    The imports live at module level (not in the class body) so that the
    methods can resolve ``np`` and ``Counter`` on a fresh kernel without
    depending on imports executed by some other cell.
    """

    def __init__(self, K):
        """Create an unfitted classifier.

        Args:
            K: number of nearest training points that take part in the vote.
        """
        self.train_inputs = None
        self.train_outputs = None
        self.K = K

    def fit(self, X, Y):
        """Store the training set; no other computation happens here.

        Args:
            X: training inputs, shape [# of data points, # of features].
            Y: training outputs (labels), shape [# of data points].

        Raises:
            ValueError: if X and Y do not contain the same number of points.
        """
        if len(X) != len(Y):
            raise ValueError(
                "X and Y must have the same number of data points "
                "(got %d and %d)" % (len(X), len(Y))
            )
        self.train_inputs = X
        self.train_outputs = Y

    def predict(self, X):
        """Classify every row of X.

        Args:
            X: inputs to classify, shape [# of data points, # of features]
               (an ndarray or anything np.asarray can convert to one).

        Returns:
            A list with one predicted label per row of X, in the same order.

        Raises:
            RuntimeError: if called before fit().
            ValueError: if X is not 2-D, or K is not in
                [1, # of training points].
        """
        if self.train_inputs is None or self.train_outputs is None:
            raise RuntimeError("fit() must be called before predict()")

        train_X = np.asarray(self.train_inputs)
        queries = np.asarray(X)

        # A 1-D X would be iterated feature by feature and silently
        # broadcast against the training matrix, yielding meaningless labels.
        if queries.ndim != 2:
            raise ValueError(
                "X must have shape [# of data points, # of features]; "
                "got %d dimension(s)" % queries.ndim
            )

        n_train = train_X.shape[0]
        if not 1 <= self.K <= n_train:
            raise ValueError(
                "K must be between 1 and the number of training points "
                "(%d); got %r" % (n_train, self.K)
            )

        output = []
        for query in queries:
            # Euclidean distance from this query to every training point.
            distances = np.sqrt(((train_X - query) ** 2.).sum(axis=1))

            # Indexes of the K closest training points, nearest first.
            nearest = distances.argsort()[:self.K]

            # Majority vote; Counter keeps first-seen order for equal counts,
            # so ties go to the label of the closest tied neighbour.
            votes = Counter(self.train_outputs[j] for j in nearest)
            output.append(votes.most_common(1)[0][0])

        return output

In [2]:
# Only run the setup when this code is executed directly rather than imported.
if __name__ == '__main__':
    import numpy as np
    from collections import Counter
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_iris

    # Load the iris data and split it into train/test parts with a fixed seed.
    iris_dataset = load_iris()
    (X_train, X_test,
     Y_train, Y_test) = train_test_split(
        iris_dataset.data,
        iris_dataset.target,
        random_state=0,
    )

    # Classifier that votes among the 3 nearest training points.
    knn = KNearestNeighborsClassifier(K=3)

In [3]:
# Hand the training split to the classifier.
knn.fit(X=X_train, Y=Y_train)

In [4]:
# Despite its name, X_test_predict holds the predicted labels for X_test.
X_test_predict = knn.predict(X=X_test)
print(X_test_predict)

[2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1, 0, 0, 2, 0, 0, 1, 1, 0, 2, 1, 0, 2, 2, 1, 0, 2]


In [5]:
# Accuracy: share of test labels that the predictions match.
accuracy = np.sum(Y_test == X_test_predict) / len(Y_test)
print('acc:', accuracy)

# Sanity threshold for the classifier on this split.
assert accuracy > 0.7

acc: 0.9736842105263158
