In [1]:
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
import matplotlib.pyplot as plt
from sklearn import datasets, preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
class KNN():
    """Brute-force k-nearest-neighbours classifier.

    Each query point is labelled with the most frequent class among the
    ``k`` training samples closest to it in Euclidean distance. No model is
    fitted ahead of time: the training data is passed to ``predict``
    directly.

    Parameters
    ----------
    k : int, default 5
        Number of neighbours that take part in the vote.
    """

    def __init__(self, k=5):
        self.k = k

    def predict(self, X_test, X_train, y_train):
        """Predict a class label for every row of ``X_test``.

        Parameters
        ----------
        X_test : array-like, shape (n_test, n_features)
            Query points to classify.
        X_train : array-like, shape (n_train, n_features)
            Reference points with known labels.
        y_train : array-like, shape (n_train,)
            Labels of ``X_train``. They are cast to ``int`` and counted with
            ``np.bincount``, so they must be non-negative integer class ids.

        Returns
        -------
        numpy.ndarray, shape (n_test,)
            Predicted labels, stored as floats.

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1, the training set is empty, or the
            array shapes are inconsistent with each other.
        """
        X_test = np.asarray(X_test, dtype=float)
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)

        if self.k < 1:
            raise ValueError('k must be at least 1, got %r' % (self.k,))
        if X_test.ndim != 2 or X_train.ndim != 2:
            raise ValueError('X_test and X_train must be 2-D arrays')
        if y_train.ndim != 1 or y_train.shape[0] != X_train.shape[0]:
            raise ValueError('y_train must hold exactly one label per row of X_train')
        if X_train.shape[0] == 0:
            raise ValueError('X_train must contain at least one sample')
        if X_test.shape[1] != X_train.shape[1]:
            # Guard explicitly: broadcasting would otherwise silently accept
            # a single-feature training set against wider query rows.
            raise ValueError('X_test and X_train must have the same number of features')

        labels = y_train.astype('int')
        y_predict = np.zeros(X_test.shape[0])

        for i, query in enumerate(X_test):
            # Euclidean distance from this query to every training sample.
            distances = np.sqrt(((X_train - query) ** 2).sum(axis=1))

            # Indices of the k closest samples. The stable sort keeps
            # equidistant samples in training order, so ties are resolved
            # deterministically. Slicing tolerates k > n_train by simply
            # using every training sample.
            nearest = np.argsort(distances, kind='stable')[:self.k]

            # Majority vote; argmax picks the smallest label on a tied count.
            counts = np.bincount(labels[nearest])
            y_predict[i] = counts.argmax()

        return y_predict

In [3]:
def main():
    """Evaluate the KNN classifier on the Iris data set and print its accuracy.

    The features are passed through ``preprocessing.normalize``, then 33% of
    the samples are held out for testing. No ``random_state`` is given to
    ``train_test_split``, so the split (and therefore the reported accuracy)
    can differ from run to run.
    """
    iris = datasets.load_iris()
    features = preprocessing.normalize(iris.data)
    targets = iris.target

    train_X, test_X, train_y, test_y = train_test_split(
        features, targets, test_size=0.33
    )

    # Classify the held-out samples using the 5 nearest training samples.
    classifier = KNN(k=5)
    predictions = classifier.predict(test_X, train_X, train_y)

    print('Accuracy:', accuracy_score(test_y, predictions))
    
    
#if __name__ == '__main__':
    #main()