In [17]:
import numpy as np

In [45]:
class KNearestNeighbour():
    """k-nearest-neighbour classifier based on Euclidean (L2) distance.

    Training only memorises the data. Prediction computes the distance from
    every test row to every training row and takes a majority vote among the
    labels of the K closest training rows.

    Labels are cast to ``int`` and counted with ``np.bincount``, so they are
    expected to be non-negative integer class ids (possibly stored as floats).
    """

    def __init__(self,K):
        """Store the number of neighbours used for the vote.

        Args:
            K: number of nearest training samples that vote; must be >= 1.

        Raises:
            ValueError: if K is smaller than 1. (K == 0 would make the vote
                run on an empty array, and a negative K would silently slice
                "all but the last |K|" neighbours.)
        """
        if K < 1:
            raise ValueError(f"K must be a positive integer, got {K!r}")
        self.K = K

    def train(self,X,y):
        """Memorise the training set.

        Args:
            X: training features, one row per sample.
            y: training labels, one entry per row of X.

        Raises:
            ValueError: if X and y do not contain the same number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {X.shape[0]} and {y.shape[0]}"
            )
        self.X_train = X
        self.y_train = y

    def predict(self,X_test,num_loops):
        """Predict a label for every row of X_test.

        Args:
            X_test: test features, one row per sample, with the same number
                of columns as the training features.
            num_loops: which distance implementation to use; 1 selects
                ``compute_distance_one_loops`` and 2 selects
                ``compute_distance_two_loops``.

        Returns:
            1D float array with one predicted label per test row.

        Raises:
            ValueError: if num_loops is neither 1 nor 2.
        """
        if num_loops == 2:
            distances = self.compute_distance_two_loops(X_test)
        elif num_loops == 1:
            distances = self.compute_distance_one_loops(X_test)
        else:
            # Fail loudly instead of implicitly returning None.
            raise ValueError(f"num_loops must be 1 or 2, got {num_loops!r}")

        return self.predict_labels(distances)

    def compute_distance_one_loops(self,X_test):
        """Compute all test-to-train Euclidean distances with one Python loop.

        Args:
            X_test: test features, one row per sample.

        Returns:
            Array of shape (num_test, num_train) where entry [i, j] is the
            distance between test row i and training row j.
        """
        num_test = X_test.shape[0]
        num_train = self.X_train.shape[0]
        distances = np.zeros((num_test,num_train))

        for i in range(num_test):

            # example:
            # self.X_train = [[1, 2, 3],
            #                 [4, 5, 6],
            #                 [7, 8, 9],
            #                 [10, 11, 12]]
            #
            # X_test[i,:] = [2, 2, 2]
            #
            # self.X_train - X_test[i,:] (the single test row is broadcast
            # against every training row) =
            # [[ 1,  2,  3],    [[2, 2, 2],
            #  [ 4,  5,  6],  -  [2, 2, 2],
            #  [ 7,  8,  9],     [2, 2, 2],
            #  [10, 11, 12]]     [2, 2, 2]]

            # Summing the squared differences over axis=1 collapses the
            # feature axis, leaving one value per training row (a 1D array
            # of length num_train) that fills row i of the result.
            distances[i,:] = np.sqrt(np.sum((self.X_train - X_test[i,:])**2,axis=1))

        return distances


    def compute_distance_two_loops(self,X_test):
        """Compute all test-to-train Euclidean distances with two nested loops.

        Reference implementation that handles one (test, train) pair at a
        time.

        Args:
            X_test: test features, one row per sample.

        Returns:
            Array of shape (num_test, num_train) where entry [i, j] is the
            distance between test row i and training row j.
        """
        num_test = X_test.shape[0]
        num_train = self.X_train.shape[0]
        distances = np.zeros((num_test,num_train))

        for i in range(num_test):
            for j in range(num_train):
                distances[i,j] = np.sqrt(np.sum((X_test[i,:] - self.X_train[j,:])**2))

        return distances

    def predict_labels(self,distances):
        """Turn a distance matrix into predicted labels by majority vote.

        Args:
            distances: array of shape (num_test, num_train) as produced by
                the ``compute_distance_*`` methods.

        Returns:
            1D float array of length num_test holding the winning label for
            each test row. When several labels are equally frequent among the
            K neighbours, the smallest label wins (first maximum of argmax).
        """
        num_test = distances.shape[0]
        y_pred = np.zeros(num_test)

        for i in range(num_test):
            # argsort returns the indices that would sort the row ascending,
            # eg; [18, 68,  8, 78, 88] -> array([2, 0, 1, 3, 4])
            y_indices = np.argsort(distances[i,:])
            # labels of the K closest training samples
            k_closest_classes = self.y_train[y_indices[:self.K]].astype(int)

            # bincount counts occurrences per label: [1,1,3,4,3,3] -> [0,2,0,3,1]
            # argmax picks the most frequent label:  [0,2,0,3,1] -> 3
            y_pred[i] = np.argmax(np.bincount(k_closest_classes))

        return y_pred

In [46]:
if __name__ == "__main__":
    # Read the feature matrix and the label vector from comma-separated text files.
    X = np.loadtxt("example_data/data.txt", delimiter=",")
    y = np.loadtxt("example_data/targets.txt", delimiter=",")

    # Build a 3-neighbour classifier and hand it the full data set.
    KNN = KNearestNeighbour(K=3)
    KNN.train(X, y)

    # NOTE: predictions are made on the same samples the classifier was
    # trained on, so the printed figure is a training accuracy.
    y_pred = KNN.predict(X, num_loops=1)

    # (y_pred == y) is an element-wise boolean mask, e.g.
    # array([False, True, False, True, ...]); counting its True entries and
    # dividing by the number of samples gives the fraction predicted correctly.
    print(f'accuracy: {(y_pred == y).sum()/y.shape[0]}')

accuracy: 0.9333333333333333
