In [1]:
import asyncio

from scipy.stats import mode
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plot

In [2]:
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [23]:
class KNearestNeighbor:

    def __init__(self, distance='l1', k=1):
        self.x_train = None
        self.y_train = None
        self.distance = distance
        self.k = k
    
    def train(self, x, y):
        """
        Training is simply going to be 'memorizing' the training data.
        So nothing is really 'learn'. We just copy the pointers into our
        class.

        params:
            x - numpy array of training images
            y - numpy array of testing labels
        """
        self.x_train = x
        self.y_train = y

    async def l2_distance(self, train, test):
        """
        Computes the 'euclidean' distance which
        is the square root of the sum of the squares
        between the two numpy matrixes train and test
        """
        # squares all the train and test after subtracting
        squares = np.square(train - test)

        # sum all the squares up
        sums = np.sum(squares, axis=1)

        # and compute the square root of that
        distances = np.sqrt(sums)

        return distances
    
    async def l1_distance(self, train, test):
        """
        Computes the 'manhattan' distance which
        is the sum of the absolute value between
        the two numpy matrixes train and test
        """
        # subtract the pixels from each other and get absolute value
        absolute = np.abs(train - test)

        # sum up all the differences into one number for each
        # training image. distances shape = (x_train.shape[0])
        distances = np.sum(absolute, axis=1)

        return distances

    async def predict(self, x):
        """
        Loops through all the images in x and performs
        nearest neighbors sum of all the pixels between
        the selected x image, and the images in the training
        set. Then, it selects the minimum value from these
        distances.

        params:
            x - numpy array of test images
        """
        # total amount of x test images
        length = x.shape[0]

        # creating a shape = (length) zeroed array for predicitions
        y_pred = np.zeros(length, dtype=self.y_train.dtype)

        futures = []

        # looping through testing images and running concurrently!
        for i in range(length):
            if self.distance == 'l1':
                futures.append(self.l1_distance(self.x_train, x[i, :]))
            elif self.distance == 'l2':
                futures.append(self.l2_distance(self.x_train, x[i, :]))
        distance_list = await asyncio.gather(*futures)

        for i in range(length):
            distances = distance_list[i]

            # Find the minimum number from all those distances
            # and get the indices up to the k value
            indicies = np.argsort(distances)
            min_indicies = indicies[:self.k]

            # get the predictions and pick the most voted on!
            preds = self.y_train[min_indicies, 0]
            voted = np.bincount(preds).argmax()
            y_pred[i] = voted
        
        return y_pred

In [24]:
# flatten the images in the dataset so we have a array of pixels
# not a 3 dimensional width, height, depth matrixs
x_train_flat = x_train.reshape(x_train.shape[0], 32 * 32 * 3)
x_test_flat = x_test.reshape(x_test.shape[0], 32 * 32 * 3)

distances = ['l1', 'l2']
knn = [5, 3, 1]
sample = 10

for d in distances:
    for k in knn:
        model = KNearestNeighbor(k=k, distance=d)
        model.train(x_train_flat, y_train)
        y_pred = await model.predict(x_test_flat[:sample])
        accuracy = np.mean(y_pred == y_test[:sample].reshape(sample))
        print(f'k={k}, distance={d}, accuracy={accuracy*100}%')


[8 5 0 8 0]
[8 1 8 1 1]
[1 8 9 8 8]
[0 0 8 0 8]
[4 5 0 0 3]
[2 0 6 2 4]
[2 4 4 2 4]
[5 2 0 6 4]
[0 0 5 0 0]
[8 8 1 8 8]
k=5, distance=l1, accuracy=20.0%
[8 5 0]
[8 1 8]
[1 8 9]
[0 0 8]
[4 5 0]
[2 0 6]
[2 4 4]
[5 2 0]
[0 0 5]
[8 8 1]
k=3, distance=l1, accuracy=20.0%
[8]
[8]
[1]
[0]
[4]
[2]
[2]
[5]
[0]
[8]
k=1, distance=l1, accuracy=20.0%
[8 8 0 0 2]
[0 9 0 8 1]
[8 8 8 1 0]
[0 2 2 0 0]
[2 4 6 6 5]
[6 6 0 6 6]
[5 3 2 6 2]
[2 0 4 2 6]
[2 7 7 3 2]
[6 9 2 7 5]
k=5, distance=l2, accuracy=40.0%
[8 8 0]
[0 9 0]
[8 8 8]
[0 2 2]
[2 4 6]
[6 6 0]
[5 3 2]
[2 0 4]
[2 7 7]
[6 9 2]
k=3, distance=l2, accuracy=20.0%
[8]
[0]
[8]
[0]
[2]
[6]
[5]
[2]
[2]
[6]
k=1, distance=l2, accuracy=30.0%
