In [None]:
import asyncio

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plot

In [None]:
# Human-readable names for the ten CIFAR-10 categories, listed in the
# dataset's integer label order (label 0 -> 'plane', ..., label 9 -> 'truck').
classes = [
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# load_data() hands back a (train, test) pair, each an (images, labels) tuple.
train_split, test_split = tf.keras.datasets.cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
class NearestNeighbor:
    """
    1-nearest-neighbour classifier over flattened sample vectors.

    The label predicted for a sample is the label of the single closest
    training sample under the chosen metric ('l1' or 'l2').
    """

    def __init__(self, distance='l1'):
        """
        params:
            distance - name of the metric used by predict, 'l1' or 'l2'
        """
        self.x_train = None
        self.y_train = None
        self.distance = distance

    def train(self, x, y):
        """
        Training is simply 'memorizing' the training data, so nothing
        is really learned. We just keep references to the arrays in our
        class (no copy is made).

        params:
            x - numpy array of training images, one flattened image per row
            y - numpy array of training labels, one label per row of x
        """
        self.x_train = x
        self.y_train = y

    @staticmethod
    def _signed_difference(train, test):
        """
        Returns train - test computed without unsigned wrap-around.

        Image pixels are usually stored as uint8, where 0 - 10 wraps to
        246 instead of -10. Integer inputs are therefore widened to a
        signed type of at least 32 bits before subtracting; any other
        dtype is subtracted as-is.
        """
        train = np.asarray(train)
        test = np.asarray(test)

        dtype = np.result_type(train.dtype, test.dtype)
        if np.issubdtype(dtype, np.integer):
            dtype = np.promote_types(dtype, np.int32)

        # passing dtype makes numpy cast both operands *before* subtracting
        return np.subtract(train, test, dtype=dtype)

    async def l2_distance(self, train, test):
        """
        Computes the 'euclidean' distance, which is the square root
        of the sum of the squared differences, between every row of
        train and the sample test.

        The widened integer type keeps squares exact for 8-bit pixel
        data (255 ** 2 fits comfortably in 32 bits).

        params:
            train - 2-d numpy array, one sample per row
            test - numpy array that broadcasts against the rows of train

        returns:
            numpy array with one distance per row of train
        """
        diff = self._signed_difference(train, test)

        # square in place: diff is a fresh array, and reusing it avoids
        # allocating a second train-sized temporary
        np.square(diff, out=diff)

        # sum the squares of each row, then take the square root
        return np.sqrt(np.sum(diff, axis=1))

    async def l1_distance(self, train, test):
        """
        Computes the 'manhattan' distance, which is the sum of the
        absolute differences, between every row of train and the
        sample test.

        params:
            train - 2-d numpy array, one sample per row
            test - numpy array that broadcasts against the rows of train

        returns:
            numpy array with one distance per row of train
        """
        diff = self._signed_difference(train, test)

        # absolute value in place (diff is a fresh array owned by us)
        np.abs(diff, out=diff)

        # one number per training sample: shape = (train.shape[0],)
        return np.sum(diff, axis=1)

    async def predict(self, x):
        """
        Loops through all the samples in x, measures the distance from
        each one to every training sample, and predicts the label of
        the training sample with the smallest distance.

        The distance coroutines contain no awaits of their own, so they
        cannot overlap; each one is awaited in turn and only the index
        of its minimum is kept, instead of holding every distance
        vector in memory until the end.

        params:
            x - 2-d numpy array of test images, one flattened image per row

        returns:
            1-d numpy array of x.shape[0] predicted labels, with the
            dtype of the training labels

        raises:
            ValueError - if self.distance is not 'l1' or 'l2'
        """
        metrics = {'l1': self.l1_distance, 'l2': self.l2_distance}
        if self.distance not in metrics:
            raise ValueError(
                f"unknown distance {self.distance!r}; expected 'l1' or 'l2'"
            )
        metric = metrics[self.distance]

        # total amount of x test images
        length = x.shape[0]

        # index of the closest training sample for every test sample
        nearest = np.empty(length, dtype=np.intp)

        for i in range(length):
            distances = await metric(self.x_train, x[i, :])

            # Find the minimum number from all those distances
            # and remember its index.
            nearest[i] = np.argmin(distances)

        # Those indexes select our best guesses! Labels may be stored as
        # (N,) or (N, 1); reshape yields a flat vector in both cases.
        return np.asarray(self.y_train)[nearest].reshape(length)

In [None]:
# flatten the images in the dataset so we have a array of pixels
# not a 3 dimensional width, height, depth matrixs
x_train_flat = x_train.reshape(x_train.shape[0], 32 * 32 * 3)
x_test_flat = x_test.reshape(x_test.shape[0], 32 * 32 * 3)

distances = ['l1', 'l2']
sample = 500

for d in distances:
    model = NearestNeighbor(distance=d)
    model.train(x_train_flat, y_train)
    y_pred = await model.predict(x_test_flat[:sample])
    accuracy = np.mean(y_pred == y_test[:sample].reshape(sample))
    print(f'distance={d}, accuracy={accuracy*100}%')
