# K-nearest neighbors
***

In this notebook I will attempt to implement the K-nearest neighbors algorithm on the [CIFAR-10 dataset](http://www.cs.toronto.edu/~kriz/cifar.html), as an exercise for the Stanford course [CS231n: Convolutional Neural Networks for Visual Recognition](http://cs231n.stanford.edu/2017/syllabus).


In [60]:
# Importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pickle

## Loading the CIFAR-10 dataset
***

In [116]:
def load_dataset(files):
    """Load one or more pickled CIFAR-10 batch files into flat arrays.

    Each file is expected to unpickle to a mapping with the byte-string keys
    ``b'data'`` (a 2-D array, one sample per row) and ``b'labels'`` (one label
    per sample), the layout described at
    https://www.cs.toronto.edu/~kriz/cifar.html

    Args:
        files: sequence of paths to batch files, read in the given order.

    Returns:
        A ``(data, data_labels)`` tuple. ``data`` stacks the rows of every
        batch into a single 2-D array. ``data_labels`` is a 1-D array with one
        entry per row of ``data``, in the same order, so ``data_labels[i]`` is
        the label of ``data[i]``.

    Raises:
        ValueError: if ``files`` is empty.
    """
    if len(files) == 0:
        raise ValueError("load_dataset needs at least one batch file")

    batch_data = []
    batch_labels = []
    for path in files:
        with open(path, 'rb') as fo:
            # NOTE: unpickling can execute arbitrary code -- only load batch
            # files obtained from a trusted source.
            batch = pickle.load(fo, encoding='bytes')
        batch_data.append(np.asarray(batch[b'data']))
        batch_labels.append(np.asarray(batch[b'labels']))

    # Concatenate along the sample axis so data and labels stay aligned row
    # for row, even when the batches hold different numbers of samples.
    data = np.concatenate(batch_data, axis=0)
    data_labels = np.concatenate(batch_labels, axis=0)
    return data, data_labels

In [117]:
# Paths of the five CIFAR-10 training batch files, in batch order.
batches = [
    "../cifar-10-batches-py/data_batch_1",
    "../cifar-10-batches-py/data_batch_2",
    "../cifar-10-batches-py/data_batch_3",
    "../cifar-10-batches-py/data_batch_4",
    "../cifar-10-batches-py/data_batch_5",
]
# Keep the individual per-batch names available as well.
b1, b2, b3, b4, b5 = batches

In [118]:
# Read every file listed in `batches` and keep the resulting data array and
# labels as the training set.
train, train_labels = load_dataset(batches)

In [119]:
# load_dataset takes a list of paths, so the single test file is wrapped in a list.
test_batch = ["../cifar-10-batches-py/test_batch"]
test, test_labels = load_dataset(test_batch)

In [120]:
# Sanity check: display the shape of the loaded test data array.
test.shape

(10000, 3072)

Creating the `KNearestNeighbor` class:

In [127]:
class KNearestNeighbor(object):
    """K-nearest-neighbors classifier using the L1 (sum of absolute differences) distance.

    Training only memorizes the data. Prediction compares each test row with
    every training row and takes a majority vote among the K closest ones.
    """

    def __init__(self, k):
        """Store the number of neighbors that vote on each prediction.

        Args:
            k: number of nearest training examples consulted per test example.

        Raises:
            ValueError: if ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k must be at least 1, got %r" % (k,))
        self.K = k

    def train(self, X, y):
        """Memorize the training set.

        Args:
            X: 2-D array with one training example per row.
            y: labels for the rows of ``X``; any shape is accepted as long as
               flattening it yields exactly one label per row, in row order.
        """
        self.Xtr = X
        self.ytr = y

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Args:
            X: 2-D array with one test example per row and the same number of
               columns as the training data.

        Returns:
            1-D array with one predicted label per row of ``X``, using the
            dtype of the training labels. Vote ties are resolved in favor of
            the smallest label value; with ``k == 1`` the label of the single
            nearest training example is returned.

        Raises:
            ValueError: if the number of training labels does not match the
                number of training rows.
        """
        # Flatten the labels so that label i always belongs to training row i,
        # even when they were passed as (n_batches, n) or (n, 1).
        train_labels = np.ravel(self.ytr)
        num_train = self.Xtr.shape[0]
        if train_labels.shape[0] != num_train:
            raise ValueError(
                "got %d labels for %d training examples"
                % (train_labels.shape[0], num_train)
            )

        num_test = X.shape[0]
        Ypred = np.zeros(num_test, dtype=train_labels.dtype)
        # Never ask for more neighbors than there are training examples.
        k = min(int(self.K), num_train)

        for i in range(num_test):
            distances = self._l1_distances(X[i, :])
            # A stable sort keeps the earliest training row first among equal
            # distances, matching what argmin would pick for k == 1.
            nearest = np.argsort(distances, kind='stable')[:k]
            labels, votes = np.unique(train_labels[nearest], return_counts=True)
            # np.unique returns sorted labels, so argmax breaks vote ties
            # toward the smallest label.
            Ypred[i] = labels[np.argmax(votes)]

        return Ypred

    def _l1_distances(self, x):
        """Return the L1 distance from ``x`` to every training row."""
        # |a - b| is computed as max(a, b) - min(a, b): for unsigned data such
        # as uint8 pixels a plain a - b wraps around (9 - 10 gives 255) and
        # np.abs cannot repair it afterwards.
        larger = np.maximum(self.Xtr, x)
        smaller = np.minimum(self.Xtr, x)
        return np.sum(larger - smaller, axis=1)