In [34]:
import numpy as np


In [35]:
# Xtr (of size 50,000 x 32 x 32 x 3) holds all the images in the training set, and a corresponding 1-dimensional array
# Ytr (of length 50,000) holds the training labels (from 0 to 9):
# Xtr_rows = Xtr.reshape(Xtr.shape[0], 32 * 32 * 3) # Xtr_rows becomes 50000 x 3072
# Xte_rows = Xte.reshape(Xte.shape[0], 32 * 32 * 3) # Xte_rows becomes 10000 x 3072
# (the "e" in Xte stands for "evaluation")
# nn.train(Xtr_rows, Ytr)


class NearestNeighbor(object):
    """1-nearest-neighbour classifier based on the L1 (Manhattan) distance.

    ``train`` only stores the data it is given; all the work happens in
    ``predict``, which compares every query row against every stored row.
    """

    def __init__(self):
        pass

    def train(self, trainingSet, labelSet):
        """Remember the training data.

        trainingSet is N x D where each row is an example.
        labelSet is 1-dimensional of size N; labelSet[k] is the label of
        trainingSet[k].
        """
        # the nearest neighbor classifier simply remembers all the training data
        self.trainingSet = trainingSet
        self.labelSet = labelSet

    def predict(self, predictSet):
        """Return the label of the nearest training row for each query row.

        predictSet is M x D where each row is an example we wish to predict
        a label for. The result is a 1-dimensional array of length M with the
        same dtype as the stored labels. When several training rows are
        equally close, the one with the lowest index wins (np.argmin).
        """
        num_test = predictSet.shape[0]
        # make sure that the output type matches the label type
        result = np.zeros(num_test, dtype=self.labelSet.dtype)

        train = np.asarray(self.trainingSet)
        queries = np.asarray(predictSet)
        # Subtracting unsigned integers wraps around instead of going
        # negative, which would make np.abs useless and the distances wrong.
        # Widen to a signed type first (values above the int64 range are not
        # supported).
        if np.result_type(train, queries).kind == 'u':
            train = train.astype(np.int64)
            queries = queries.astype(np.int64)

        # loop over all test rows
        for i in range(num_test):
            # L1 distance (sum of absolute differences) from the i'th query
            # row to every training row at once, via broadcasting
            distances = np.abs(train - queries[i, :]).sum(axis=1)
            # predict the label of the nearest example
            result[i] = self.labelSet[np.argmin(distances)]

        return result

In [36]:
import io
import csv
import random
from pprint import pprint


def loadDataset(filename):
    """Read a UTF-8 CSV file and return its non-empty rows.

    filename is the path of the file to read.
    Returns a list of rows, each row being a list of the string fields
    parsed by csv.reader. Rows with no fields (blank lines) are skipped.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; otherwise line breaks inside quoted fields
    # are not preserved correctly.
    with io.open(filename, 'rt', encoding="UTF8", newline='') as csvfile:
        return [row for row in csv.reader(csvfile) if row]


# Load the iris records and shuffle them in place so the split is random.
dataset = loadDataset('data/iris.data')
rate = int(len(dataset) * 0.66)
np.random.shuffle(dataset)

# The first `rate` rows are used for training, the remainder for evaluation.
trainingSet, evaluationSet = np.split(dataset, [rate])

# In every row the last field is the label; all fields before it are
# numeric features.
trainingDataSet = np.array([row[:-1] for row in trainingSet], dtype=float)
trainingLabelsSet = np.array([row[-1] for row in trainingSet])

evaluationDataSet = np.array([row[:-1] for row in evaluationSet], dtype=float)
evaluationLabelsSet = np.array([row[-1] for row in evaluationSet])

# Fit the classifier on the training part and score it on the held-out part.
nn = NearestNeighbor()
nn.train(trainingDataSet, trainingLabelsSet)

result = nn.predict(evaluationDataSet)
print('accuracy:', np.mean(result == evaluationLabelsSet))

('accuracy:', 0.9215686274509803)
