In [152]:
%install_ext https://raw.githubusercontent.com/SiggyF/notebooks/master/pep8_magic.py

import numpy as np
import matplotlib.pyplot as plt


In [153]:
# Xtr (of size 50,000 x 32 x 32 x 3) holds all the images in the training set, and a corresponding 1-dimensional array
# Ytr (of length 50,000) holds the training labels (from 0 to 9):
# Xtr_rows = Xtr.reshape(Xtr.shape[0], 32 * 32 * 3) # Xtr_rows becomes 50000 x 3072
# Xte_rows = Xte.reshape(Xte.shape[0], 32 * 32 * 3) # Xte_rows becomes 10000 x 3072
# e = evaluation
# nn.train(Xtr_rows, Ytr)

class NearestNeighbor(object):
  def __init__(self):
    pass

  def train(self, X, y):
    """ X is N x D where each row is an example. Y is 1-dimension of size N """
    # the nearest neighbor classifier simply remembers all the training data
    self.Xtr = X
    self.ytr = y

  def predict(self, X):
    """ X is N x D where each row is an example we wish to predict label for """
    num_test = X.shape[0]
    # lets make sure that the output type matches the input type
    Ypred = np.zeros(num_test, dtype = self.ytr.dtype)
    #print ('num_test', num_test)
    #print ("X", len(X))
    #print ("Xtr", len(self.Xtr))
    # loop over all test rows
    for i in range(num_test):
      # find the nearest training image to the i'th test image
      # using the L1 distance (sum of absolute value differences)
      #print ("xtr ",)
      #print ("x[i] ",X[i])
      
      absolute = np.abs(self.Xtr - X[i,:])
      
      distances = np.sum(absolute, axis = 1)
      #print('distances', distances)
      min_index = np.argmin(distances) # get the index with smallest distance
      #print('min_index', min_index)
      #print(self.ytr)
      Ypred[i] = self.ytr[min_index] # predict the label of the nearest example

    return Ypred

In [154]:
import io
import csv
import random
from pprint import pprint

def loadDataset(filename):
    dataset=[] 
    with io.open(filename, 'rt', encoding="UTF8") as csvfile:
        lines = csv.reader(csvfile)
        dataset = list(filter(lambda line: len(line) > 0, lines))
    return dataset
         
dataset = loadDataset('data/iris.data')

# ramdomize dateSet

rate = int(len(dataset)*0.66)
#print('rate', rate)
#print('dataset'+ repr(dataset))
  
pprint(dataset)
np.random.shuffle(dataset)
print('----')
pprint(dataset)
trainingSet = np.split(dataset, [rate])[0] 
evaluationSet = np.split(dataset, [rate])[1] 

#print ('trainingSet: ', repr(trainingSet))
#print ('evaluationSet: ', repr(evaluationSet))

trainingDataSet = np.array(list(map( lambda data : data[:len(data)-1] , trainingSet)), dtype=float)
trainingLabelsSet = np.array(list(map( lambda data : data[len(data)-1:][0] , trainingSet)))

evaluationDataSet = np.array(list(map( lambda data : data[:len(data)-1] , evaluationSet)), dtype=float)
evaluationLabelsSet = np.array(list(map( lambda data : data[len(data)-1:][0] , evaluationSet)))

#print ('trainingDataSet: ' + repr((trainingDataSet)))
#print ('trainingLabelsSet: ' + repr((trainingLabelsSet)))
#print ('evaluationDataSet: ' + repr((evaluationDataSet)))
#print ('evaluationLabelsSet: ' + repr((evaluationLabelsSet)))


nn = NearestNeighbor()

nn.train(trainingDataSet, trainingLabelsSet)

result = nn.predict(evaluationDataSet)
#labeledResult = np.array(list(map( lambda data : data[len(data)-1:][0] , result)))
#print('result:', result)
print ('accuracy:' ,  np.mean(result == evaluationLabelsSet) )

# split data /labels

# pass to NN

# calc accuracy



[['5.1', '3.5', '1.4', '0.2', 'Iris-setosa'],
 ['4.9', '3.0', '1.4', '0.2', 'Iris-setosa'],
 ['4.7', '3.2', '1.3', '0.2', 'Iris-setosa'],
 ['4.6', '3.1', '1.5', '0.2', 'Iris-setosa'],
 ['5.0', '3.6', '1.4', '0.2', 'Iris-setosa'],
 ['5.4', '3.9', '1.7', '0.4', 'Iris-setosa'],
 ['4.6', '3.4', '1.4', '0.3', 'Iris-setosa'],
 ['5.0', '3.4', '1.5', '0.2', 'Iris-setosa'],
 ['4.4', '2.9', '1.4', '0.2', 'Iris-setosa'],
 ['4.9', '3.1', '1.5', '0.1', 'Iris-setosa'],
 ['5.4', '3.7', '1.5', '0.2', 'Iris-setosa'],
 ['4.8', '3.4', '1.6', '0.2', 'Iris-setosa'],
 ['4.8', '3.0', '1.4', '0.1', 'Iris-setosa'],
 ['4.3', '3.0', '1.1', '0.1', 'Iris-setosa'],
 ['5.8', '4.0', '1.2', '0.2', 'Iris-setosa'],
 ['5.7', '4.4', '1.5', '0.4', 'Iris-setosa'],
 ['5.4', '3.9', '1.3', '0.4', 'Iris-setosa'],
 ['5.1', '3.5', '1.4', '0.3', 'Iris-setosa'],
 ['5.7', '3.8', '1.7', '0.3', 'Iris-setosa'],
 ['5.1', '3.8', '1.5', '0.3', 'Iris-setosa'],
 ['5.4', '3.4', '1.7', '0.2', 'Iris-setosa'],
 ['5.1', '3.7', '1.5', '0.4', 'Iri

 ['5.6', '2.7', '4.2', '1.3', 'Iris-versicolor'],
 ['5.1', '3.8', '1.6', '0.2', 'Iris-setosa'],
 ['5.8', '2.8', '5.1', '2.4', 'Iris-virginica'],
 ['5.5', '2.4', '3.7', '1.0', 'Iris-versicolor'],
 ['4.9', '2.5', '4.5', '1.7', 'Iris-virginica'],
 ['6.7', '3.1', '4.4', '1.4', 'Iris-versicolor'],
 ['7.2', '3.6', '6.1', '2.5', 'Iris-virginica'],
 ['6.3', '2.5', '4.9', '1.5', 'Iris-versicolor'],
 ['4.8', '3.4', '1.6', '0.2', 'Iris-setosa']]
('accuracy:', 0.9019607843137255)
