# k-NN Image Classifier
This chapter builds an image classifier for a dataset consisting of images of dogs, cats and pandas.
k-NN is the simplest machine learning algorithm. 
It works like this:
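In the training phase the images are placed in a Cartesian space whose axes are the components of the feature vectors, so each datapoint (image) becomes a single point in that space. To classify a new image, its feature vector is placed in the same space and it receives the most common label among its k nearest training points.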

In [1]:
import argparse
from imutils import paths
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from utilities.preprocessing import SimplePreprocessor
from utilities.datasets import SimpleDatasetLoader

import cv2

# Root directory of the animals dataset (images of dogs, cats and pandas)
dataset = '../datasets/animals'

# Get the list of paths of every image found under the dataset directory
image_paths = list(paths.list_images(dataset))

# Initialize SimplePreprocessor and SimpleDatasetLoader, then load the data
# and labels. The preprocessor just resizes each image to the needed size
# (32x32); verbose=500 prints a progress line every 500 images.
print('[INFO]: Images loading....')
sp = SimplePreprocessor(32, 32)
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(image_paths, verbose=500)


# Flatten every image into one feature vector, e.g. from (3000, 32, 32, 3)
# to (3000, 32*32*3=3072). Using -1 lets NumPy derive the vector length from
# the loaded data, so this line stays correct if the preprocessor size changes.
data = data.reshape((data.shape[0], -1))

# Print information about memory consumption.
# NOTE: the divisor is 1024*1000, so the figure is only an approximation of
# megabytes (it is neither 10**6 nor 2**20 bytes).
print('[INFO]: Features Matrix: {:.1f}MB'.format(data.nbytes /(1024*1000.0)))

# Encode labels as integers, from cat, dog and panda to 0, 1 and 2
le = LabelEncoder()
labels = le.fit_transform(labels)

[INFO]: Images loading....
[INFO]: Processed 500/3000
[INFO]: Processed 1000/3000
[INFO]: Processed 1500/3000
[INFO]: Processed 2000/3000
[INFO]: Processed 2500/3000
[INFO]: Processed 3000/3000
[INFO]: Features Matrix: 9.0MB


In [2]:
# Hold out 25% of the samples for testing and keep 75% for training.
# With 3000 images in total, the training portion has 2250 of them.
# random_state is fixed so the same split is produced on every run.
(train_x, test_x, train_y, test_y) = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=42,
)

# Show the training matrix shape, the training matrix and its labels
for item in (train_x.shape, train_x, train_y):
    print(item)

(2250, 3072)
[[ 27  28  34 ...  93  95  88]
 [ 59  59 118 ...  79  62  69]
 [255 254 250 ...  55 114  80]
 ...
 [ 52  74  98 ...  94 126 154]
 [ 76  88  87 ... 138 117  95]
 [ 66  62 105 ...  64  85  73]]
[2 0 1 ... 1 1 0]


In [17]:
# Train and evaluate the k-NN classifier on the raw pixel intensities
print('[INFO]: Classification starting....')
# n_neighbors=2: each test image is labelled from its 2 nearest training images
# n_jobs=-1: run the neighbour search on all available processors
model = KNeighborsClassifier(n_neighbors=2,
                             n_jobs=-1)

model.fit(train_x, train_y)

# Predict once and reuse the result below: k-NN does all of its work at
# prediction time, so calling predict() twice doubles the cost.
# The name predictedValues is kept because the display cell reads it.
predictedValues = model.predict(test_x)
print(predictedValues)

# Per-class precision/recall/F1, with the integer labels mapped back to the
# original class names learned by the LabelEncoder
print(classification_report(test_y, predictedValues,
                            target_names=le.classes_))

[INFO]: Classification starting....
[0 0 0 0 2 0 0 0 1 1 2 0 1 0 1 1 0 0 0 1 0 0 0 0 0 1 2 0 0 0 0 1 0 0 0 1 0
 0 2 0 0 0 0 0 1 0 2 0 0 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0
 1 0 0 2 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 2
 0 0 2 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 2 0 0 0 1 0 1 1 1 0 0 0 0 0 1 1 1 1
 2 0 1 0 0 1 0 0 0 2 1 1 0 0 0 1 0 0 0 1 1 0 1 1 1 0 1 0 1 1 1 1 0 0 1 2 0
 1 1 0 1 0 0 0 0 0 0 0 0 1 0 0 1 1 0 2 0 1 0 0 1 0 1 0 1 0 0 1 0 0 0 0 1 0
 1 1 0 0 1 1 0 0 0 0 1 1 1 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 1 1 0 0 0 2 1 0 2
 2 0 1 0 1 0 0 1 1 0 0 0 0 0 1 1 0 2 0 0 1 0 0 0 0 0 1 0 0 1 0 1 0 1 0 0 1
 0 2 2 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 2 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1
 0 1 1 2 0 1 2 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 1 1 0 0 2 1 1 0 0 0 0 0 0 2
 0 0 0 1 0 1 0 1 0 0 0 1 0 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0
 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1 0 2 0 0 0 1 1 2 1 0 0 1 1
 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 2 1 0 2 0

In [24]:
# Restore the flattened test vectors to 32x32 3-channel images for display.
# -1 lets NumPy infer the number of test images instead of hard-coding 750.
classificadas = test_x.reshape((-1, 32, 32, 3))

# Text overlay settings for cv2.putText
font                   = cv2.FONT_HERSHEY_SIMPLEX
bottomLeftCornerOfText = (105, 240)
fontScale              = 1
fontColor              = (255, 255, 255)
# The 7th positional argument of cv2.putText is the stroke thickness
# (lineType comes after it), so the value is named accordingly.
thickness              = 2

# Map the integer predictions produced by the classifier cell back to the
# original class names
predictedNames = le.inverse_transform(predictedValues)

# Show each test image with its predicted label; any key advances to the next
# image and 'q' stops the slideshow.
try:
    for imagem, predictedName in zip(classificadas, predictedNames):
        # interpolation must be passed by keyword: the third positional
        # parameter of cv2.resize is dst, not the interpolation flag.
        # resize returns a new array, so the text is not drawn onto test_x.
        imagem = cv2.resize(imagem, (256, 256), interpolation=cv2.INTER_LINEAR)
        cv2.putText(imagem, str(predictedName), bottomLeftCornerOfText, font,
                    fontScale, fontColor, thickness)
        cv2.imshow('classificadas', imagem)
        if cv2.waitKey() == ord('q'):
            break
finally:
    # Close the window once, even if the loop is interrupted; imshow reuses
    # the same named window, so it need not be destroyed per image.
    cv2.destroyAllWindows()