# Create a First Image Classifier Using k-NN (k-Nearest Neighbors)

This algorithm does not actually learn anything. Instead, it computes the Euclidean distance between a new image and every stored training image, and assigns the class that is most common among the k nearest ones. Its biggest downside is that the model has to keep all the training images around for comparison, so the model is very large and prediction is computationally expensive.

## Import the Necessary Packages

In [58]:
# Import the necessary packages

from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import paths
import numpy as np
import cv2
import os

# Preprocessing

In [59]:
class SimpleProcessor:
    """Resize images to a fixed width and height.

    The target size is applied directly, so the aspect ratio of the
    input image is not preserved.
    """

    def __init__(self, width, height, inter=cv2.INTER_AREA):
        # Target size in pixels plus the OpenCV interpolation flag that
        # preprocess() forwards to cv2.resize.
        self.width, self.height, self.inter = width, height, inter

    def preprocess(self, image):
        """Return `image` resized to (width, height), ignoring aspect ratio."""
        target_size = (self.width, self.height)
        resized = cv2.resize(image, target_size, interpolation=self.inter)
        return resized
        

# Building Image Loader

In [60]:
class SimpleDatasetLoader:
    """Load images from disk, preprocess them, and collect their labels.

    Each preprocessor must expose a ``preprocess(image)`` method that
    returns the transformed image; preprocessors are applied in order.
    """

    def __init__(self, preprocessors=None):
        # Normalise None to an empty list so load() can always iterate.
        self.preprocessors = [] if preprocessors is None else preprocessors

    def load(self, imagePaths, verbose=1):
        """Read, preprocess and label every image in `imagePaths`.

        Args:
            imagePaths: iterable of image file paths laid out as
                ``/path/to/dataset/{class}/{image}.jpg``; the name of the
                parent directory is used as the label.
            verbose: print a progress line every `verbose` images; a value
                of 0 or less disables progress output.

        Returns:
            A ``(data, labels)`` tuple of NumPy arrays holding the
            preprocessed images and their labels, in input order.
            Images that cannot be read are skipped (with a warning), so
            both arrays always have the same length.
        """
        # Materialise the paths so generators are accepted and the total
        # count is known for the progress message.
        imagePaths = list(imagePaths)
        total = len(imagePaths)

        data = []
        labels = []

        for (i, imagePath) in enumerate(imagePaths):
            # The class label is the name of the image's parent directory.
            label = imagePath.split(os.path.sep)[-2]
            image = cv2.imread(imagePath)

            if image is None:
                # cv2.imread signals a missing/corrupt file by returning
                # None instead of raising; skip it so a single bad file
                # neither crashes the preprocessors nor ends up in `data`.
                print("[WARNING] could not read image, skipping: {}".format(imagePath))
            else:
                for p in self.preprocessors:
                    image = p.preprocess(image)

                # Keep data and labels aligned: append both or neither.
                data.append(image)
                labels.append(label)

            # Show an update every `verbose` images (including the first
            # one when verbose == 1).
            if verbose > 0 and (i + 1) % verbose == 0:
                print("[INFO] processed {}/{}".format(i + 1, total))

        return (np.array(data), np.array(labels))


# Implementing KNN

In [61]:
# Notebook-level settings read by the cells below.
# Root directory of the image dataset; it is passed to paths.list_images.
dataset="Images/dataset/animals"
# Value passed as n_neighbors to KNeighborsClassifier (the "k" in k-NN).
neighbors=1
# Value passed as n_jobs to KNeighborsClassifier
# (in scikit-learn, -1 means "use all available processors").
jobs=-1

In [62]:
# Collect the path of every image found under the dataset directory.
print("[INFO] loading images...")
imagePaths = list(paths.list_images(dataset))

# Resize every image to 32x32 pixels while loading it from disk.
sp = SimpleProcessor(32, 32)
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(imagePaths, verbose=500)

# Fail early with a clear message instead of an obscure reshape/split
# error further down when the dataset path is wrong or empty.
if data.shape[0] == 0:
    raise ValueError("no images could be loaded from {!r}".format(dataset))

# Flatten each image into a single feature vector. Using -1 lets NumPy
# infer the vector length (32 * 32 * 3 = 3072 here), so this line stays
# correct if the size given to SimpleProcessor is changed.
data = data.reshape((data.shape[0], -1))

# show some information on memory consumption of the images
print("[INFO] features matrix: {:.1f}MB".format(data.nbytes / (1024 * 1024.0)))

[INFO] loading images...
[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000
[INFO] features matrix: 8.8MB


In [63]:
# Encode the labels: fit_transform replaces each class-name string with an
# integer code; the fitted encoder `le` keeps the original names in
# le.classes_, which the evaluation cell uses for the report.

le=LabelEncoder()
labels=le.fit_transform(labels)

# Hold out 25% of the samples for testing; the fixed random_state makes
# the split reproducible between runs.
(trainX,testX,trainY,testY)=train_test_split(data,labels,test_size=0.25,random_state=42)


In [65]:
# Train the k-NN classifier on the training split, using the `neighbors`
# and `jobs` settings defined in the configuration cell.

model=KNeighborsClassifier(n_neighbors=neighbors,n_jobs=jobs)
model.fit(trainX,trainY)

KNeighborsClassifier(n_jobs=-1, n_neighbors=1)

In [67]:
# Predict the held-out test split and print per-class precision, recall
# and F1, labelling the rows with the original class names from the encoder.
print(classification_report(testY,model.predict(testX),target_names=le.classes_))

              precision    recall  f1-score   support

        cats       0.41      0.49      0.45       262
        dogs       0.35      0.47      0.40       249
       panda       0.70      0.31      0.43       239

    accuracy                           0.43       750
   macro avg       0.49      0.42      0.43       750
weighted avg       0.48      0.43      0.43       750

