Inspired by Deep Learning for Computer Vision with Python [Rosebrock]  
Chapter 7  
K-NN Classifier

In [None]:
from os.path import expanduser
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

from imutils import paths
from dlcv_utils import SimplePreprocessor, SimpleDatasetLoader

In [None]:
# path to input dataset ("~" is expanded to the current user's home directory)
DATASET = expanduser('~/dlcv/code/datasets/animals')

# get the list of image paths
image_paths = list(paths.list_images(DATASET))
if not image_paths:
    # Raise instead of print + quit(): quit() is an interactive-shell helper
    # installed by the site module and is not meant for use in programs.
    # An exception stops this cell and reports which directory was searched.
    raise FileNotFoundError(f'Error! No files found in {DATASET!r}')

# initialize the image preprocessor
# (presumably resizes every image to 32x32 -- confirm in dlcv_utils)
sp = SimplePreprocessor(width=32, height=32)

# load the dataset from disk
# (verbose=500 presumably reports progress every 500 images -- confirm)
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(image_paths, verbose=500)
print('data shape', data.shape)

# reshape the data matrix: the loader returns a 4-D array, which is
# flattened here to one row of height * width * channels values per image
num_files, height, width, channels = data.shape
print(f'{num_files=}')
print(f'{height=}')
print(f'{width=}')
print(f'{channels=}')
data = data.reshape((num_files, height * width * channels))
print('data shape', data.shape)

# encode the labels as integers (from text); `le` is kept so the original
# class names remain available through le.classes_
le = LabelEncoder()
labels = le.fit_transform(labels)

# split the data into training and testing sets
# (fixed random_state so the split is reproducible between runs)
(features_train, features_test, labels_train, labels_test) = train_test_split(
    data, labels, test_size=0.25, random_state=42) # 25% for testing

In [None]:
# k-NN settings
# NUM_JOBS: number of jobs for the k-NN distance (-1 uses all available cores)
NUM_JOBS = -1
# NEIGHBORS: number of nearest neighbors used for classification
NEIGHBORS = 1

# collect the constructor arguments in one place, build the classifier,
# then fit it on the raw pixel intensities of the training split
knn_settings = dict(n_neighbors=NEIGHBORS, n_jobs=NUM_JOBS)
model = KNeighborsClassifier(**knn_settings)
model.fit(features_train, labels_train)

In [None]:
# predict a label for every sample in the held-out test split
predictions = model.predict(features_test)

# evaluate the k-NN classifier: build the text report, using the original
# (text) class names kept by the label encoder, and print it
report = classification_report(
    labels_test, predictions, target_names=le.classes_)
print(report)