In [1]:
# !pip install opencv-python
# !pip install imutils
import gc
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from pyimagesearch.preprocessing import SimplePreprocessor
from pyimagesearch.datasets import SimpleDatasetLoader

from imutils import paths
import argparse

In [26]:
args = {}
args["datasets"] = '../datasets/animals'
args["neighbors"] = 25
args["jobs"] = -1

In [3]:
imagePaths = list(paths.list_images(args['datasets']))

print(len(imagePaths))

3000


In [4]:
IM_DIM = 32
CHANNELS = 3

In [5]:
# Initialize the image preprocessor, load the dataset from disk,
# and reshape the data matrix
sp = SimplePreprocessor(IM_DIM, IM_DIM)
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(imagePaths, verbose=500)


[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000


In [6]:
data.shape

(3000, 32, 32, 3)

In [7]:
data[1, :]

array([[[128, 104,  96],
        [141, 120, 115],
        [150, 134, 124],
        ...,
        [ 17,  20,  25],
        [ 15,  19,  23],
        [ 14,  18,  19]],

       [[124, 105,  96],
        [144, 130, 123],
        [152, 131, 117],
        ...,
        [ 18,  21,  26],
        [ 18,  21,  25],
        [ 15,  18,  22]],

       [[130, 111, 100],
        [144, 121, 109],
        [153, 138, 126],
        ...,
        [ 18,  23,  28],
        [ 18,  23,  27],
        [ 16,  21,  26]],

       ...,

       [[131, 152, 160],
        [130, 150, 155],
        [140, 163, 167],
        ...,
        [ 81,  68,  56],
        [ 70,  61,  52],
        [ 70,  74,  81]],

       [[128, 152, 160],
        [127, 147, 150],
        [137, 161, 165],
        ...,
        [ 67,  59,  49],
        [ 57,  50,  42],
        [ 49,  57,  66]],

       [[120, 138, 145],
        [121, 139, 145],
        [135, 159, 165],
        ...,
        [ 54,  48,  39],
        [ 39,  37,  30],
        [ 45,  51,  57]]

In [8]:
labels.shape

(3000,)

In [13]:
labels[0:5,]

array(['cats', 'cats', 'cats', 'cats', 'cats'], dtype='<U5')

In [14]:
data = data.reshape(data.shape[0], IM_DIM * IM_DIM * CHANNELS)

In [15]:
data.shape

(3000, 3072)

In [16]:
# Show some information on memory consumption of the images
print("[INFO] features matrix: {:.1f}MB".format(
    data.nbytes / (1024 * 1000.0)
))

[INFO] features matrix: 9.0MB


In [17]:
# Encode the labes as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

In [18]:
labels.shape

(3000,)

In [24]:
labels[2000:2500,]

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

In [25]:
# Partition the data into training and testing splits using 75% of 
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)


In [29]:
# Train and Evaluate a kNN classifier on the raw pixel intensities
print("[INFO] evaluating kNN classifier...")
model = KNeighborsClassifier(n_neighbors=args["neighbors"], n_jobs=args["jobs"])
model.fit(trainX, trainY)

[INFO] evaluating kNN classifier...


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=-1, n_neighbors=25, p=2,
           weights='uniform')

In [30]:
print(classification_report(testY, model.predict(testX), target_names=le.classes_))

             precision    recall  f1-score   support

       cats       0.49      0.55      0.52       262
       dogs       0.37      0.53      0.44       249
      panda       0.93      0.38      0.54       239

avg / total       0.59      0.49      0.50       750



In [None]:
import gc
del data, labels
gc.collect();

In [31]:
le.classes_

array(['cats', 'dogs', 'panda'], dtype='<U5')