# =============================================================
# Image classification using k-NN  (k-Nearest Neighbors)
## This code uses a k-NN classifier to classify 3000 images of cats, dogs and pandas into the correct labels.
## Steps are as follows
### 1.  First divide the input set into a training set and a test set
### 2.  Train the k-NN model with the training set
### 3.  Test the k-NN model with the test set
# =============================================================

###         Import the packages used in the code

In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from greenheritagellc.preprocessing import Preprocessor
from greenheritagellc.datasets import DatasetLoader
from imutils import paths

### Define inputs

In [2]:
import os

# Root folder of the image dataset. Built with os.path.join so the path
# separator matches the host OS; on Windows this is still ".\\datasets\\animals",
# exactly as before, while on POSIX systems it becomes "./datasets/animals".
dataset_path = os.path.join(".", "datasets", "animals")
# k for the k-NN classifier (passed as n_neighbors).
numberofneighbors = 3
# Number of parallel jobs for the classifier (passed as n_jobs).
numberofcores = 1

### Grab the list of images that we'll be describing

In [3]:
# Announce the scan first, then materialise every image path found under
# dataset_path into a list so it can be passed around and measured.
print("[INFO] loading images...")
image_path_iter = paths.list_images(dataset_path)
imagePaths = list(image_path_iter)


[INFO] loading images...


### Initialize the image preprocessor, load the dataset from disk and reshape the data matrix

In [4]:
# Preprocessor(32, 32) is presumably the target width/height (the loaded
# array below is (N, 32, 32, 3)) -- confirm against greenheritagellc.
pp = Preprocessor(32, 32)
dl = DatasetLoader(preprocessors=[pp])
# verbose=500 makes the loader print a progress line every 500 images.
(data, labels) = dl.load(imagePaths, verbose=500)
print(data.shape)
print(labels.shape)
# Flatten each image into a single feature row. -1 lets NumPy infer the
# per-image feature count (32 * 32 * 3 = 3072 here), so this line keeps
# working if the preprocessor size or channel count is changed.
data = data.reshape((data.shape[0], -1))
print(data.shape)

imagepath: .\datasets\animals\cats\cats_00001.jpg
label before: ['.', 'datasets', 'animals', 'cats', 'cats_00001.jpg']
label after: cats
[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000
(3000, 32, 32, 3)
(3000,)
(3000, 3072)


### Show some information on memory consumption of the images

In [5]:
# Report how much memory the flattened feature matrix occupies, in
# units of 1024 * 1024 bytes.
bytes_per_mb = 1024 * 1024.0
print("[INFO] features matrix: {:.1f}MB".format(data.nbytes / bytes_per_mb))

[INFO] features matrix: 8.8MB


### Encode the labels as integers

In [6]:
# Learn the set of class names from the string labels, then replace the
# labels with their integer codes. The fitted encoder is kept in `le` so
# the class names can be looked up later.
le = LabelEncoder()
le.fit(labels)
labels = le.transform(labels)

### Partition the data into training and testing splits using 75% of the data for training and the remaining 25% for testing

In [7]:
# Hold out 25% of the samples for testing; the fixed random_state makes
# the split reproducible between runs.
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=42,
)
# Print the shape of each feature split as a sanity check.
for split_features in (trainX, testX):
    print(split_features.shape)

(2250, 3072)
(750, 3072)


### Train and evaluate a k-NN classifier on the raw pixel intensities

In [8]:
# Build the k-NN classifier from the configured neighbour count and job
# count, then fit it on the training split.
print("[INFO] evaluating k-NN classifier...")
model = KNeighborsClassifier(
    n_neighbors=numberofneighbors,
    n_jobs=numberofcores,
)
# Kept as the final bare expression so the notebook displays the fitted
# estimator as the cell's output.
model.fit(trainX, trainY)


[INFO] evaluating k-NN classifier...


KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=1, n_neighbors=3, p=2,
                     weights='uniform')

In [9]:
# Print the class names learned by the fitted label encoder. Their order
# presumably matches the integer codes 0, 1, 2 produced by the encoder --
# confirm against the LabelEncoder documentation.
print(le.classes_)

['cats' 'dogs' 'panda']


In [10]:
# Predict labels for the held-out test split, then print the metrics
# report comparing them with the true labels.
predictions = model.predict(testX)
print(classification_report(testY, predictions))

              precision    recall  f1-score   support

           0       0.41      0.58      0.48       262
           1       0.38      0.47      0.42       249
           2       0.86      0.26      0.40       239

    accuracy                           0.44       750
   macro avg       0.55      0.44      0.43       750
weighted avg       0.54      0.44      0.44       750

