# =============================================================
# Image classification using k-NN  (k-Nearest Neighbors)
## This code uses a k-NN classifier to classify 3000 images of cats, dogs and pandas into the correct labels.
## Steps are as follows
### 1.  Divide the input set into a training set and a test set
### 2.  Train the k-NN model with the training set
### 3.  Test the k-NN model with the test set
# =============================================================

###         Import the packages used in the code

In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from greenheritagellc.preprocessing import Preprocessor
from greenheritagellc.datasets import DatasetLoader
from imutils import paths

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Define inputs

In [2]:
# Location of the image dataset on disk (Windows-style relative path).
dataset_path = ".\\datasets\\animals"

# Settings for the k-NN classifier: how many neighbors vote on each
# prediction, and the value passed as n_jobs when the model is built.
numberofneighbors, numberofcores = 3, 1

### Grab the list of images that we'll be describing

In [3]:
# Collect the path of every image file found under dataset_path.
print("[INFO] loading images...")
# Sort the paths: directory listing order is filesystem-dependent, and the
# fixed-seed train/test split performed later is only reproducible when the
# samples are always presented in the same order.
imagePaths = sorted(paths.list_images(dataset_path))


[INFO] loading images...


### Initialize the image preprocessor, load the dataset from disk and reshape the data matrix

In [None]:
# Build the preprocessing/loading pipeline. Preprocessor(32, 32) presumably
# resizes each image to 32x32 pixels -- confirm in greenheritagellc.preprocessing.
pp = Preprocessor(32, 32)
dl = DatasetLoader(preprocessors=[pp])

# Load the images and their labels. verbose=500 presumably makes the loader
# report progress every 500 images (cf. the "[INFO] processed 500/3000" output).
(data, labels) = dl.load(imagePaths, verbose=500)
print(data.shape)
print(labels.shape)

# Flatten each image into a single feature vector. Using -1 lets NumPy infer
# the vector length (3072 with the settings above) instead of hard-coding it,
# so changing the preprocessor size can no longer break the reshape.
data = data.reshape((data.shape[0], -1))
print(data.shape)

imagepath: .\datasets\animals\cats\cats_00001.jpg
label before: ['.', 'datasets', 'animals', 'cats', 'cats_00001.jpg']
label after: cats
[INFO] processed 500/3000


### Show some information on memory consumption of the images

In [None]:
# Report how much memory the feature matrix occupies, in megabytes.
features_mb = data.nbytes / (1024 * 1024.0)
print("[INFO] features matrix: {:.1f}MB".format(features_mb))

### Encode the labels as integers

In [None]:
# Replace the class-name labels with integer codes. The fitted encoder is
# kept in `le` so the original class names remain available via le.classes_.
le = LabelEncoder()
le.fit(labels)
labels = le.transform(labels)

### Partition the data into training and testing splits using 75% of the data for training and the remaining 25% for testing

In [None]:
# Hold out 25% of the samples for testing; the fixed random_state keeps the
# shuffle identical from run to run for the same input order.
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=42,
)

# Show the resulting sizes of the training and testing feature matrices.
for split in (trainX, testX):
    print(split.shape)

### Train and evaluate a k-NN classifier on the raw pixel intensities

In [None]:
# Build the k-NN classifier from the settings defined earlier and fit it on
# the training split.
print("[INFO] evaluating k-NN classifier...")
model = KNeighborsClassifier(
    n_neighbors=numberofneighbors,
    n_jobs=numberofcores,
)
# Left as the final expression so the notebook displays the fitted model.
model.fit(trainX, trainY)


In [None]:
# Show the class names learned by the label encoder; a name's position in
# this array is the integer code it was mapped to.
class_names = le.classes_
print(class_names)

In [None]:
# Score the classifier on the held-out test split.
predictions = model.predict(testX)

# Label each row of the report with the original class name instead of its
# integer code. Passing the full list of codes as `labels` keeps the names
# aligned with the rows even if a class is absent from the test split.
class_codes = list(range(len(le.classes_)))
print(
    classification_report(
        testY,
        predictions,
        labels=class_codes,
        target_names=le.classes_,
    )
)