In [1]:
from keras.datasets import cifar10
(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()


# inspect the data
# print(x_train[0])
print(X_train.shape)
Y_train[:5]


Using TensorFlow backend.


(50000, 32, 32, 3)


array([[6],
       [9],
       [9],
       [4],
       [1]], dtype=uint8)

In [2]:
import numpy as np

from keras.callbacks import EarlyStopping
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers import LeakyReLU
from keras.layers.convolutional import Conv2D
from keras.optimizers import Adam
from keras.layers.pooling import MaxPooling2D
from keras.utils import to_categorical




np.random.seed(42) 
# set random seed for drop out etc... otherwise results are not reproducible / unreliable

# define model name and initialize using the Sequential API (which allows us to easily build our network layer by layer)

classier = Sequential()

In [3]:
'''
- adding a 32 filters with dimensions of 3x3
- padding to learn features at the edge
- input shape is 32x32 pixel img, depth of 3 for RBG channels
'''
classier.add(Conv2D(32, kernel_size=(3, 3), padding='same', input_shape=(32, 32, 3)))
'''
- leakyReLU tend to perform better than RELU because there is a different slope (flatter slope, default = 0.3) for the negative values 
- standard ReLU propagates less information to the next layers because gradient is zero for all negative values
- LeakyRelU is one solution (ELU also) that addresses the Dying RELU problem where a large gradient update causes the RELU neuron unit to never activate again
'''
classier.add(LeakyReLU(alpha=0.3)) 
classier.add(Conv2D(64, padding='same', kernel_size=(3, 3)))
classier.add(LeakyReLU(alpha=0.3))
classier.add(MaxPooling2D(pool_size=(2, 2)))


W0605 12:28:05.320252 4748301760 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4070: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.



In [4]:

'''
- drop out is used to regularize neural networks
- adding dropout after pooling layer tend to lead to better performance
'''
classier.add(Dropout(0.25)) 


In [5]:
'''
the following block is similar in structure to previous blocks but with more filters
'''
classier.add(Conv2D(128, kernel_size=(3, 3)))
classier.add(MaxPooling2D(pool_size=(2, 2)))
classier.add(Conv2D(128, kernel_size=(3, 3)))
classier.add(LeakyReLU(alpha=0.3))
classier.add(MaxPooling2D(pool_size=(2, 2)))
classier.add(Dropout(0.25))

classier.add(Flatten()) # Flatten the feature map tensor to 1D array


In [6]:

'''
below is the classification Dense Neural Net
typically a shallow DNN performs sufficiently and is faster to train 
'''
classier.add(Dense(1024))
classier.add(LeakyReLU(alpha=0.3))
classier.add(Dropout(0.5))
classier.add(Dense(10, activation='softmax'))


In [7]:
'''
after the model architecture has been specified (roughly equivalent to specifying the computational graph)

we can compile the model by providing the loss function and optimizer along with metrics you want to track

it is appropriate to use categorical_crossentropy as a loss function because the softmax layer is size > 2 (if 2, use binary_crossentropy) and classes are mutually exclusive, if not use binary_crossentropy

ADAM optimizer with a lower learning rate and low decay is a safe option especially because we are using early stop callback (see below), we are not concerned about training time for a relatively simple dataset especially when training on GPU
'''
classier.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.0001, decay=1e-6),
              metrics=['accuracy'])


In [None]:
'''
Train the model:
- the X_train / 255 is a way to normalize the data since the pixel values go from 0-255
- not normalizing the data, especially for images, will lead to substantial degradation in model performance
- we need to convert the labels (integer from 0-9) into a one-hot encoded vector of size 10 for the loss function
- the early stop callback reduces overfitting, when the (by default) validation loss has not improved more than 0.01 in 3 epochs, we terminate training
- another practical advantage of using early stop is it removes the need to tune number of epochs as a hyper parameter (set too low, model could have performed better, set too high overfit and waste compute / $)
'''
history = classier.fit(X_train / 255.0, to_categorical(Y_train),
          batch_size=128,
          shuffle=True,
          epochs=250,
          validation_data=(X_test / 255.0, to_categorical(Y_test)),
          callbacks=[EarlyStopping(min_delta=0.01, patience=4)])



W0605 12:29:05.702309 4748301760 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:422: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.



Train on 50000 samples, validate on 10000 samples
Epoch 1/250
Epoch 2/250

In [None]:
# calculate the accuracy on the test set, it is important to normalize your training and test data the same way
scores = classier.evaluate(X_test / 255.0, to_categorical(Y_test))

print('Loss: %.3f' % scores[0])
print('Accuracy: %.3f' % scores[1])


In [None]:
import matplotlib.pyplot as plt
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
sample_id = 108

plt.figure(figsize=(4,4))
plt.imshow(X_test[sample_id])
plt.axis('off')
print(Y_test[sample_id])


In [None]:
'''
make a prediction using the model:
- notice that the dimensions of the test data is the same as training data, it is 4D by default [index, height, width, channel]
- we must reshape the sampled image to a shape the model expects by removing the outer dimension (remove the index dimension). This is the same reason why the label output from above is [8] instead of 8 by default.
- we also need to normalize our data the same way we did during training
'''

softmax_output = classier.predict(X_test[108].reshape((1,) + X_test[108].shape)/255.)
softmax_output


In [None]:
np.argmax(softmax_output)

In [None]:
from sklearn.metrics import confusion_matrix
confusion_matrix = confusion_matrix(Y_test, Y_preds.argmax(axis=1))

confusion_matrix


In [None]:
plt.matshow(confusion_matrix)
plt.colorbar()
