In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

#matplotlib.use("Agg")
import itertools

In [None]:
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [None]:
# if necessary, set CUDA_VISIBLE_DEVICES to -1 to use CPU
# or to GPU ID (e.g. 0) to use GPU
import os
#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
#os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
import keras
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras import backend as K

In [None]:
from keras.optimizers import SGD
from keras.datasets import cifar10

In [None]:
# The TF backend assumes the "channels_last" data shape layout.
# For 2D data "channels_last" means (rows, cols, channels),
# while "channels_first" means (channels, rows, cols).
# Display the layout the current backend is configured with.
K.image_data_format()

In [None]:
# load CIFAR10 data: one (images, labels) pair for training, one for testing
(trainX, trainY), (testX, testY) = cifar10.load_data()

In [None]:
# report array shapes: images on the first line, labels on the second
# (train shape followed by test shape on each line)
for train_part, test_part in ((trainX, testX), (trainY, testY)):
    print(train_part.shape, test_part.shape)

In [None]:
# class names of the CIFAR-10 dataset; the list position is used as the
# integer class index when looking a label up
labelNames = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]
# number of classes follows from the list above
num_classes = len(labelNames)

Explore the dataset

In [None]:
# Inspect one training sample: its raw label row, the matching class name
# and the image itself.
# NOTE: labelNames is indexed with the label value directly, so this cell
# expects trainY to still hold integer class ids (run it before the labels
# are binarized).
n = 0
print(trainY[n])
print(labelNames[trainY[n][0]])
plt.imshow(trainX[n], interpolation='nearest')

In [None]:
# normalise data: convert the integer pixel values to floats and divide by 255.
# The dtype guard makes the cell idempotent: once the arrays are floats,
# re-running the cell no longer divides them by 255 a second time.
if np.issubdtype(trainX.dtype, np.integer):
    trainX = trainX.astype("float") / 255.0
if np.issubdtype(testX.dtype, np.integer):
    testX = testX.astype("float") / 255.0

Binarize labels in a one-vs-all fashion

Several regression and binary classification algorithms are
available in scikit-learn. A simple way to extend these algorithms
to the multi-class classification case is to use the so-called
one-vs-all scheme.

At learning time, this simply consists in learning one regressor
or binary classifier per class. In doing so, one needs to convert
multi-class labels to binary labels (belong or does not belong
to the class). LabelBinarizer makes this process easy with the
transform method.

At prediction time, one assigns the class for which the corresponding
model gave the greatest confidence. LabelBinarizer makes this easy
with the inverse_transform method.

In [None]:
# Convert the labels from integers to vectors.
# Fit the label binarizer and transform multi-class labels to binary labels.
# The output of transform is sometimes referred to as the 1-of-K coding scheme.

# Why are we using fit_transform in one case and transform in the other,
# even though the dimensions of the labels are identical?

# fit_transform fits the binarizer on the training labels and encodes them
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
# the test labels are encoded with the binarizer already fitted above
testY = lb.transform(testY)

Alternatively the same can be achieved using Keras rather than scikit-learn. 
Convert class vectors to binary class matrices using keras.utils.to_categorical

In [None]:
#trainY = keras.utils.to_categorical(trainY, num_classes)
#testY = keras.utils.to_categorical(testY, num_classes)

In [None]:
# show the label entry of the first training sample, then of the first test sample
for label_rows in (trainY, testY):
    print(label_rows[0])

Define model

In [1]:
def build_model(width, height, depth, classes, dropout=False, batch_normalisation=False):
    """Assemble a small Sequential CNN that ends in a softmax classifier.

    Layer order:
        2 x [ (CONV 3x3 => RELU [=> BN]) x 2 => POOL 2x2 [=> DROPOUT 0.25] ]
        with 32 filters in the first stage and 64 in the second, followed by
        FLATTEN => DENSE(512) => RELU [=> BN] [=> DROPOUT 0.5]
        => DENSE(classes) => SOFTMAX.

    Args:
        width, height, depth: input dimensions; arranged into the input shape
            according to K.image_data_format().
        classes: number of units in the final Dense layer.
        dropout: when true, the Dropout layers are added.
        batch_normalisation: when true, the BatchNormalization layers are added.

    Returns:
        The constructed (not yet compiled) Sequential model.
    """
    model = Sequential()

    # choose the input shape and the BN channel axis to match the data layout
    if K.image_data_format() == "channels_first":
        print("adapt shape to channels_first")
        input_shape, channel_axis = (depth, height, width), 1
    else:
        input_shape, channel_axis = (height, width, depth), -1

    # two convolutional stages; only the very first layer receives input_shape
    conv_kwargs = {"input_shape": input_shape}
    for filters in (32, 64):
        for _ in range(2):
            model.add(Conv2D(filters, (3, 3), **conv_kwargs))
            conv_kwargs = {}
            model.add(Activation("relu"))
            if batch_normalisation:
                model.add(BatchNormalization(axis=channel_axis))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        if dropout:
            model.add(Dropout(0.25))

    # single fully connected block
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation("relu"))
    if batch_normalisation:
        model.add(BatchNormalization())
    if dropout:
        model.add(Dropout(0.5))

    # softmax classifier head
    model.add(Dense(classes))
    model.add(Activation("softmax"))

    return model

In [None]:
# initialize the model
# The output layer is sized from num_classes (derived from labelNames) instead
# of a literal 10, so the class list and the network cannot drift apart.
model = build_model(width=32, height=32, depth=3, classes=num_classes,
                    dropout=False, batch_normalisation=False)

In [None]:
# print a summary of the model that was just built
model.summary()

In [None]:
# Optimizer: SGD with Nesterov momentum.
# The decay is written as 0.01 / 40 -- presumably the initial learning rate
# divided by a planned 40 epochs; TODO confirm.
opt = SGD(lr=0.01,
          momentum=0.9,
          decay=0.01 / 40,
          nesterov=True)
#opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# compile with categorical cross-entropy loss and track accuracy
model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
# train the network. On 1080 Ti it takes 4-5 sec per epoch, on fast CPU ~ 45 sec
#                      w/o BN   with BN
# batch_size   64- 128     4s
# batch_size  256-1024     3s       5s

nb_epoch = 30  # 40

# NOTE: the test split doubles as the validation data here.
# H keeps the returned history object; the plotting cells read H.history.
H = model.fit(x=trainX, y=trainY,
              validation_data=(testX, testY),
              epochs=nb_epoch,
              batch_size=64,
              shuffle=True)

In [None]:
# save the model (with weights) and just weights
# model can be later loaded with
# keras.models.load_model(filename)
# for weights to be loaded a model needs to be defined first

# Create the output directory if needed. exist_ok=True replaces the separate
# isdir() check, so an already existing directory is simply reused.
save_dir = os.path.join(os.getcwd(), 'saved_models')
os.makedirs(save_dir, exist_ok=True)

# full model: architecture + weights
model_name = 'keras_cifar10_trained_model.h5'
model_path = os.path.join(save_dir, model_name)
model.save(model_path)

# weights only
weights_name = 'keras_cifar10_trained_weights.h5'
weights_path = os.path.join(save_dir, weights_name)
model.save_weights(weights_path)

In [None]:
# evaluate the network: predict on the test images, then print a per-class
# classification report. argmax turns the one-hot / probability rows back
# into class indices.
predictions = model.predict(testX, batch_size=64)
print(classification_report(np.argmax(testY, axis=1),
                            np.argmax(predictions, axis=1),
                            target_names=labelNames))

In [None]:
# Generate the confusion matrix from the true and the predicted class indices
# (this cell only computes the matrix; it does not plot it)
orig_testY = np.argmax(testY, axis=1)
pred_testY = np.argmax(predictions, axis=1)
cnf_matrix = confusion_matrix(orig_testY, pred_testY)

In [None]:
# plot the loss
# Matplotlib 3.6 renamed the "seaborn-white" style to "seaborn-v0_8-white" and
# later removed the old name, so use whichever name this installation provides.
plt.style.use("seaborn-white" if "seaborn-white" in plt.style.available
              else "seaborn-v0_8-white")
# Take the x-axis from the recorded history rather than from nb_epoch, so the
# plot stays consistent even if nb_epoch is reassigned after training.
epochs_run = np.arange(len(H.history["loss"]))
plt.figure()
plt.plot(epochs_run, H.history["loss"], label="train_loss")
plt.plot(epochs_run, H.history["val_loss"], label="val_loss")
plt.title("Training and Validation Loss on CIFAR-10")
plt.xlabel("Number of Epochs")
plt.ylabel("Loss")
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# plot the accuracy
# Matplotlib 3.6 renamed the "seaborn-white" style to "seaborn-v0_8-white" and
# later removed the old name, so use whichever name this installation provides.
plt.style.use("seaborn-white" if "seaborn-white" in plt.style.available
              else "seaborn-v0_8-white")
# Older Keras records the metric as "acc"/"val_acc"; Keras >= 2.3 records it
# under the name passed to compile(), i.e. "accuracy"/"val_accuracy".
acc_key = "acc" if "acc" in H.history else "accuracy"
# Take the x-axis from the recorded history rather than from nb_epoch, so the
# plot stays consistent even if nb_epoch is reassigned after training.
epochs_run = np.arange(len(H.history[acc_key]))
plt.figure()
plt.plot(epochs_run, H.history[acc_key], label="train_acc")
plt.plot(epochs_run, H.history["val_" + acc_key], label="val_acc")
plt.title("Training and Validation Accuracy on CIFAR-10")
plt.xlabel("Number of Epochs")
plt.ylabel("Accuracy")
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# reload into the model the weights stored at weights_path by the save cell
model.load_weights(weights_path)

In [None]:
# Score trained model.
# evaluate() computes the loss on the test data batch by batch; the prints
# below read the loss from scores[0] and the accuracy from scores[1].
scores = model.evaluate(x=testX, y=testY, batch_size=64, verbose=1)
test_loss, test_accuracy = scores[0], scores[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# Re-initialize the optimizer with a smaller learning rate (0.001) and compile
# the model again before continuing training.
# The decay is still written as 0.01 / 40 -- TODO confirm that is intended
# for the reduced learning rate.
opt = SGD(lr=0.001,
          momentum=0.9,
          decay=0.01 / 40,
          nesterov=True)
#opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# compile with categorical cross-entropy loss and track accuracy
model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
# continue training for a further, shorter run
# NOTE: this reassigns both nb_epoch and H, so any plotting cell run afterwards
# sees the history of this run only.
nb_epoch = 10  # 40

H = model.fit(x=trainX, y=trainY,
              validation_data=(testX, testY),
              epochs=nb_epoch,
              batch_size=64,
              shuffle=True)