## Import the Packages

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pickle
import datetime
import math
import gc
from sklearn.preprocessing import minmax_scale, StandardScaler, normalize
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers import Dense, LeakyReLU
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Input, Conv2D, MaxPooling2D, AveragePooling2D, Softmax, Reshape
from keras.datasets import cifar10
from keras.utils import np_utils
from keras import regularizers, optimizers
from keras.optimizers import Adam
import keras

# check whether the GPU is working
# List every device TensorFlow can see; the captured output of this cell shows
# one CPU and one GPU (Tesla P100) entry.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
# Render matplotlib figures inside the notebook.
%matplotlib inline

# set random seeds for reproducibility
# The default graph is reset first, then TensorFlow and NumPy are seeded with
# the same fixed value (99).
# NOTE(review): GPU kernels may still be non-deterministic, so runs are
# presumably only approximately repeatable -- confirm if exact reproducibility
# is required.
tf.reset_default_graph()
tf.set_random_seed(99)
np.random.seed(99)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 710106050066476259
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 15785944679
locality {
  bus_id: 1
  links {
  }
}
incarnation: 5593524906556744719
physical_device_desc: "device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0"
]


___
## Functions 

In [2]:
def convert_to_one_hot(labels, num_class):
    """Convert integer class labels into a one-hot encoded matrix.

    Args:
        labels: Integer class indices, given as a NumPy array or any
            array-like (e.g. a list). Any shape is accepted; the labels are
            flattened before encoding.
        num_class: Total number of classes, i.e. the width of the result.

    Returns:
        A float array of shape ``(number_of_labels, num_class)`` in which row
        ``i`` holds a single 1.0 in column ``labels[i]`` and 0.0 elsewhere.

    Raises:
        IndexError: If any label lies outside ``[0, num_class)``. Negative
            labels are rejected explicitly because plain NumPy indexing would
            silently wrap them around to the last classes.
    """
    flat_labels = np.asarray(labels).reshape(-1)

    # Nothing to encode: return an empty matrix with the right width instead
    # of failing on the min()/max() range check below.
    if flat_labels.size == 0:
        return np.zeros((0, num_class))

    if flat_labels.min() < 0 or flat_labels.max() >= num_class:
        raise IndexError(
            "labels must lie in [0, {0}); got values in [{1}, {2}]".format(
                num_class, flat_labels.min(), flat_labels.max()))

    # Row k of the identity matrix is the one-hot vector for class k.
    return np.eye(num_class)[flat_labels]

def plot_images(images, cls_true, cls_pred=None, smooth=True):
    """Show nine images in a 3x3 grid, labelled with their class names.

    Args:
        images: Exactly nine 32x32 RGB images. Channels-last data of shape
            ``(9, 32, 32, 3)`` (the layout this notebook loads) is used as-is;
            anything else is assumed to be flat or channels-first data and is
            reshaped to ``(9, 3, 32, 32)`` before being transposed to
            channels-last.
        cls_true: Nine true class indices into the notebook-level
            ``class_names`` list. Any shape is accepted, e.g. ``(9,)`` or
            ``(9, 1)``.
        cls_pred: Optional nine predicted class indices, same conventions as
            ``cls_true``. When given, each label shows both classes.
        smooth: Kept for backward compatibility; currently ignored. Images are
            always drawn with ``'nearest'`` interpolation.

    Raises:
        AssertionError: If ``images`` and ``cls_true`` do not both have
            length 9.
    """
    assert len(images) == len(cls_true) == 9

    images = np.asarray(images)
    # Only convert when the data is NOT already channels-last; blindly
    # reshaping an (N, 32, 32, 3) array to (N, 3, 32, 32) scrambles the pixels.
    if not (images.ndim == 4 and images.shape[-1] == 3):
        images = np.transpose(images.reshape(len(images), 3, 32, 32), (0, 2, 3, 1))

    # Flatten the labels so that column vectors of shape (N, 1) can be used
    # to index the class-name list.
    cls_true = np.asarray(cls_true).reshape(-1)
    if cls_pred is not None:
        cls_pred = np.asarray(cls_pred).reshape(-1)

    # Create figure with sub-plots.
    fig, axes = plt.subplots(3, 3)

    # Two-line labels (true + predicted) need more vertical room.
    hspace = 0.3 if cls_pred is None else 0.6
    fig.subplots_adjust(hspace=hspace, wspace=0.3)

    for i, ax in enumerate(axes.flat):
        # Plot image.
        ax.imshow(images[i, :, :, :], interpolation='nearest')

        cls_true_name = class_names[int(cls_true[i])]

        # Show true and predicted classes.
        if cls_pred is None:
            xlabel = "True: {0}".format(cls_true_name)
        else:
            # Name of the predicted class.
            cls_pred_name = class_names[int(cls_pred[i])]
            xlabel = "True: {0}\nPred: {1}".format(cls_true_name, cls_pred_name)

        # Show the classes as the label on the x-axis.
        ax.set_xlabel(xlabel)

        # Remove ticks from the plot.
        ax.set_xticks([])
        ax.set_yticks([])

    plt.show()

___
## Read in Data
* Scale the input X for the training and validation data sets.
* Convert the original Y, which is in label format, into one-hot encoding format.

In [3]:
# Load the CIFAR-10 train/test split through Keras.
(train_X, train_Y_labels), (test_X, test_Y_labels) = cifar10.load_data()

# Cast the pixels to float32 and rescale them by 1/255.
train_X = train_X.astype('float32') / 255.0
test_X = test_X.astype('float32') / 255.0

# One-hot encode the integer labels (10 classes).
train_Y, test_Y = (
    convert_to_one_hot(label_array, 10)
    for label_array in (train_Y_labels, test_Y_labels)
)

# Human-readable class names, indexed by class id.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Report the shapes of the inputs first, then of the one-hot targets.
for split_array in (train_X, test_X, train_Y, test_Y):
    print(split_array.shape)

(50000, 32, 32, 3)
(10000, 32, 32, 3)
(50000, 10)
(10000, 10)


___
# Train a CNN in Keras
* We used an 11-layer convolutional neural network made purely of convolutional layers, without max pooling or fully connected layers.
* We used leaky ReLU instead of ReLU to speed up training.
* Overall, we trained the CNN for 150 epochs and finally achieved more than 90% accuracy on the validation set.
* It is worth noting that we used data augmentation, which applies random flipping, shifting, rotating and zooming to the training set, to make our model more robust and to generalize its performance on the validation set.
* We used a decayed learning rate during the 150 epochs.

In [None]:
# Architecture hyper-parameters.
num_classes = 10
baseMapNum = 64
img_size = 32
channels = 3
img_shape = (img_size, img_size, channels)

# The network is described as a table: each stage is a list of
# (filters, kernel_size, extra Conv2D keyword arguments). Every convolution is
# followed by LeakyReLU(0.01). The last convolution of the first two stages
# uses stride 2, which halves the spatial size in place of a pooling layer.
conv_stages = [
    [
        (baseMapNum, (3, 3), {'padding': 'same', 'input_shape': img_shape}),
        (baseMapNum, (3, 3), {'padding': 'same'}),
        (baseMapNum, (3, 3), {'padding': 'same'}),
        (baseMapNum, (3, 3), {'padding': 'same', 'strides': 2}),
    ],
    [
        (2 * baseMapNum, (3, 3), {'padding': 'same'}),
        (2 * baseMapNum, (3, 3), {'padding': 'same'}),
        (2 * baseMapNum, (3, 3), {'padding': 'same'}),
        (2 * baseMapNum, (3, 3), {'padding': 'same', 'strides': 2}),
    ],
    [
        # Unpadded 3x3 convolution, then two 1x1 convolutions; the last one
        # emits one feature map per class.
        (2 * baseMapNum, (3, 3), {}),
        (2 * baseMapNum, (1, 1), {'padding': 'same'}),
        (num_classes, (1, 1), {'padding': 'same'}),
    ],
]
# Dropout rate applied after each stage (None = no dropout after that stage).
stage_dropout = [0.25, 0.25, None]

model = Sequential()
for stage_layers, drop_rate in zip(conv_stages, stage_dropout):
    for n_filters, kernel, conv_kwargs in stage_layers:
        model.add(Conv2D(n_filters, kernel, **conv_kwargs))
        model.add(LeakyReLU(0.01))
    if drop_rate is not None:
        model.add(Dropout(drop_rate))

# Average each class map over a 6x6 window (stride 6), flatten the result and
# turn it into class probabilities.
model.add(AveragePooling2D(pool_size=6, strides=6))
model.add(Flatten())
model.add(Activation('softmax'))
# model.summary()

# Data augmentation applied on the fly to every training batch.
datagen = ImageDataGenerator(
    # NOTE(review): Keras documents rotation_range in degrees, so 0.2 is
    # almost no rotation -- confirm whether a larger value (e.g. 20) was
    # intended. Left unchanged to keep the training recipe identical.
    rotation_range=0.2,
    width_shift_range=0.2,   # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
    zoom_range=0.2,
    horizontal_flip=True)

# NOTE(review): this does not train the model. ImageDataGenerator.fit()
# computes dataset statistics for the featurewise_* / zca_whitening options,
# none of which are enabled above, so the call is presumably unnecessary --
# confirm before removing it.
datagen.fit(train_X)

# Step-wise learning-rate schedule: (learning rate, epochs) for each stage,
# 150 epochs in total.
lr_schedule = [
    (0.01, 30),
    (0.005, 30),
    (0.003, 30),
    (0.001, 30),
    (0.0005, 15),
    (0.0001, 15),
]

stage_histories = []
for stage_lr, stage_epochs in lr_schedule:
    # Recompiling keeps the learned weights but installs a fresh SGD optimizer
    # with the learning rate of this stage.
    optimizer = keras.optimizers.SGD(lr=stage_lr, decay=1e-6, momentum=0.9)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    # Note that the CIFAR-10 test split is what is passed as validation data.
    stage_histories.append(
        model.fit_generator(datagen.flow(train_X, train_Y, batch_size=64),
                            shuffle=True,
                            epochs=stage_epochs,
                            validation_data=(test_X, test_Y)))

# Keep the per-stage names that the later cells of the notebook refer to.
(model_info_1, model_info_2, model_info_3,
 model_info_4, model_info_5, model_info_6) = stage_histories

In [None]:

# Extra fine-tuning stage: 30 more epochs with a very small learning rate, a
# stronger decay (1e-4) and a smaller batch size (16) than the earlier stages.
optimizer = keras.optimizers.SGD(momentum=0.9, decay=1e-4, lr=0.00003)
model.compile(optimizer=optimizer,
              metrics=['accuracy'],
              loss='categorical_crossentropy')

small_batch_flow = datagen.flow(train_X, train_Y, batch_size=16)
model_info_7 = model.fit_generator(small_batch_flow,
                                   validation_data=(test_X, test_Y),
                                   epochs=30,
                                   shuffle=True)

___

## Visualize the Results

In [None]:
# Put the results together
# Per-epoch metrics of the six scheduled training stages, in order. The extra
# fine-tuning stage (model_info_7) is left out, as in the original analysis.
plotted_histories = [model_info_1, model_info_2, model_info_3,
                     model_info_4, model_info_5, model_info_6]  # + [model_info_7]


def _concat_metric(metric_key):
    """Join one metric's per-epoch values across all plotted stages."""
    return [value
            for stage_history in plotted_histories
            for value in stage_history.history[metric_key]]


acc = _concat_metric('acc')
val_acc = _concat_metric('val_acc')
loss = _concat_metric('loss')
val_loss = _concat_metric('val_loss')

len_acc = len(acc)
len_val_acc = len(val_acc)
len_loss = len(loss)
len_val_loss = len(val_loss)

# Visualize the results
fig, axs = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: accuracy during training; right panel: loss during training.
panels = (
    (axs[0], acc, val_acc, 'Accuracy'),
    (axs[1], loss, val_loss, 'Loss'),
)
for ax, train_curve, val_curve, metric_name in panels:
    n_epochs = len(train_curve)
    ax.plot(range(1, n_epochs + 1), train_curve)
    ax.plot(range(1, len(val_curve) + 1), val_curve)
    ax.set_title('Model ' + metric_name)
    ax.set_ylabel(metric_name)
    ax.set_xlabel('Epoch')
    # Roughly ten evenly spaced ticks. The step belongs inside np.arange();
    # previously it was passed as the second argument of set_xticks().
    ax.set_xticks(np.arange(1, n_epochs + 1, max(1, n_epochs // 10)))
    ax.legend(['train', 'val'], loc='best')
plt.show()

In [None]:
# Hard-coded runtime figure (not measured by the notebook).
# NOTE(review): presumably 30 epochs x ~100 s per epoch, converted from
# seconds to minutes -- confirm what the two factors stand for.
elapsed_total = 30 * 100
runtime = elapsed_total / 60
print('Runtime (in mins): ', runtime)

____
## Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import itertools

def plot_confusion_matrix(cm, classes, normalize, title):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: Square confusion matrix (array-like); rows are true labels and
            columns are predicted labels.
        classes: Class names used as tick labels on both axes.
        normalize: If true, every row is divided by its sum so that cells show
            the fraction of each true class; otherwise raw counts are shown.
        title: Title drawn above the plot.

    The function draws with the pyplot state machine, so callers create the
    figure beforehand (e.g. with ``plt.figure``).
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class with no true samples has a zero row total; leave that row at
        # 0 instead of dividing by zero and filling the plot with NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)

    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.binary)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    title_font = {'family': 'serif', 'color': 'black', 'weight': 'bold', 'size': 15}
    label_font = {'family': 'serif', 'color': 'black', 'weight': 'normal', 'size': 10}
    plt.title(title, fontdict=title_font)
    plt.ylabel('True label', fontdict=label_font)
    plt.xlabel('Predicted label', fontdict=label_font)

    # Write each cell's value on top of it; use white text on dark cells
    # (above half of the maximum) so the numbers stay readable.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()

    
    
# labels

# Class probabilities predicted for every test image.
result = model.predict(test_X)

# Use a separate name for the integer labels: overwriting the one-hot
# `test_Y` here would break any training cell that is re-run afterwards,
# because those cells pass `test_Y` as validation targets.
true_Y = test_Y_labels.reshape(-1)
pred_Y = np.argmax(result, axis=1)


# Compute confusion matrix
cnf_matrix = confusion_matrix(true_Y, pred_Y)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure(figsize=(8, 8))
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=False,
                      title='Confusion Matrix of CNN with data augmentation')

# Plot normalized confusion matrix
plt.figure(figsize=(8, 8))
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Confusion Matrix of CNN with data augmentation')

# Per-class precision / recall / F1 summary.
print(classification_report(true_Y, pred_Y, target_names=class_names))

___
## Save the model structure and trained weights

In [None]:
# File names shared by the save and the reload steps below.
model_json_path = "COMP5318_CNN_90_deeper_no_fc.json"
model_weights_path = "COMP5318_CNN_90_deeper_no_fc.h5"

# Save the model architecture to JSON
model_json = model.to_json()
with open(model_json_path, "w") as json_file:
    json_file.write(model_json)

# Save the weights to h5
model.save_weights(model_weights_path)
print("Saved model to disk")

# Load the JSON and re-create the model. The context manager closes the file
# even if reading fails.
with open(model_json_path, 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)

# Load the weights into the new model.
# NOTE(review): `model` now refers to the reloaded network, which has
# presumably not been compiled -- compile it before calling fit/evaluate.
model.load_weights(model_weights_path)
print("Loaded model from disk")

#model.compile(optimizer='rmsprop',loss='mse')