Proof-of-principle convolutional network in Keras for categorical classification on the CIFAR-100 dataset

In [None]:
# render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
# module imports
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.regularizers import l2
from tensorflow.keras.datasets import cifar100

In [None]:
# load the CIFAR-100 train/test splits with the fine-grained labels
# ('fine' is the loader's default label mode, spelled out here for clarity)
(x_train, y_train), (x_test, y_test) = cifar100.load_data(label_mode='fine')

In [None]:
# show the array shapes of the training and test images/labels
tuple(split.shape for split in (x_train, y_train, x_test, y_test))

In [None]:
# count the distinct labels that occur in the training set
num_classes = len(np.unique(y_train))
print('number of classes: {}'.format(num_classes))

In [None]:
# Use the first 5000 test samples for validation and keep the remainder as
# the test set.
# NOTE: x_test / y_test are rebound here, so re-running this cell without
# reloading the data slices the already-reduced test set again.
x_val, x_test = x_test[:5000], x_test[5000:]
y_val, y_test = y_test[:5000], y_test[5000:]

In [None]:
# One-hot encode the class labels of every split.
# NOTE: the label arrays are rebound in place, so this cell is meant to be
# run once per data load.
y_train, y_test, y_val = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test, y_val)
)

In [None]:
# show the first 16 training images in a 4x4 grid, titled with their class index
fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(12, 12))
for ax, image, onehot in zip(axes.flatten(), x_train, y_train):
    ax.imshow(image, vmin=0, vmax=255)
    # position of the single 1 in the one-hot row == class index
    ax.set_title('y={}'.format(np.argmax(onehot)))

In [None]:
def define_model(input_shape, num_classes, lr=0.001, dr=0.25,
                 clear_session=True):
    """Build and compile a small convolutional classifier.

    The network is a stack of four L2-regularized ReLU convolutions
    (16, 32, 64 and 128 filters) interleaved with max pooling and dropout,
    followed by a 128-unit dense layer and a softmax output layer.

    Parameters
    ----------
    input_shape : tuple
        Shape of a single input sample (without the batch axis); passed to
        the first convolutional layer.
    num_classes : int
        Number of units in the softmax output layer.
    lr : float, optional
        Learning rate of the Adam optimizer.
    dr : float, optional
        Dropout rate used by every dropout layer.
    clear_session : bool, optional
        If True, reset the Keras backend session before building the model.

    Returns
    -------
    keras.models.Sequential
        Model compiled with categorical cross-entropy loss and an
        'accuracy' metric.
    """
    if clear_session:
        keras.backend.clear_session()

    def regularized():
        # Keyword arguments shared by all hidden layers. A fresh pair of
        # regularizers (with the library-default factor) is created per layer.
        return dict(activation='relu',
                    kernel_regularizer=l2(),
                    bias_regularizer=l2())

    model = keras.models.Sequential([
        # conv layer 1: 'same' padding keeps the spatial size
        keras.layers.Conv2D(16, (3, 3),
                            padding='same',
                            input_shape=input_shape,
                            **regularized()),

        # conv layer 2 (stride 2) + pooling
        keras.layers.Conv2D(32, (3, 3),
                            strides=(2, 2),
                            **regularized()),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Dropout(dr),

        # conv layer 3 (stride 2), no pooling
        keras.layers.Conv2D(64, (3, 3),
                            strides=(2, 2),
                            **regularized()),
        keras.layers.Dropout(dr),

        # conv layer 4 + pooling
        keras.layers.Conv2D(128, (2, 2),
                            strides=(1, 1),
                            **regularized()),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Dropout(dr),

        # flatten the feature maps for the dense head
        keras.layers.Flatten(),

        # dense layer 1
        keras.layers.Dense(128, **regularized()),
        keras.layers.Dropout(dr),

        # dense output layer with softmax activation (not regularized)
        keras.layers.Dense(num_classes, activation='softmax'),
    ])

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    opt = keras.optimizers.Adam(learning_rate=lr)

    # compile model
    model.compile(loss='categorical_crossentropy', optimizer=opt,
                  metrics=['accuracy'])

    return model

In [None]:
# build the network for the per-sample shape of the training images
# (leading sample axis dropped)
model = define_model(x_train.shape[1:], num_classes)

# show the layer-by-layer overview
model.summary()

In [None]:
# train the model, monitoring loss/accuracy on the validation split after every epoch
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=50,
    batch_size=100,
)

In [None]:
# learning curves: loss in the top panel, accuracy in the bottom panel
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(12, 12))
panels = (
    (axes[0], (('loss', 'training loss'),
               ('val_loss', 'validation loss'))),
    (axes[1], (('accuracy', 'training accuracy'),
               ('val_accuracy', 'validation accuracy'))),
)
for ax, curves in panels:
    for key, label in curves:
        ax.plot(history.history[key], '-o', label=label)
    ax.legend()
axes[1].set_xlabel('epochs')

In [None]:
# Predict class probabilities for the whole test set and evaluate it.
# The images are passed as-is, exactly as for fit() and evaluate(); an extra
# float64 copy of the test set is not needed.
y_pred = model.predict(x_test)
scores = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

In [None]:
# confusion matrix between true class indices (from the one-hot labels) and
# the most probable predicted class
true_classes = np.argmax(y_test, axis=-1)
pred_classes = np.argmax(y_pred, axis=-1)
cf = np.array(tf.math.confusion_matrix(labels=true_classes,
                                       predictions=pred_classes,
                                       num_classes=num_classes))

# draw the matrix as an image with a colorbar for the counts
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
fig.suptitle('confusion matrix')
im = ax.matshow(cf)
tight_limits = ax.axis('tight')
ax.axis(tight_limits)
cb = plt.colorbar(im)
cb.set_label('#')

# one tick per class on both axes; predicted labels are annotated on top
class_ticks = np.arange(num_classes)
ax.set_xticks(class_ticks)
ax.set_yticks(class_ticks)
ax.xaxis.set_label_position('top')
ax.set_xlabel('predicted labels')
ax.set_ylabel('true labels')

In [None]:
# plot the first 16 test images with their actual label, predicted label and
# the probability assigned to the predicted class
fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(12, 12))
for ax, image, onehot, probs in zip(axes.flatten(), x_test, y_test, y_pred):
    ax.imshow(image, vmin=0, vmax=255)
    true_class = np.argmax(onehot)
    pred_class = np.argmax(probs)
    # raw string: '\h' is not a valid escape sequence in a normal string
    # literal; braces of \hat{y} are doubled to survive str.format
    ax.set_title(r'$y=${}, $\hat{{y}}=${} (p={:.3f})'.format(true_class,
                                                             pred_class,
                                                             probs[pred_class]))