In [21]:
import warnings
# Suppress every warning for the rest of the session. This keeps the cell outputs
# short, but it also hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

In [22]:
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.preprocessing.image import img_to_array, ImageDataGenerator
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import LearningRateScheduler
# Datasets
from tensorflow.keras.datasets import cifar10


from smallgooglenet import SmallGoogleNet

import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg') # Non-interactive

import pandas as pd

from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn.model_selection import train_test_split

import numpy as np
from imutils import paths, resize
import cv2
import os

## Network architecture

In [23]:
# Instantiate the project-local SmallGoogleNet builder and create the model.
# The two positional 32s are presumably the input width and height (the layer
# summary shown after this cell reports a (None, 32, 32, 3) input) -- TODO confirm
# the argument order in smallgooglenet.py.
# num_classes=10 matches the ten class names used in the evaluation cell.
net = SmallGoogleNet()
model = net.build(32, 32, num_classes=10)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv2d_24 (Conv2D)              (None, 32, 32, 96)   2688        input_3[0][0]                    
__________________________________________________________________________________________________
batch_normalization_24 (BatchNo (None, 32, 32, 96)   384         conv2d_24[0][0]                  
__________________________________________________________________________________________________
activation_24 (Activation)      (None, 32, 32, 96)   0           batch_normalization_24[0][0]     
__________________________________________________________________________________________________
conv2d_25 

## Loading, processing and splitting the dataset

In [24]:
# Fetch CIFAR-10 through Keras and unpack it into the training and test
# image arrays (trainX / testX) and their label arrays (trainY / testY).
print('# Loading CIFAR-10 dataset...')
(trainX, trainY), (testX, testY) = cifar10.load_data()
print('# CIFAR-10 dataset loaded!')

# Loading CIFAR-10 dataset...
# CIFAR-10 dataset loaded!


In [25]:
target_size = 32

def preprocess(image):
    """Crop one random square patch out of ``image`` and return it as a Keras array.

    The patch is ``target_size`` x ``target_size`` pixels. When ``image`` is
    ``None`` nothing is processed and ``None`` is returned.
    """
    if image is None:
        return None
    # Ask scikit-learn for a single randomly located patch of the requested size
    patch_shape = (target_size, target_size)
    patches = extract_patches_2d(image, patch_shape, max_patches=1)
    # Convert the selected patch with Keras' image-to-array helper
    return img_to_array(patches[0], data_format=None)

# Run every training image through preprocess() and stack the results into one array
trainX = np.array([preprocess(sample) for sample in trainX])
print('Num training images:', len(trainX))

# Same treatment for the test images
testX = np.array([preprocess(sample) for sample in testX])
print('Num test images:', len(testX))

Num training images: 50000
Num test images: 10000


In [26]:
# Pixel intensities are rescaled from [0, 255] to [0, 1]
def normalize(images):
    """Return a float copy of ``images`` with every value divided by 255."""
    as_float = images.astype('float')
    return as_float / 255.0

# Bring both splits into the [0, 1] range
trainX, testX = normalize(trainX), normalize(testX)

# Mean subtraction: the mean is taken over the training samples only (axis 0)
# and then removed from both the training and the test images
mean = trainX.mean(axis=0)
trainX -= mean
testX -= mean

In [27]:
# Convert the labels from integers to one-hot vectors.
# The binarizer is fitted on the training labels only; the test labels are then
# encoded with that same fitted mapping (transform, not fit_transform) so both
# splits are guaranteed to share identical class columns.
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
testY = lb.transform(testY)
print('After converting:', trainY)

# 1: [0 1 0 0 0 0 0 0 0 0]
# 4: [0 0 0 0 1 0 0 0 0 0]
# ...

After converting: [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]
 ...
 [0 0 0 ... 0 0 1]
 [0 1 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]]


In [28]:
# Data augmentation: random rotations, shifts, shears, zooms and horizontal flips
# are applied to the training batches produced by this generator
daug = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

## Train the network

In [29]:
# Number of epochs to train for. The same value is used as the horizon of the
# learning rate schedule and as the x-axis length of the history plot.
num_epochs = 100
# A custom polynomial learning rate schedule is defined in the next cell

$$\alpha = \alpha_{0} \cdot \left(1 - \frac{e}{e_{\max}}\right)^{p}$$

In [30]:
init_lr = 5e-2

def polynomial_decay(epoch, *, max_epochs=None, base_lr=None, power=1.0):
    """Polynomial learning rate schedule: base_lr * (1 - epoch / max_epochs) ** power.

    Parameters
    ----------
    epoch : int
        Index of the current epoch (0 for the first one).
    max_epochs : int, optional
        Epoch at which the rate reaches zero. Defaults to the notebook-level
        ``num_epochs``, looked up when the function is called.
    base_lr : float, optional
        Learning rate at epoch 0. Defaults to the notebook-level ``init_lr``,
        looked up when the function is called.
    power : float, optional
        Exponent of the polynomial; 1.0 gives a linear decay.

    Returns
    -------
    float
        The learning rate to use for ``epoch``.
    """
    # The extra arguments are keyword-only on purpose: Keras' LearningRateScheduler
    # first tries schedule(epoch, lr) and falls back to schedule(epoch) on TypeError,
    # so a second positional parameter would silently receive the current lr.
    if max_epochs is None:
        max_epochs = num_epochs
    if base_lr is None:
        base_lr = init_lr

    return base_lr * (1 - (epoch / float(max_epochs))) ** power

In [31]:
# Optimizer: Stochastic Gradient Descent with momentum, starting at init_lr.
# No decay is passed to the optimizer itself; the learning rate is lowered every
# epoch by the LearningRateScheduler callback given to the training call below.
sgd_opt = SGD(momentum=0.9, lr=init_lr)
# Alternative optimizer kept for reference:
# adam_opt = Adam(lr=1e-3)

# Compile the model
print('# Compiling the model...')
model.compile(
    optimizer=sgd_opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on augmented batches of 256 images; the test split is used as validation data
print('# Training the network...')
h = model.fit_generator(
    daug.flow(trainX, trainY, batch_size=256),
    steps_per_epoch=len(trainX) // 256,
    epochs=num_epochs,
    validation_data=(testX, testY),
    callbacks=[LearningRateScheduler(polynomial_decay)],
    verbose=1,
)

# Compiling the model...
# Training the network...
Epoch 1/100

KeyboardInterrupt: 

## Evaluate the network

In [None]:
# Human-readable class names, listed in class-index order for the report below
label_names = ['Airplane', 'Automobile', 'Bird',
              'Cat', 'Deer', 'Dog', 'Frog',
              'Horse', 'Ship', 'Truck']

# Evaluate the network
print('# Trained the network! Evaluating...')
preds = model.predict(testX, batch_size=256)

# Collapse the one-hot targets and the per-class scores to class indices once,
# then reuse them for both the report and the printout
true_labels = testY.argmax(axis=1)
pred_labels = preds.argmax(axis=1)
print(classification_report(true_labels, pred_labels, target_names=label_names))

# Print the arg-max class index per test image, as the "(max)" label announces
# (previously the raw score matrix was printed under this label)
print('Predictions (max):', pred_labels)

In [None]:
%matplotlib inline
# Plot history to check for overfitting
history = h.history

# Depending on the Keras/TensorFlow version, accuracy is recorded under
# 'acc' / 'val_acc' or under 'accuracy' / 'val_accuracy'; accept either.
acc_key = 'acc' if 'acc' in history else 'accuracy'
val_acc_key = 'val_acc' if 'val_acc' in history else 'val_accuracy'

# Use the number of epochs actually recorded rather than num_epochs, so the plot
# still works when training stopped before the configured number of epochs.
epochs_run = np.arange(0, len(history['loss']))

plt.style.use('ggplot')
plt.figure(figsize=(30, 20))
plt.plot(epochs_run, history['loss'], label='train_loss')
plt.plot(epochs_run, history['val_loss'], label='val_loss')
plt.plot(epochs_run, history[acc_key], label='train_accuracy')
plt.plot(epochs_run, history[val_acc_key], label='val_accuracy')

plt.title('Training Loss and Accuracy')
plt.ylabel('Loss/Accuracy')
plt.xlabel('Epoch #')
plt.legend()

plt.show()

# Experiment notes

- [x] Used a custom polynomial learning rate scheduler over 100 epochs
- [x] Extracted patches from images and applied data augmentation
- [x] Removed Nesterov acceleration
- [x] Added one more CONV => RELU => BN layer set

- Initial learning rate of 5e-3: Model was slightly underfitting, could train longer
- Updated initial lr by 6e-3: Model started to slightly overfit after 80 epochs