In [2]:
# import the necessary packages
from sklearn.preprocessing import LabelBinarizer

from keras.backend.tensorflow_backend import set_session
from keras.callbacks import Callback, LambdaCallback
from keras.datasets import cifar10, mnist
from keras.applications import imagenet_utils
from keras.optimizers import SGD, Adam

from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import GlobalAveragePooling2D
from keras.layers.pooling import GlobalMaxPooling2D
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras.layers.core import Activation
from keras.layers import concatenate
from keras.models import Model
from keras import backend as K

from classification_models.keras import Classifiers
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np

In [3]:
# create the TensorFlow session with the `allow_growth` GPU option enabled
# (GPU memory is then claimed as needed instead of all at once) and register
# it as the session used by the Keras backend
config = tf.ConfigProto(gpu_options = tf.GPUOptions(allow_growth = True))
sess = tf.Session(config = config)
set_session(sess)

In [4]:
class OneCycleScheduler(Callback):
    """Keras callback applying a one cycle learning rate / momentum schedule.

    The schedule is made of two cosine-annealed phases and is advanced once
    before every training batch:

    * phase 1 (the first ``floor(epochs * start_pct)`` epochs): the learning
      rate moves from ``max_lr / div_factor`` to ``max_lr`` while the
      momentum moves from ``moms[0]`` to ``moms[1]``;
    * phase 2 (the remaining epochs): both values are annealed back to where
      they started.

    Every value that is applied is appended to ``self.history["lrs"]`` and
    ``self.history["moms"]``.

    The optimizer of the attached model must expose ``lr`` and ``momentum``
    backend variables (e.g. SGD with momentum).
    """

    def __init__(self, epochs, max_lr, steps_per_epoch, moms = (0.95, 0.85), div_factor = 25, start_pct = 0.3):
        """Set up the schedule.

        Args:
            epochs: number of training epochs the schedule spans.
            max_lr: peak learning rate, reached at the end of phase 1.
            steps_per_epoch: number of batches processed in one epoch.
            moms: ``(momentum at the lowest lr, momentum at the peak lr)``.
            div_factor: the lowest learning rate is ``max_lr / div_factor``.
            start_pct: fraction of ``epochs`` spent in phase 1 (rounded down
                to whole epochs).
        """
        # let the Keras base class initialize its own attributes
        super().__init__()

        # initialize the instance variables
        self.max_lr = max_lr
        self.moms = moms
        self.div_factor = div_factor
        self.st1_epochs = int(np.floor(epochs * start_pct))
        self.st2_epochs = epochs - self.st1_epochs
        self.st1_steps = self.st1_epochs * steps_per_epoch
        self.st2_steps = self.st2_epochs * steps_per_epoch
        self.history = {"lrs" : [], "moms" : []}

    def __annealing_cos(self, start, end, pct):
        "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0."

        cos_out = np.cos(np.pi * pct) + 1
        return end + (start - end) / 2 * cos_out

    @staticmethod
    def __progress(steps_done, phase_steps):
        "Fraction of a phase that is completed, clamped to [0.0, 1.0]."

        # an empty phase counts as already finished (also avoids dividing by zero)
        if phase_steps <= 0:
            return 1.0

        # clamping keeps the final values in place if training runs for more
        # steps than planned, instead of following the cosine back up
        return min(max(steps_done / phase_steps, 0.0), 1.0)

    def on_train_begin(self, logs = None):
        "Restart the step counter; the recorded history is kept."

        # initialize the necessary variables
        self.steps_so_far = 0

    def on_batch_begin(self, batch, logs = None):
        "Advance the schedule by one step and apply the new lr and momentum."

        # increment the step count
        self.steps_so_far += 1

        # check to determine the training phase
        if self.steps_so_far <= self.st1_steps:
            # phase 1: learning rate rises, momentum moves towards moms[1]
            pct = self.__progress(self.steps_so_far, self.st1_steps)
            lr_start, lr_end = self.max_lr / self.div_factor, self.max_lr
            mom_start, mom_end = self.moms[0], self.moms[1]
        else:
            # phase 2: both values are annealed back to their starting points
            pct = self.__progress(self.steps_so_far - self.st1_steps, self.st2_steps)
            lr_start, lr_end = self.max_lr, self.max_lr / self.div_factor
            mom_start, mom_end = self.moms[1], self.moms[0]

        # calculate the new learning rate and momentum
        new_lr = self.__annealing_cos(lr_start, lr_end, pct)
        new_mom = self.__annealing_cos(mom_start, mom_end, pct)

        # set the new learning rate and momentum
        K.set_value(self.model.optimizer.lr, new_lr)
        K.set_value(self.model.optimizer.momentum, new_mom)

        # update the history attribute
        self.history["lrs"].append(new_lr)
        self.history["moms"].append(new_mom)

In [5]:
class Classifier:
    """Builds a classification head on top of a convolutional base model."""

    @staticmethod
    def build(base_model, classes):
        """Attach a concat-pooled dense head to `base_model`.

        Args:
            base_model: Keras model whose `output` is pooled and whose
                `input` becomes the input of the returned model.
            classes: number of units in the final softmax layer.

        Returns:
            A Keras `Model` mapping `base_model.input` to the softmax output.
        """
        features = base_model.output

        # GlobalConcatPooling: max-pooled and average-pooled features side by side
        pooled_max = GlobalMaxPooling2D()(features)
        pooled_avg = GlobalAveragePooling2D()(features)
        head = concatenate([pooled_max, pooled_avg], axis = -1)

        # two BN => DO => FC => activation stages:
        # a 256-unit ReLU layer followed by the softmax classifier
        for units, activation in ((256, "relu"), (classes, "softmax")):
            head = BatchNormalization()(head)
            head = Dropout(0.5)(head)
            head = Dense(units, kernel_initializer = "he_normal")(head)
            head = Activation(activation)(head)

        # hand back the assembled architecture
        return Model(inputs = base_model.input, outputs = head)

In [7]:
# look up the ResNet18 constructor together with its preprocessing function
ResNet18, preprocess_input = Classifiers.get("resnet18")

# build the base network for 32x32x3 inputs with "imagenet" weights and no top
base_model = ResNet18((32, 32, 3), include_top = False, weights = "imagenet")

# mark every layer of the base network as non-trainable
for layer in base_model.layers:
    layer.trainable = False

# stack the 10-class head on top of the frozen base
model = Classifier.build(base_model, classes = 10)

# compile with Nesterov-momentum SGD and categorical cross-entropy
opt = SGD(lr = 0.01, momentum = 0.9, nesterov = True)
model.compile(
    optimizer = opt,
    loss = "categorical_crossentropy",
    metrics = ["accuracy"],
)

In [8]:
# initialize the dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# preprocess the images with the function that belongs to the base model
x_train = preprocess_input(x_train)
x_test = preprocess_input(x_test)

# convert the labels from integers into vectors; the binarizer is fitted on
# the training labels only and then reused for the test labels, so both
# splits are guaranteed to share the same class-to-column mapping
lb = LabelBinarizer()
y_train = lb.fit_transform(y_train)
y_test = lb.transform(y_test)

In [9]:
# initialize the training parameters
epochs = 5
bs = 128
# number of batches per epoch; np.ceil returns a float, so convert it to int
# to keep every derived step count an integer
steps_per_epoch = int(np.ceil(x_train.shape[0] / bs))
max_lr = 0.01

# initialize the one cycle scheduler
ocs = OneCycleScheduler(epochs, max_lr, steps_per_epoch)

In [10]:
# fit the model with the one cycle scheduler attached, validating on the test split
H = model.fit(
    x = x_train,
    y = y_train,
    batch_size = bs,
    epochs = epochs,
    callbacks = [ocs],
    validation_data = (x_test, y_test),
)

W0403 10:22:28.226047 140187705644864 deprecation.py:323] From /home/varun/environments/pyimagesearch/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 50000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# plot the recorded learning rate and momentum schedules side by side
fig, ax = plt.subplots(1, 2, figsize = (8, 3))
for panel, key, title in zip(ax, ("lrs", "moms"), ("Learning Rate", "Momentum")):
    panel.plot(ocs.history[key])
    panel.set_title(title)
    panel.set_xlabel("Iterations")
plt.show()