In [1]:
""" Resume training of the modified ELU network with added BatchNorm
    from elu_network_cifar10.ipynb (same directory). See the end of
    that notebook for the transfer from the non-BatchNorm model to the
    BatchNorm model.
"""
import keras
import keras.backend as K
from keras.models import Sequential
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.datasets import cifar10
from keras import regularizers, optimizers
import numpy as np
from keras.models import load_model

Using TensorFlow backend.


In [2]:
# Load CIFAR-10; the first 45,000 training samples stay as the training set
# and everything after them becomes the validation set.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
n_train = 45000
x_val, y_val = x_train[n_train:], y_train[n_train:]
x_train, y_train = x_train[:n_train], y_train[:n_train]

# Cast the image arrays to float32 before normalising.
x_train, x_val, x_test = [
    images.astype('float32') for images in (x_train, x_val, x_test)
]

# z-score: one scalar mean/std, taken over every axis of the training images
# only, is applied to all three splits.
all_axes = (0, 1, 2, 3)
mean = np.mean(x_train, axis=all_axes)
std = np.std(x_train, axis=all_axes)
scale = std + 1e-7  # small epsilon guards against division by zero
x_train = (x_train - mean) / scale
x_val = (x_val - mean) / scale
x_test = (x_test - mean) / scale

# One-hot encode the integer class labels.
num_classes = 10
y_train, y_val, y_test = [
    np_utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_val, y_test)
]

In [3]:
# e_swish_2 custom activation
def e_swish_2(x):
    """Element-wise maximum of ``x * sigmoid(x)`` and ``x * (2 - sigmoid(x))``.

    Built from Keras backend ops (``K.sigmoid`` / ``K.maximum``) so it can be
    used as a layer activation; ``x`` is whatever tensor the backend passes in.
    """
    gate = K.sigmoid(x)
    return K.maximum(x * gate, x * (2 - gate))

In [4]:
# Data augmentation: random rotations, width/height shifts and horizontal
# flips. Every centering / normalisation / whitening switch is off (the
# arrays were already z-scored in an earlier cell).
augmentation_options = {
    "featurewise_center": False,
    "samplewise_center": False,
    "featurewise_std_normalization": False,
    "samplewise_std_normalization": False,
    "zca_whitening": False,
    "rotation_range": 15,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "horizontal_flip": True,
    "vertical_flip": False,
}
datagen = ImageDataGenerator(**augmentation_options)
# NOTE(review): fit() is documented as only needed for featurewise
# centering/std-normalisation/ZCA, all disabled above - kept for parity.
datagen.fit(x_train)

In [5]:
# Will create a model with the following structure
# ELU NETWORK CIFAR-100 VERSION
#     weight_decay = 0.0005
    
#     model = Sequential()
#     # First stack
#     model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=x_train.shape[1:]))
#     model.add(Activation(act))
#     model.add(BatchNormalization())
#     model.add(MaxPooling2D(pool_size=(2,2)))
    
#     # Second stack
#     model.add(Conv2D(128, (1,1), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Conv2D(128, (2,2), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Conv2D(200, (2,2), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Conv2D(200, (2,2), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(BatchNormalization())
#     model.add(MaxPooling2D(pool_size=(2,2)))
#     model.add(Dropout(0.1))

#     # Third stack
#     model.add(Conv2D(200, (1,1), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Conv2D(220, (2,2), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Conv2D(220, (2,2), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Conv2D(220, (2,2), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(BatchNormalization())
#     model.add(MaxPooling2D(pool_size=(2,2)))
#     model.add(Dropout(0.2))
    
#     # Fourth stack
#     model.add(Conv2D(220, (1,1), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Conv2D(220, (2,2), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Conv2D(220, (2,2), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(BatchNormalization())
#     model.add(MaxPooling2D(pool_size=(2,2)))
#     model.add(Dropout(0.3))
    
#     # Fifth stack
#     model.add(Conv2D(240, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Conv2D(256, (2,2), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Conv2D(256, (2,2), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(BatchNormalization())
#     model.add(MaxPooling2D(pool_size=(2,2)))
#     model.add(Dropout(0.4))
    
#     # Sixth stack
#     model.add(Conv2D(256, (1,1), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Conv2D(264, (1,1), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Dropout(0.4))
                     
#     # Seventh stack
#     model.add(Conv2D(364, (1,1), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation(act))
#     model.add(Dropout(0.3))
    
#     # and last stack
#     model.add(Conv2D(10, (1,1), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
#     model.add(Activation("sigmoid"))

#     model.add(Flatten())

In [6]:
def schedule(x):
    """Return the learning rate for epoch index ``x``.

    The rate is piecewise constant: each entry in the table below gives an
    exclusive upper epoch bound and the rate used below it. Epochs at or
    beyond the last bound (90) get half of the final table rate.
    """
    # (exclusive upper bound on x, learning rate), in ascending order.
    # This run resumes at epoch 80, so only the last entry and the
    # fall-through value are actually reached.
    steps = (
        (30, 0.01),
        (45, 0.005),
        (60, 0.001),
        (70, 0.0005),
        (90, 0.0001),
    )
    for upper_bound, rate in steps:
        if x < upper_bound:
            return rate
    return 0.0001 / 2

In [8]:
# Restore the saved network. See elu_network_cifar10.ipynb for details of the
# prior training. The custom activation has to be supplied by name so the
# loader can resolve "e_swish_2".
checkpoint_path = "elu_network_mod_e_swish_2_w_batchnorm.h5"
custom_activations = {"e_swish_2": e_swish_2}
model = load_model(checkpoint_path, custom_objects=custom_activations)

In [None]:
# Training - resume at epoch 80 and run 30 further epochs (up to epoch 110).
batch_size  = 100
initial_epoch = 80              # epoch index to resume from
epochs = 30 + initial_epoch     # final epoch index passed to fit_generator

# compile() attaches a fresh SGD optimizer to the loaded model. The lr given
# here is only a starting value: the LearningRateScheduler callback takes the
# rate from schedule() for each epoch.
opt = keras.optimizers.SGD(lr=0.01, momentum=0.9, decay=0.0, nesterov=False)
lr_1 = keras.callbacks.LearningRateScheduler(schedule)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
print("Finished compiling")

####################
# Network training #
####################

print("Gonna fit the model")
# Augmented batches come from datagen; validation uses the un-augmented split.
his = model.fit_generator(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    steps_per_epoch=x_train.shape[0] // batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_val, y_val),
    callbacks=[lr_1],
    initial_epoch=initial_epoch,
)
# Persist the weights straight after training, so that a failure while
# printing the history or evaluating on the test set cannot discard the run.
model.save('elu_network_mod_e_swish_2_80.h5')
print(his.history)
print(model.evaluate(x_test, y_test))

Finished compiling
Gonna fit the model
Epoch 81/110
