In [1]:
import keras
import keras.backend as K
from keras.models import Sequential
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.datasets import cifar10
from keras import regularizers, optimizers
from keras.models import load_model
import numpy as np


# Fetch CIFAR-10 and switch the image arrays to float32.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train.astype('float32'), x_test.astype('float32')

# z-score: a single scalar mean/std is taken over every axis of the training
# images and applied to both splits (the test split reuses training statistics).
all_axes = (0, 1, 2, 3)
mean = np.mean(x_train, axis=all_axes)
std = np.std(x_train, axis=all_axes)
denom = std + 1e-7  # small epsilon keeps the division away from zero
x_train = (x_train - mean) / denom
x_test = (x_test - mean) / denom

# One-hot encode the integer labels.
num_classes = 10
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

Using TensorFlow backend.


In [2]:
# Custom activation function 2
def e_swish_2(x):
    """Element-wise maximum of x*sigmoid(x) and x*(2 - sigmoid(x)).

    For x <= 0 the first term is the larger one, so the result is plain
    swish, x*sigmoid(x). For x > 0 the second term wins, giving
    x*(2 - sigmoid(x)) = 2*x - x*sigmoid(x), i.e. swish reflected about
    the identity line y = x.
    """
    gate = K.sigmoid(x)
    swish = x * gate
    reflected = x * (2 - gate)
    return K.maximum(swish, reflected)

In [3]:
# Activation used by the model-building cell, plus a matching string name.
act = e_swish_2
act_name = "e_swish_2"

In [4]:
# SimpleNet-style CNN for CIFAR-10, assembled from Conv2D + custom-activation pairs.
baseMapNum = 32      # not referenced anywhere in this cell
weight_decay = 1e-2  # L2 coefficient applied to every convolution kernel
s = 2                # stride used by every max-pooling layer


def _conv_act(net, filters, kernel, **conv_kwargs):
    """Append a 'same'-padded, L2-regularised Conv2D followed by Activation(act)."""
    net.add(Conv2D(filters, kernel, padding='same',
                   kernel_regularizer=regularizers.l2(weight_decay),
                   **conv_kwargs))
    net.add(Activation(act))


def _pool(net):
    """Append a 2x2 max-pooling layer with stride `s`."""
    net.add(MaxPooling2D(pool_size=(2, 2), strides=s))


model = Sequential()

# Block 1: 64 filters on the raw input, then batch normalisation
_conv_act(model, 64, (3, 3), input_shape=x_train.shape[1:])
model.add(BatchNormalization())

# Blocks 2-4: three 3x3 convs, then downsample
for i in range(3):
    _conv_act(model, 128, (3, 3))
_pool(model)

# Blocks 5-7: three 3x3 convs, batch norm, then downsample
for i in range(3):
    _conv_act(model, 128, (3, 3))
model.add(BatchNormalization())
_pool(model)

# Blocks 8-9: two 3x3 convs, then downsample
for i in range(2):
    _conv_act(model, 128, (3, 3))
_pool(model)

# Block 10: 3x3 conv followed by batch normalisation
_conv_act(model, 128, (3, 3))
model.add(BatchNormalization())

# 1x1 conv stack.
# NOTE(review): this section was originally labelled "Blocks 11-12", yet the
# loop adds ten 1x1 conv layers -- confirm whether range(10) is intended.
for i in range(10):
    _conv_act(model, 128, (1, 1))

# Block 13: last 3x3 conv (no pooling or batch norm after it)
_conv_act(model, 128, (3, 3))

# Final classifier: flatten the feature maps into a softmax layer
model.add(Flatten())
model.add(Dense(num_classes, activation='softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 64)        1792      
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 64)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 64)        256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 128)       73856     
_________________________________________________________________
activation_2 (Activation)    (None, 32, 32, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 32, 128)       147584    
_________________________________________________________________
activation_3 (Activation)    (None, 32, 32, 128)       0         
__________

In [5]:
# Data augmentation: random rotations (up to 15 degrees), 10% width/height
# shifts and horizontal flips. All dataset-statistics options (featurewise
# centering / std normalisation, ZCA whitening) are switched off because the
# images were already z-scored when they were loaded.
datagen = ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=False
    )
# datagen.fit(x_train) is intentionally not called: fit() only computes the
# statistics needed by featurewise_center, featurewise_std_normalization and
# zca_whitening, all of which are False here, so the call would just copy the
# whole training array for nothing. Re-add it if any of those flags is enabled.

In [6]:
# Stage 1: 25 epochs with RMSprop at lr=1e-3 (decay 1e-5) on augmented batches.
batch_size = 128
epochs = 25

opt_adm = keras.optimizers.rmsprop(lr=0.001, decay=1e-5)
model.compile(
    optimizer=opt_adm,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
steps = x_train.shape[0] // batch_size  # floor: number of full batches in the training set
his = model.fit_generator(
    train_batches,
    steps_per_epoch=steps,
    epochs=epochs,
    validation_data=(x_test, y_test),
    verbose=1,
)

# his = model.fit(x_train, y_train, batch_size=batch_size, epochs=3*epochs,verbose=1,validation_data=(x_test,y_test))

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [7]:
# Dump the per-epoch metrics recorded by the fit_generator call above.
stage_metrics = his.history
print(stage_metrics)

{'loss': [5.6209571040593662, 2.0951795005094636, 2.0523659269665155, 1.9997001000998835, 1.9939237371024927, 1.959269530734153, 1.9618728280182178, 1.9687960090490346, 1.9312500182275096, 1.9220927905149097, 1.8872283964077554, 1.8694508553775107, 1.8591746335844983, 1.8360458160005091, 1.8660785625253995, 1.8329346336001267, 1.8220010495239396, 1.7986127764588944, 1.7733096279775176, 1.7769499484414966, 1.8440589062250461, 1.7581470729373838, 1.7487582575253733, 1.7189426673388, 1.6968520753919829], 'val_acc': [0.10009999999999999, 0.16089999999999999, 0.11799999999999999, 0.2427, 0.2442, 0.17430000000000001, 0.36580000000000001, 0.3105, 0.13519999999999999, 0.17510000000000001, 0.3493, 0.38109999999999999, 0.36830000000000002, 0.3962, 0.27189999999999998, 0.13739999999999999, 0.32579999999999998, 0.36770000000000003, 0.4214, 0.30220000000000002, 0.39179999999999998, 0.38950000000000001, 0.20050000000000001, 0.36969999999999997, 0.43259999999999998], 'val_loss': [2.9543575412750243, 

In [9]:
# Stage 2: 7 more epochs with the learning rate halved (1e-3 -> 5e-4).
batch_size = 128
epochs = 7

opt_adm = keras.optimizers.rmsprop(lr=0.0005, decay=1e-6)
model.compile(
    optimizer=opt_adm,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
steps = x_train.shape[0] // batch_size  # floor: number of full batches in the training set
his = model.fit_generator(
    train_batches,
    steps_per_epoch=steps,
    epochs=epochs,
    validation_data=(x_test, y_test),
    verbose=1,
)

Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [None]:
# Stage 3: 7 more epochs with the learning rate halved again (5e-4 -> 2.5e-4).
batch_size = 128
epochs = 7

opt_adm = keras.optimizers.rmsprop(lr=0.00025, decay=1e-7)
model.compile(
    optimizer=opt_adm,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
steps = x_train.shape[0] // batch_size  # floor: number of full batches in the training set
his = model.fit_generator(
    train_batches,
    steps_per_epoch=steps,
    epochs=epochs,
    validation_data=(x_test, y_test),
    verbose=1,
)

In [16]:
# Dump the per-epoch metrics held by the most recent History object.
stage_metrics = his.history
print(stage_metrics)

{'loss': [1.2293948320242074, 1.217166005092666, 1.2050382819816705, 1.1991175673196894, 1.1870351748318102, 1.1760069362850269, 1.1701783463478395], 'val_acc': [0.63929999999999998, 0.60999999999999999, 0.65720000000000001, 0.6573, 0.57589999999999997, 0.64980000000000004, 0.6321], 'val_loss': [1.180369277191162, 1.2532865208625794, 1.1286667691230774, 1.1405270289421081, 1.3990194765090942, 1.1604224590301513, 1.2130172470092773], 'acc': [0.61596554487179489, 0.62233317296092694, 0.62660410649355003, 0.6308750401217853, 0.63257940329149975, 0.63957731791478833, 0.64288578763542015]}


In [17]:
# Write the current model to an .h5 checkpoint file.
checkpoint_path = "e_swish_2_latest.h5"
model.save(checkpoint_path)

In [None]:
# Restore the checkpoint written by model.save("e_swish_2_latest.h5").
# Keras models have no `.load` method; the module-level `load_model`
# (imported at the top of the notebook) is the loader. The network uses the
# custom activation `e_swish_2`, which is stored in the file by name only, so
# the function itself has to be supplied through `custom_objects`.
model = load_model("e_swish_2_latest.h5",
                   custom_objects={"e_swish_2": e_swish_2})

In [None]:
# Stage 4: 7 more epochs with RMSprop at lr=0.00125, decay=1e-8.
# NOTE(review): this rate is higher than the 0.00025 used in the preceding
# training cell, not half of it -- confirm whether 0.000125 was intended.
batch_size = 128
epochs = 7

opt_adm = keras.optimizers.rmsprop(lr=0.00125, decay=1e-8)
model.compile(
    optimizer=opt_adm,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
steps = x_train.shape[0] // batch_size  # floor: number of full batches in the training set
his = model.fit_generator(
    train_batches,
    steps_per_epoch=steps,
    epochs=epochs,
    validation_data=(x_test, y_test),
    verbose=1,
)

In [None]:
# Dump the per-epoch metrics held by the most recent History object.
stage_metrics = his.history
print(stage_metrics)

In [None]:
# Stage 5: 7 more epochs with RMSprop at lr=0.00075, decay=1e-8.
# NOTE(review): 0.00075 is 0.6x the 0.00125 of the preceding training cell,
# not exactly half -- confirm the intended schedule.
batch_size = 128
epochs = 7

opt_adm = keras.optimizers.rmsprop(lr=0.00075, decay=1e-8)
model.compile(
    optimizer=opt_adm,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
steps = x_train.shape[0] // batch_size  # floor: number of full batches in the training set
his = model.fit_generator(
    train_batches,
    steps_per_epoch=steps,
    epochs=epochs,
    validation_data=(x_test, y_test),
    verbose=1,
)

In [None]:
# Dump the per-epoch metrics held by the most recent History object.
stage_metrics = his.history
print(stage_metrics)