In [2]:
import tensorflow as tf
import numpy as np
import keras
import keras.backend as K
from keras.models import Sequential
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.datasets import cifar10
from keras.models import load_model
from keras import regularizers, optimizers
from keras.initializers import glorot_normal, RandomNormal, Zeros

# Load CIFAR-10 and switch the image arrays to float32 for the arithmetic below.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train.astype('float32'), x_test.astype('float32')

# z-score: a single mean / std is computed over every axis of the training
# array, and those training statistics are applied to both splits.
mean = np.mean(x_train, axis=(0, 1, 2, 3))
std = np.std(x_train, axis=(0, 1, 2, 3))
x_train, x_test = (x_train - mean) / (std + 1e-7), (x_test - mean) / (std + 1e-7)

# One-hot encode the integer labels.
num_classes = 10
y_train, y_test = (
    np_utils.to_categorical(y_train, num_classes),
    np_utils.to_categorical(y_test, num_classes),
)


  return f(*args, **kwds)
Using TensorFlow backend.


In [3]:
# Data augmentation: only geometric transforms are switched on; every
# centering / normalisation / whitening option of the generator is disabled.
augmentation_options = dict(
    # statistics-based preprocessing (all disabled)
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    # random geometric transforms
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
)
datagen = ImageDataGenerator(**augmentation_options)
datagen.fit(x_train)

In [4]:
def swish(x):
    """Swish activation: the input multiplied by its own sigmoid, x * sigmoid(x)."""
    gate = K.sigmoid(x)
    return x * gate

def e_swish_1(x):
    """Custom activation 1: max(0, x * (2 - sigmoid(x))).

    Algebraically x * (2 - sigmoid(x)) == 2*x - swish(x); the result is
    clamped from below at zero, so the output is never negative.
    """
    mirrored = x * (2 - K.sigmoid(x))
    return K.maximum(0.0, mirrored)

def e_swish_2(x):
    """Custom activation 2: element-wise max of two gated copies of the input.

    Returns max(x * sigmoid(x), x * (2 - sigmoid(x))), i.e. the larger of
    swish(x) and 2*x - swish(x) at every position.
    """
    sig = K.sigmoid(x)
    plain = x * sig
    mirrored = x * (2 - sig)
    return K.maximum(plain, mirrored)

In [5]:
# Activations under comparison: either a Keras built-in given by its string
# identifier or one of the callables defined in this notebook.
activations = [e_swish_2, "relu", swish, e_swish_1]

# Human-readable label per activation: strings are kept as they are,
# callables are labelled with their function name.
names = [
    entry if isinstance(entry, str) else entry.__name__
    for entry in activations
]
print(names)

['e_swish_2', 'relu', 'swish', 'e_swish_1']


In [6]:
def create_model(act, act_name):
    """Build the uncompiled 13-convolution CNN used to compare activations.

    The network is a plain `Sequential` stack: thirteen `Conv2D` layers (all
    with 'same' padding), five 2x2 max-pooling layers with stride 2, then
    `Flatten` and a softmax `Dense` classifier. The same activation `act` is
    used after every convolution. No kernel regularisation (weight decay) is
    applied anywhere in the model.

    Reads the notebook globals `x_train` (for the input shape) and
    `num_classes` (for the width of the output layer).

    Args:
        act: activation passed to every `Activation` layer; either a Keras
            activation identifier string or a callable on backend tensors.
        act_name: label stored alongside the model.

    Returns:
        dict with keys "act", "act_name" and "model" (the `Sequential` model,
        not yet compiled).
    """
    pool_stride = 2
    dropout_rate = 0.2

    # Small factories so that the layer list below reads as the architecture.
    def conv(filters, kernel_size=(3, 3), narrow_init=False, **kwargs):
        # narrow_init selects the N(0, 0.01) initializer instead of Glorot normal.
        init = RandomNormal(stddev=0.01) if narrow_init else glorot_normal()
        return Conv2D(filters, kernel_size, padding='same',
                      kernel_initializer=init, **kwargs)

    def batch_norm():
        return BatchNormalization()

    def activation():
        return Activation(act)

    def dropout():
        return Dropout(dropout_rate)

    def pool():
        return MaxPooling2D(pool_size=(2, 2), strides=pool_stride)

    # The order of this list IS the architecture. The placement of the pooling
    # layers and the blocks without batch-norm / dropout are irregular on
    # purpose and must not be "tidied up".
    layers = [
        # Block 1
        conv(64, input_shape=x_train.shape[1:]), batch_norm(), activation(), dropout(),
        # Block 2
        conv(128), batch_norm(), activation(), dropout(),
        # Block 3
        conv(128, narrow_init=True), batch_norm(), activation(), dropout(),
        # Block 4 -- first max-pooling sits between activation and dropout
        conv(128, narrow_init=True), batch_norm(), activation(), pool(), dropout(),
        # Block 5
        conv(128, narrow_init=True), batch_norm(), activation(), dropout(),
        # Block 6
        conv(128), batch_norm(), activation(), dropout(),
        # Block 7 -- second max-pooling comes right after the conv, before batch-norm
        conv(256), pool(), batch_norm(), activation(), dropout(),
        # Block 8
        conv(256), batch_norm(), activation(), dropout(),
        # Block 9 -- third max-pooling comes after the dropout
        conv(256), batch_norm(), activation(), dropout(), pool(),
        # Block 10
        conv(512), batch_norm(), activation(), dropout(),
        # Block 11 -- 1x1 conv, no batch-norm
        conv(2048, kernel_size=(1, 1)), activation(), dropout(),
        # Block 12 -- 1x1 conv, no batch-norm; fourth max-pooling before dropout
        conv(256, kernel_size=(1, 1)), activation(), pool(), dropout(),
        # Block 13 -- no batch-norm, no dropout; fifth max-pooling
        conv(256), activation(), pool(),
        # Final classifier
        Flatten(),
        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)

    return {"act": act, "act_name": act_name, "model": model}

In [7]:
# Collects one result dict per trained activation; filled by the training cells.
models = list()

In [None]:
# Train one fresh network per activation function and collect the histories.
#
# Every network goes through the same four consecutive training stages. Each
# stage recompiles the model with a new Adadelta optimizer (the weights carry
# over; the optimizer starts fresh) and trains for the given number of epochs.
# Each entry: (key under which the history is stored, epochs, Adadelta kwargs).
training_schedule = [
    ("part_1", 50, {}),                          # library-default Adadelta
    ("part_2", 30, {"lr": 0.7, "rho": 0.9}),
    ("part_3", 25, {"lr": 0.6, "rho": 0.90}),
    ("part_4", 20, {"lr": 0.5, "rho": 0.95}),
]

for i, act in enumerate(activations):
    # Drop the previous iteration's graph so backend state does not keep
    # growing while several models are built in one loop.
    K.clear_session()

    batch_size = 128
    nn = create_model(act, names[i])
    model = nn["model"]
    model.summary()

    for stage, epochs, adadelta_kwargs in training_schedule:
        optimizer = keras.optimizers.Adadelta(**adadelta_kwargs)
        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        # NOTE: the test split is what gets passed as validation data here.
        history = model.fit_generator(
            datagen.flow(x_train, y_train, batch_size=batch_size),
            steps_per_epoch=x_train.shape[0] // batch_size,
            epochs=epochs,
            verbose=1,
            validation_data=(x_test, y_test))
        nn[stage] = history.history
        print(nn[stage])
        # The file name carries the epoch count of this stage only (not the
        # cumulative total); the four stage lengths are distinct, so each
        # stage writes its own file.
        model.save_weights("simplenet_generic_"+nn["act_name"]+"_"+str(epochs)+".h5")

    # Keep only the metadata and histories; the model object itself is dropped.
    del nn["model"]
    print("\n", nn) # Ensure everything's ok x2
    models.append(nn)
     

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 64)        1792      
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 64)        256       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 32, 32, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 128)       73856     
_________________________________________________________________
batch_normalization_2 (Batch (None, 32, 32, 128)       512       
_________________________________________________________________
activation_2 (Activation)    (None, 32, 32, 128)       0         
__________

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
{'val_loss': [0.25670622930526732, 0.2506920605659485, 0.2570322861671448, 0.24995032482147217, 0.24792020115852356, 0.25483046464920045, 0.26199334192276003, 0.25575393385887146, 0.25578167934417723, 0.25433237800598146, 0.25416022162437441, 0.24770426959991454, 0.2429472041130066, 0.25278541975021362, 0.25071349415779115, 0.26012125740051267, 0.25867460644245149, 0.24947773661613465, 0.25910815315246583, 0.25449635801315307, 0.25102498233318327, 0.24611578054428102, 0.2657999581336975, 0.24955371644496918, 0.26370981616973876, 0.24533061189651489, 0.2494860679626465, 0.2480485725402832, 0.2477382166147232, 0.24590379362106324], 'val_acc': 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 32, 32, 64)        1792      
_________________________________________________________________
batch_normalization_11 (Batc (None, 32, 32, 64)        256       
_________________________________________________________________
activation_14 (Activation)   (None, 32, 32, 64)        0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 32, 32, 64)        0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 32, 32, 128)       73856     
_________________________________________________________________
batch_normalization_12 (Batc (None, 32, 32, 128)       512       
_________________________________________________________________
activation_15 (Activation)   (None, 32, 32, 128)       0         
__________

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
{'val_loss': [0.24208446426391603, 0.24077992639541626, 0.25018725533485414, 0.23401111135482788, 0.24810904757380486, 0.24626069469451906, 0.2483254460811615, 0.23308508322238922, 0.23142802014350891, 0.23782422363758088, 0.23345917816162109, 0.23357124180793762, 0.2387777265071869, 0.24787405748367308, 0.23591543724536895, 0.23190798864364623, 0.23057176761627196, 0.24260770282745361, 0.22055496859550477, 0.2356174066543579, 0.23012672810554505, 0.22161635127067567, 0.22798151364326477, 0.25241847162246706, 0.22933466954231263, 0.22882126305103301, 0.23520413522720338, 0.24185513246059417, 0.24763635144233703, 0.24081270687580109], 'val_ac

In [8]:
# Resume the experiment for the activations at positions 2 and 3 only
# (the last two entries of `activations`), using the same four-stage
# procedure as the full run.
#
# Each stage recompiles the model with a new Adadelta optimizer (the weights
# carry over; the optimizer starts fresh) and trains for the given epochs.
# Each entry: (key under which the history is stored, epochs, Adadelta kwargs).
resume_indices = [2, 3]
training_schedule = [
    ("part_1", 50, {}),                          # library-default Adadelta
    ("part_2", 30, {"lr": 0.7, "rho": 0.9}),
    ("part_3", 25, {"lr": 0.6, "rho": 0.90}),
    ("part_4", 20, {"lr": 0.5, "rho": 0.95}),
]

for i in resume_indices:
    # Index the activation and its label with the same i so they cannot drift apart.
    act = activations[i]

    # Drop the previous iteration's graph so backend state does not keep
    # growing while several models are built in one loop.
    K.clear_session()

    batch_size = 128
    nn = create_model(act, names[i])
    model = nn["model"]
    model.summary()

    for stage, epochs, adadelta_kwargs in training_schedule:
        optimizer = keras.optimizers.Adadelta(**adadelta_kwargs)
        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        # NOTE: the test split is what gets passed as validation data here.
        history = model.fit_generator(
            datagen.flow(x_train, y_train, batch_size=batch_size),
            steps_per_epoch=x_train.shape[0] // batch_size,
            epochs=epochs,
            verbose=1,
            validation_data=(x_test, y_test))
        nn[stage] = history.history
        print(nn[stage])
        # The file name carries the epoch count of this stage only (not the
        # cumulative total); the four stage lengths are distinct, so each
        # stage writes its own file.
        model.save_weights("simplenet_generic_"+nn["act_name"]+"_"+str(epochs)+".h5")

    # Keep only the metadata and histories; the model object itself is dropped.
    del nn["model"]
    print("\n", nn) # Ensure everything's ok x2
    models.append(nn)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 32, 64)        1792      
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 64)        256       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 32, 32, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 128)       73856     
_________________________________________________________________
batch_normalization_2 (Batch (None, 32, 32, 128)       512       
_________________________________________________________________
activation_2 (Activation)    (None, 32, 32, 128)       0         
__________

Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
{'val_loss': [1.2701581336975099, 0.91327721881866453, 0.81222170372009272, 0.63881399259567262, 0.56879725151062011, 0.51824476156234744, 0.51574898700714111, 0.5515461269378662, 0.4383820701599121, 0.38769240112304687, 0.39469577107429504, 0.44091899018287661, 0.36313070917129514, 0.35967272009849549, 0.34635223903656004, 0.36449624381065371, 0.33495946302413943, 0.32296953873634338, 0.32365637059211733, 0.32380144996643068, 0.31632360115051267, 0.3151200714

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30

KeyboardInterrupt: 