In [None]:
import tensorflow.keras as tfk
from tensorflow.keras.losses import *
from tensorflow.keras.layers import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow.keras.backend as K
from tensorflow.keras.utils import *
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tqdm import *
import pickle
import re

Download CIFAR-10

In [None]:
# Fetch the CIFAR-10 arrays through Keras and unpack the two (images, labels) pairs.
train_split, test_split = tfk.datasets.cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Sanity check: show the shape of every array that was just loaded.
print(*(split_array.shape for split_array in (x_train, y_train, x_test, y_test)))

In [None]:
# NOTE: this cell rebinds x_* / y_* in place, so running it a second time
# would one-hot encode and divide the already-processed arrays again.

# One-hot encode the labels into 10 classes.
y_train, y_test = (to_categorical(labels, num_classes=10) for labels in (y_train, y_test))

# Divide the image arrays by 255.0.
x_train, x_test = (pixels / 255.0 for pixels in (x_train, x_test))

Model

In [None]:
class SignGate(Layer):
    """Layer that turns its input into a hard on/off mask.

    The output is ``sign(relu(x))``: 1 wherever the input is strictly
    positive and 0 elsewhere. The layer creates no weights of its own.
    """

    def __init__(self, **kwargs):
        # No layer-specific options; keyword arguments go straight to ``Layer``.
        super().__init__(**kwargs)

    def build(self, input_shape):
        # Nothing to create here; defer to the base class.
        super().build(input_shape)

    def call(self, x):
        """Return the binary gate computed from ``x``."""
        rectified = K.relu(x)
        return K.sign(rectified)

    def compute_output_shape(self, input_shape):
        """The gate is element-wise, so the shape is passed through unchanged."""
        return input_shape

def getNetwork1Layers(model):
    """Return the layers of ``model`` that belong to network 1.

    A layer belongs to network 1 when its name starts with the literal
    prefix ``"n1_"`` (e.g. ``"n1_c1"``, ``"n1_d2"``).

    The previous implementation used the regex ``"^n1_*"``. In a regex,
    ``_*`` means "zero or more underscores", so the underscore was optional
    and any name beginning with ``n1`` (``"n1"``, ``"n10_c1"``, ...) was
    selected. A plain prefix test expresses the intended rule exactly.

    Args:
        model: object exposing a ``layers`` iterable whose items have a
            string ``name`` attribute.

    Returns:
        list: the matching layers, in the order they appear in
        ``model.layers``.
    """
    return [layer for layer in model.layers if layer.name.startswith("n1_")]

def getNetwork2Layers(model):
    """Return the layers of ``model`` that belong to network 2.

    A layer belongs to network 2 when its name starts with the literal
    prefix ``"n2_"`` (e.g. ``"n2_c1"``, ``"n2_d2"``).

    The previous implementation used the regex ``"^n2_*"``. In a regex,
    ``_*`` means "zero or more underscores", so the underscore was optional
    and any name beginning with ``n2`` (``"n2"``, ``"n20_c1"``, ...) was
    selected. A plain prefix test expresses the intended rule exactly.

    Args:
        model: object exposing a ``layers`` iterable whose items have a
            string ``name`` attribute.

    Returns:
        list: the matching layers, in the order they appear in
        ``model.layers``.
    """
    return [layer for layer in model.layers if layer.name.startswith("n2_")]

def freezeWeights(layers):
    """Set ``trainable = False`` on every layer in ``layers`` (in place).

    Args:
        layers: iterable of objects with a writable ``trainable`` attribute.
    """
    for frozen_layer in layers:
        setattr(frozen_layer, "trainable", False)

# Height and width used for the model `Input` tensors; the builder functions
# in this cell create inputs of shape (img_wid, img_wid, 3).
img_wid = 32
def getConv4Relu():
    """Build the plain ReLU network.

    Architecture: four 3x3 'same'-padded ReLU convolutions (64, 64, 128, 128
    filters, named ``n1_c1`` .. ``n1_c4``), global average pooling, flatten,
    two 256-unit ReLU dense layers (``n1_d1``, ``n1_d2``) and a 10-way
    softmax layer named ``output``.

    Returns:
        An uncompiled ``tfk.Model``.
    """
    conv_plan = (("n1_c1", 64), ("n1_c2", 64), ("n1_c3", 128), ("n1_c4", 128))
    dense_plan = ("n1_d1", "n1_d2")

    inputs = Input(shape = (img_wid, img_wid, 3))

    # Convolutional trunk.
    features = inputs
    for layer_name, n_filters in conv_plan:
        features = Conv2D(filters = n_filters, kernel_size = (3, 3), padding = 'same',
                          name = layer_name, activation = 'relu')(features)

    # Collapse the spatial dimensions before the dense head.
    features = GlobalAveragePooling2D()(features)
    features = Flatten()(features)

    # Fully connected head.
    for layer_name in dense_plan:
        features = Dense(units = 256, activation = 'relu', name = layer_name)(features)

    outputs = Dense(units = 10, activation = 'softmax', name = "output")(features)

    return tfk.Model(inputs = inputs, outputs = outputs)

def getConv4Galu():
    """Build the two-branch gated network with hard (``SignGate``) gates.

    Branch ``n1_*`` is a ReLU network (four convolutions, global average
    pooling, flatten, two dense layers). A ``SignGate`` is applied to the
    output of each of its six weighted layers. Branch ``n2_*`` mirrors the
    same layer sizes with linear activations; the output of every ``n2_*``
    layer is multiplied element-wise by the gate taken from the matching
    ``n1_*`` layer. The 10-way softmax ``output`` layer reads only from the
    ``n2`` branch.

    Returns:
        An uncompiled ``tfk.Model``.
    """
    # (n1 layer name, n2 layer name, number of filters) for each conv stage.
    conv_plan = (("n1_c1", "n2_c1", 64), ("n1_c2", "n2_c2", 64),
                 ("n1_c3", "n2_c3", 128), ("n1_c4", "n2_c4", 128))
    # (n1 layer name, n2 layer name) for each dense stage.
    dense_plan = (("n1_d1", "n2_d1"), ("n1_d2", "n2_d2"))

    inputs = Input(shape = (img_wid, img_wid, 3))

    # ---- n1 branch: ReLU network whose activations drive the gates ----
    gate_sources = []
    n1_flow = inputs
    for n1_name, _, n_filters in conv_plan:
        n1_flow = Conv2D(filters = n_filters, kernel_size = (3, 3), padding = 'same',
                         name = n1_name, activation = 'relu')(n1_flow)
        gate_sources.append(n1_flow)
    n1_flow = GlobalAveragePooling2D()(n1_flow)
    n1_flow = Flatten()(n1_flow)
    for n1_name, _ in dense_plan:
        n1_flow = Dense(units = 256, activation = 'relu', name = n1_name)(n1_flow)
        gate_sources.append(n1_flow)

    # One hard gate per n1 layer, in layer order (4 conv gates, then 2 dense gates).
    gates = [SignGate()(source) for source in gate_sources]
    conv_gates, dense_gates = gates[:len(conv_plan)], gates[len(conv_plan):]

    # ---- n2 branch: linear layers, each masked by the matching gate ----
    n2_flow = inputs
    for (_, n2_name, n_filters), gate in zip(conv_plan, conv_gates):
        n2_flow = Conv2D(filters = n_filters, kernel_size = (3, 3), padding = 'same',
                         name = n2_name, activation = 'linear')(n2_flow)
        n2_flow = Multiply()([gate, n2_flow])

    n2_flow = GlobalAveragePooling2D()(n2_flow)
    n2_flow = Flatten()(n2_flow)

    for (_, n2_name), gate in zip(dense_plan, dense_gates):
        n2_flow = Dense(units = 256, activation = 'linear', name = n2_name)(n2_flow)
        n2_flow = Multiply()([gate, n2_flow])

    outputs = Dense(units = 10, activation = 'softmax', name = "output")(n2_flow)

    return tfk.Model(inputs = inputs, outputs = outputs)

# Constants read by SoftGate.call, which computes (1 + eps) * sigmoid(beta * x).
eps, beta = 0.1, 4

class SoftGate(Layer):
    """Layer that turns its input into a smooth gate.

    The output is ``(1 + eps) * sigmoid(beta * x)``, where ``eps`` and
    ``beta`` are the module-level constants defined next to this class.
    The layer creates no weights of its own.
    """

    def __init__(self, **kwargs):
        # No layer-specific options; keyword arguments go straight to ``Layer``.
        super().__init__(**kwargs)

    def build(self, input_shape):
        # Nothing to create here; defer to the base class.
        super().build(input_shape)

    def call(self, x):
        """Return the soft gate computed from ``x``."""
        squashed = K.sigmoid(beta*x)
        return (1 + eps)*squashed

    def compute_output_shape(self, input_shape):
        """The gate is element-wise, so the shape is passed through unchanged."""
        return input_shape

def getDecoupledLearning():
    """Build the two-branch gated network with soft (``SoftGate``) gates.

    Branch ``n1_*`` uses linear conv/dense layers, each followed by a
    separate ReLU ``Activation`` layer (except the last dense layer,
    ``n1_d2``, whose output feeds only its gate). A ``SoftGate`` is applied
    to the linear (pre-ReLU) output of each of the six ``n1_*`` layers.
    Branch ``n2_*`` mirrors the same layer sizes with linear activations;
    the output of every ``n2_*`` layer is multiplied element-wise by the
    gate taken from the matching ``n1_*`` layer. The 10-way softmax
    ``output`` layer reads only from the ``n2`` branch.

    Returns:
        An uncompiled ``tfk.Model``.
    """
    # (n1 layer name, n2 layer name, number of filters) for each conv stage.
    conv_plan = (("n1_c1", "n2_c1", 64), ("n1_c2", "n2_c2", 64),
                 ("n1_c3", "n2_c3", 128), ("n1_c4", "n2_c4", 128))
    # (n1 layer name, n2 layer name) for each dense stage.
    dense_plan = (("n1_d1", "n2_d1"), ("n1_d2", "n2_d2"))

    inputs = Input(shape = (img_wid, img_wid, 3))

    # ---- n1 branch: keep every pre-activation so it can drive a gate ----
    pre_activations = []
    n1_flow = inputs
    for n1_name, _, n_filters in conv_plan:
        linear_out = Conv2D(filters = n_filters, kernel_size = (3, 3), padding = 'same',
                            name = n1_name, activation = 'linear')(n1_flow)
        pre_activations.append(linear_out)
        n1_flow = Activation('relu')(linear_out)

    n1_flow = GlobalAveragePooling2D()(n1_flow)
    n1_flow = Flatten()(n1_flow)

    first_dense = Dense(units = 256, activation = 'linear', name = "n1_d1")(n1_flow)
    pre_activations.append(first_dense)
    n1_flow = Activation('relu')(first_dense)
    # The last n1 layer has no ReLU after it: only its gate is used.
    pre_activations.append(
        Dense(units = 256, activation = 'linear', name = "n1_d2")(n1_flow))

    # One soft gate per n1 layer, in layer order (4 conv gates, then 2 dense gates).
    gates = [SoftGate()(linear_out) for linear_out in pre_activations]
    conv_gates, dense_gates = gates[:len(conv_plan)], gates[len(conv_plan):]

    # ---- n2 branch: linear layers, each scaled by the matching gate ----
    n2_flow = inputs
    for (_, n2_name, n_filters), gate in zip(conv_plan, conv_gates):
        n2_flow = Conv2D(filters = n_filters, kernel_size = (3, 3), padding = 'same',
                         name = n2_name, activation = 'linear')(n2_flow)
        n2_flow = Multiply()([gate, n2_flow])

    n2_flow = GlobalAveragePooling2D()(n2_flow)
    n2_flow = Flatten()(n2_flow)

    for (_, n2_name), gate in zip(dense_plan, dense_gates):
        n2_flow = Dense(units = 256, activation = 'linear', name = n2_name)(n2_flow)
        n2_flow = Multiply()([gate, n2_flow])

    outputs = Dense(units = 10, activation = 'softmax', name = "output")(n2_flow)

    return tfk.Model(inputs = inputs, outputs = outputs)

In [None]:
# Settings shared by every training cell below.
loss = tfk.losses.categorical_crossentropy
opt = tfk.optimizers.SGD  # optimizer class; the training cells instantiate it as opt(lr)
batch_size, num_exp, num_epochs = 32, 5, 100

# One container per model variant. Each metric maps to a list that receives
# one per-epoch curve for every experiment that is run.
history_keys = ('acc', 'val_acc', 'loss', 'val_loss')
history_relu = {key: [] for key in history_keys}
history_flnpf = {key: [] for key in history_keys}
history_frnpf_di = {key: [] for key in history_keys}
history_frnpf_ii = {key: [] for key in history_keys}
history_dlnpf = {key: [] for key in history_keys}

Train: Standard ReLU and FLNPF

In [None]:
lr = 1e-1
for exp_i in range(num_exp):
    print("_____________EXP:{}____________".format(exp_i+1))

    # ---- Stage 1: train the plain ReLU network, checkpointing the best val_acc ----
    model_relu = getConv4Relu()
    model_relu.compile(loss = loss, optimizer = opt(lr), metrics = ['acc'])

    best_weights_path = "weights.best.hdf5"
    checkpoint = ModelCheckpoint(best_weights_path, monitor='val_acc', verbose=0,
                                 save_best_only=True, mode='max')

    history = model_relu.fit(x_train, y_train,
                             validation_data = (x_test, y_test),
                             verbose = 0,
                             batch_size = batch_size,
                             epochs = num_epochs,
                             callbacks = [checkpoint])
    for key in ('acc', 'val_acc', 'loss', 'val_loss'):
        history_relu[key].append(history.history[key])
    print("ReLU: MAX ACC = {}, MAX VAL ACC = {}".format(np.max(history.history['acc']),
                                                        np.max(history.history['val_acc'])))

    # Restore the checkpointed weights before they are copied into the gated model.
    model_relu.load_weights(best_weights_path)

    # ---- Stage 2: gated model whose n1 branch is the frozen, trained ReLU net ----
    model_flnpf = getConv4Galu()
    layers_relu = getNetwork1Layers(model_relu)
    layers_flnpf = getNetwork1Layers(model_flnpf)

    # Freeze the n1 branch before compiling so only the n2 branch is trained.
    freezeWeights(layers_flnpf)

    model_flnpf.compile(loss = loss, optimizer = opt(lr), metrics = ['acc'])

    # Copy the trained ReLU weights into the matching n1 layers of the gated model.
    for target_layer, source_layer in zip(layers_flnpf, layers_relu):
        target_layer.set_weights(source_layer.get_weights())

    history = model_flnpf.fit(x_train, y_train,
                              validation_data = (x_test, y_test),
                              verbose = 0,
                              batch_size = batch_size,
                              epochs = num_epochs)
    for key in ('acc', 'val_acc', 'loss', 'val_loss'):
        history_flnpf[key].append(history.history[key])

    print("FLNPF: MAX ACC = {}, MAX VAL ACC = {}".format(np.max(history.history['acc']),
                                                        np.max(history.history['val_acc'])))

In [None]:
# Best (max over epochs) accuracy of each experiment, then mean / std across experiments.
relu_best_acc = np.max(history_relu['acc'], axis = 1)
relu_best_val_acc = np.max(history_relu['val_acc'], axis = 1)
print("ReLU: max_acc = {:.4f}, mean_max_val_acc = {:.4f}, std_max_val_acc = {:.4f}".format(
    np.mean(relu_best_acc), np.mean(relu_best_val_acc), np.std(relu_best_val_acc)))

flnpf_best_acc = np.max(history_flnpf['acc'], axis = 1)
flnpf_best_val_acc = np.max(history_flnpf['val_acc'], axis = 1)
print("FLNPF: max_acc = {:.4f}, mean_max_val_acc = {:.4f}, std_max_val_acc = {:.4f}".format(
    np.mean(flnpf_best_acc), np.mean(flnpf_best_val_acc), np.std(flnpf_best_val_acc)))

In [None]:
# Persist the ReLU and FLNPF training histories with pickle.
# The files are opened in `with` blocks so each one is flushed and closed as
# soon as its dump finishes; previously the handles were never closed, which
# left the last file open (and possibly unflushed) for the life of the kernel.
with open("h_cifar10_gconv_sgd_relu", 'wb') as relu_file:
    pickle.dump(history_relu, relu_file)

with open("h_cifar10_gconv_sgd_flnpf", 'wb') as flnpf_file:
    pickle.dump(history_flnpf, flnpf_file)

- FRNPF(DI)

In [None]:
lr = 1e-1
for exp_i in range(num_exp):
    print("_____________EXP:{}____________".format(exp_i+1))

    model_frnpf_di = getConv4Galu()
    layers_frnpf_di_n1 = getNetwork1Layers(model_frnpf_di)
    layers_frnpf_di_n2 = getNetwork2Layers(model_frnpf_di)

    # Freeze the n1 branch before compiling so only the n2 branch is trained.
    freezeWeights(layers_frnpf_di_n1)

    model_frnpf_di.compile(loss = loss, optimizer = opt(lr), metrics = ['acc'])

    # Give each frozen n1 layer the initial weights of the paired n2 layer.
    for n1_layer, n2_layer in zip(layers_frnpf_di_n1, layers_frnpf_di_n2):
        n1_layer.set_weights(n2_layer.get_weights())

    history = model_frnpf_di.fit(x_train, y_train,
                                 validation_data = (x_test, y_test),
                                 verbose = 0,
                                 batch_size = batch_size,
                                 epochs = num_epochs)
    for key in ('acc', 'val_acc', 'loss', 'val_loss'):
        history_frnpf_di[key].append(history.history[key])

    print("FRNPF(DI): MAX ACC = {:.4f}, MAX VAL ACC = {:.4f}".format(np.max(history.history['acc']),
                                                        np.max(history.history['val_acc'])))

In [None]:
# Best (max over epochs) accuracy of each experiment, then mean / std across experiments.
frnpf_di_best_acc = np.max(history_frnpf_di['acc'], axis = 1)
frnpf_di_best_val_acc = np.max(history_frnpf_di['val_acc'], axis = 1)
print("FRNPF(DI): max_acc = {:.4f}, mean_max_val_acc = {:.4f}, std_max_val_acc = {:.4f}".format(
    np.mean(frnpf_di_best_acc), np.mean(frnpf_di_best_val_acc), np.std(frnpf_di_best_val_acc)))

In [None]:
# Persist the FRNPF(DI) training history with pickle.
# The `with` block flushes and closes the file once the dump finishes;
# previously the handle was never closed.
with open("h_cifar10_gconv_sgd_frnpf_di", 'wb') as frnpf_di_file:
    pickle.dump(history_frnpf_di, frnpf_di_file)

- FRNPF(II)

In [None]:
lr = 1e-1
for exp_i in range(num_exp):
    print("_____________EXP:{}____________".format(exp_i+1))

    model_frnpf_ii = getConv4Galu()
    layers_frnpf_ii_n1 = getNetwork1Layers(model_frnpf_ii)

    # Freeze the n1 branch at its own initial weights; only the n2 branch is trained.
    freezeWeights(layers_frnpf_ii_n1)

    model_frnpf_ii.compile(loss = loss, optimizer = opt(lr), metrics = ['acc'])
    history = model_frnpf_ii.fit(x_train, y_train,
                                 validation_data = (x_test, y_test),
                                 verbose = 0,
                                 batch_size = batch_size,
                                 epochs = num_epochs)
    for key in ('acc', 'val_acc', 'loss', 'val_loss'):
        history_frnpf_ii[key].append(history.history[key])

    print("FRNPF(II): MAX ACC = {:.4f}, MAX VAL ACC = {:.4f}".format(np.max(history.history['acc']),
                                                        np.max(history.history['val_acc'])))

In [None]:
# Best (max over epochs) accuracy of each experiment, then mean / std across experiments.
frnpf_ii_best_acc = np.max(history_frnpf_ii['acc'], axis = 1)
frnpf_ii_best_val_acc = np.max(history_frnpf_ii['val_acc'], axis = 1)
print("FRNPF(II): max_acc = {:.4f}, mean_max_val_acc = {:.4f}, std_max_val_acc = {:.4f}".format(
    np.mean(frnpf_ii_best_acc), np.mean(frnpf_ii_best_val_acc), np.std(frnpf_ii_best_val_acc)))

In [None]:
# Persist the FRNPF(II) training history with pickle.
# The `with` block flushes and closes the file once the dump finishes;
# previously the handle was never closed.
with open("h_cifar10_gconv_sgd_gap_frnpf_ii", 'wb') as frnpf_ii_file:
    pickle.dump(history_frnpf_ii, frnpf_ii_file)

- DLNPF

In [None]:
lr = 1e-1
for exp_i in range(num_exp):
    print("_____________EXP:{}____________".format(exp_i+1))

    # No layers are frozen here: the whole two-branch model is trained.
    model_dlnpf = getDecoupledLearning()
    model_dlnpf.compile(loss = loss, optimizer = opt(lr), metrics = ['acc'])

    history = model_dlnpf.fit(x_train, y_train,
                              validation_data = (x_test, y_test),
                              verbose = 0,
                              batch_size = batch_size,
                              epochs = num_epochs)

    for key in ('acc', 'val_acc', 'loss', 'val_loss'):
        history_dlnpf[key].append(history.history[key])

    print("dlnpf: MAX ACC = {:.4f}, MAX VAL ACC = {:.4f}".format(
        np.max(history.history['acc']),
        np.max(history.history['val_acc'])))

In [None]:
# Best (max over epochs) accuracy of each experiment, then mean / std across experiments.
dlnpf_best_acc = np.max(history_dlnpf['acc'], axis = 1)
dlnpf_best_val_acc = np.max(history_dlnpf['val_acc'], axis = 1)
print("dlnpf: max_acc = {:.4f}, mean_max_val_acc = {:.4f}, std_max_val_acc = {:.4f}".format(
    np.mean(dlnpf_best_acc), np.mean(dlnpf_best_val_acc), np.std(dlnpf_best_val_acc)))

In [None]:
# Persist the DLNPF training history with pickle.
# The `with` block flushes and closes the file once the dump finishes;
# previously the handle was never closed.
with open('h_cifar10_gconv_sgd_dlnpf', 'wb') as dlnpf_file:
    pickle.dump(history_dlnpf, dlnpf_file)