In [None]:
# Copyright (c) 2020 ZZH

In [None]:
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, AveragePooling2D, MaxPool2D
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Flatten, Dense, DepthwiseConv2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.initializers import VarianceScaling
from tensorflow.keras import Model
import os
import numpy as np
import math

In [None]:
# List the physical GPUs TensorFlow can see and switch on memory growth for each one.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
print(gpus)
for device in gpus:
    tf.config.experimental.set_memory_growth(device, enable=True)

In [None]:
# --- Training hyperparameters ---
epochs = 100          # number of training epochs; also read by scheduler()
lr = 0.1              # learning rate given to the optimizer and read by scheduler()
batch_size = 128      # mini-batch size for the training data generator
REGULARIZER = 1e-4    # L2 coefficient used by the kernel/depthwise regularizers

# --- Output locations ---
checkpoint_save_path = './Model/EfficientNetB0/'          # ModelCheckpoint target
log_dir = os.path.join("Model", "EfficientNetB0_logs")    # TensorBoard log directory

In [None]:
# Load CIFAR-10 and set up data augmentation
cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# One-hot encode the labels (10 classes).
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

# Per-channel normalisation constants.
# NOTE(review): presumably np.mean()/np.std() of the training images per channel — confirm.
mean = [125.307, 122.95, 113.865]
std = [62.9932, 62.0887, 66.7048]

# Standardise every channel of both splits in place: (pixel - mean) / std.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
for images in (x_train, x_test):
    for channel, (channel_mean, channel_std) in enumerate(zip(mean, std)):
        images[..., channel] = (images[..., channel] - channel_mean) / channel_std

# Random augmentation applied to training batches only.
DataGenTrain = tf.keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=False,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
)
# NOTE(review): no featurewise/ZCA option is enabled above, so this fit() call is
# presumably not required — confirm against the Keras docs before removing it.
DataGenTrain.fit(x_train)

In [None]:
def scheduler(epoch):
    """Return the learning rate for ``epoch``: linear warm-up, then HTD(-6, 3) decay.

    For the first five epochs (0-4) the rate ramps linearly from 0.02 to 0.1.
    Afterwards it follows a hyperbolic-tangent decay that reads the module-level
    ``lr`` and ``epochs`` values.

    NOTE(review): the warm-up ramp is hard-coded and does not depend on ``lr``;
    it only ends at ``lr`` when ``lr == 0.1`` — confirm this is intended.
    """
    warmup_epochs = 5
    if epoch >= warmup_epochs:
        lower, upper = -6.0, 3.0
        # Map the epoch onto the [lower, upper] tanh argument range.
        phase = (upper - lower) * epoch / epochs + lower
        return lr / 2.0 * (1 - math.tanh(phase))
    return 0.02 * epoch + 0.02

In [None]:
def swish(x):
    """Apply the swish activation to ``x`` by delegating to ``tf.nn.swish``."""
    activated = tf.nn.swish(x)
    return activated

In [None]:
def conv_init():
    """Build a new ``VarianceScaling`` initializer: scale 2, fan-out mode, normal, unseeded.

    A fresh instance is returned on every call.
    """
    settings = dict(scale=2., mode='fan_out', distribution='normal', seed=None)
    return VarianceScaling(**settings)

In [None]:
def dense_init():
    """Build a new ``VarianceScaling`` initializer: scale 1/3, fan-out mode, uniform, unseeded.

    A fresh instance is returned on every call.
    """
    settings = dict(scale=1./3., mode='fan_out', distribution='uniform', seed=None)
    return VarianceScaling(**settings)

In [None]:
class SEBlock(Model):
    """Squeeze-and-excitation gate that rescales the channels of its input.

    Args:
        channels: Number of channels of the input feature map (and of the gate).
        se_ratio: Integer divisor; the bottleneck Dense layer has
            ``channels // se_ratio`` units.
    """

    def __init__(self, channels, se_ratio):
        super().__init__()
        self.channels = channels
        bottleneck_units = channels // se_ratio
        self.p1 = GlobalAveragePooling2D()
        # Bottleneck projection; swish is applied separately in call().
        self.d1 = Dense(
            bottleneck_units,
            activation=None,
            use_bias=False,
            kernel_initializer=conv_init(),
            kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER),
        )
        # Sigmoid layer producing one gate value per channel.
        self.d2 = Dense(
            channels,
            activation='sigmoid',
            use_bias=False,
            kernel_initializer=conv_init(),
            kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER),
        )
        self.m1 = tf.keras.layers.Multiply()

    def call(self, inputs):
        """Return ``inputs`` multiplied by the per-channel gate."""
        pooled = self.p1(inputs)
        reduced = swish(self.d1(pooled))
        gate = self.d2(reduced)
        # Reshape to (-1, 1, 1, channels) so the gate broadcasts in the multiply.
        gate = tf.reshape(gate, [-1, 1, 1, self.channels])
        return self.m1([inputs, gate])

In [None]:
class MBConvBlock(Model):
    """Inverted-bottleneck block: 1x1 expand -> depthwise conv -> SE -> 1x1 project.

    Args:
        channels: Number of output channels (filters of the final 1x1 conv).
        kernel_size: Kernel size of the depthwise convolution.
        strides: Stride of the depthwise convolution; values > 1 downsample
            the feature map inside the block.
        expand_ratio: Multiplier applied to ``channels`` to obtain the width of
            the expansion conv and of the SE block.
        dropout_rate: Rate of the Dropout layer applied to the projected output.
        skip: If true, the block input is added to the block output. The caller
            must ensure both tensors have the same shape (``strides == 1`` and
            an input that already has ``channels`` channels).
    """

    def __init__(self,channels,kernel_size,strides,expand_ratio,dropout_rate,skip):
        super(MBConvBlock,self).__init__()
        self.se_ratio = 4
        self.expand_ratio = expand_ratio
        self.dropout_rate = dropout_rate
        self.strides = strides
        self.skip = skip
        # 1x1 expansion to channels * expand_ratio.
        self.c1 = Conv2D(filters=channels*expand_ratio, kernel_size=1, strides=1, padding='same',use_bias=False,
                         kernel_initializer=conv_init(),kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))
        self.b1 = BatchNormalization(momentum=0.9)
        # Depthwise conv carries the block's stride, so the requested
        # downsampling actually takes place.
        self.c2 = DepthwiseConv2D(kernel_size=kernel_size, strides=strides, padding='same', use_bias=False,
                                  depthwise_initializer=conv_init(),depthwise_regularizer=tf.keras.regularizers.l2(REGULARIZER))
        self.b2 = BatchNormalization(momentum=0.9)
        self.se = SEBlock(channels=channels*expand_ratio,se_ratio=self.se_ratio)
        # 1x1 projection back to `channels`; no activation follows it in call().
        self.c3 = Conv2D(filters=channels, kernel_size=1, strides=1, padding='same', use_bias=False,
                         kernel_initializer=conv_init(),kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER))
        self.b3 = BatchNormalization(momentum=0.9)
        self.d1 = Dropout(self.dropout_rate)

    def call(self,inputs):
        """Run the block on ``inputs`` and add the residual when ``skip`` is set."""
        x = self.c1(inputs)
        x = self.b1(x)
        x = swish(x)
        x = self.c2(x)
        x = self.b2(x)
        x = swish(x)
        x = self.se(x)
        x = self.c3(x)
        x = self.b3(x)
        outputs = self.d1(x)
        if self.skip :
            outputs += inputs
        return outputs

In [None]:
class EfficientNetB0(Model):
    """EfficientNet-B0-style classifier with a 10-way softmax head.

    Layout: 3x3 stem conv -> seven stages of ``MBConvBlock`` -> 1x1 conv to 1280
    channels -> global average pooling -> dropout -> Dense(10, softmax).
    The first block of each stage receives the stage stride and has no skip
    connection; the remaining blocks of the stage receive stride 1 and
    ``skip=True``.
    """

    def __init__(self):
        super().__init__()
        # Per-stage configuration (one entry per stage).
        stage_channels = [16,24,40,80,112,192,320]
        stage_kernels = [3,3,5,3,5,5,3]
        stage_strides = [1,2,2,2,1,2,1]
        stage_expansions = [1,6,6,6,6,6,6]
        stage_repeats = [1,2,2,3,3,4,1]
        self.channels = stage_channels
        self.kernel_size = stage_kernels
        self.strides = stage_strides
        self.expand_ratio = stage_expansions
        self.layerNum = stage_repeats
        self.dropout_rate = 0.2

        # Stem.
        self.c1 = Conv2D(
            filters=32, kernel_size=3, strides=1, padding='same', use_bias=False,
            kernel_initializer=conv_init(),
            kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER),
        )
        self.b1 = BatchNormalization(momentum=0.9)

        # MBConv stages. Each block's dropout rate grows linearly with its
        # 1-based position (self.block_num) among all blocks.
        total_blocks = sum(stage_repeats)
        self.block_num = 1
        self.blocks = Sequential()
        stage_specs = zip(stage_channels, stage_kernels, stage_strides,
                          stage_expansions, stage_repeats)
        for out_channels, kernel, stage_stride, expansion, repeats in stage_specs:
            for repeat_idx in range(repeats):
                is_first = repeat_idx == 0
                drop = self.dropout_rate * float(self.block_num) / total_blocks
                self.blocks.add(MBConvBlock(
                    channels=out_channels,
                    kernel_size=kernel,
                    strides=stage_stride if is_first else 1,
                    expand_ratio=expansion,
                    dropout_rate=drop,
                    skip=not is_first,
                ))
                self.block_num += 1

        # Head.
        self.c2 = Conv2D(
            filters=1280, kernel_size=1, strides=1, padding='same', use_bias=False,
            kernel_initializer=conv_init(),
            kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER),
        )
        self.b2 = BatchNormalization(momentum=0.9)
        self.p1 = GlobalAveragePooling2D()
        self.d1 = Dropout(self.dropout_rate)
        self.f1 = Dense(
            10, activation='softmax', kernel_initializer="he_normal",
            kernel_regularizer=tf.keras.regularizers.l2(REGULARIZER),
        )

    def call(self, inputs):
        """Return the softmax class probabilities for a batch of images."""
        stem = swish(self.b1(self.c1(inputs)))
        features = self.blocks(stem)
        head = swish(self.b2(self.c2(features)))
        pooled = self.d1(self.p1(head))
        return self.f1(pooled)

In [None]:
# Build the network and configure optimizer, loss and metric.
model = EfficientNetB0()

optimizer = tf.keras.optimizers.SGD(learning_rate=lr, momentum=0.9, nesterov=True, clipnorm=2.)
loss_fn = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

# Learning-rate schedule driven by scheduler().
lr_schedule_cb = tf.keras.callbacks.LearningRateScheduler(scheduler)
# Disabled alternative:
#tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, min_lr=0.0001, patience=10, cooldown=0)

# Save the full model whenever val_accuracy improves.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_save_path,
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=False,
)
# Disabled early stopping:
# tf.keras.callbacks.EarlyStopping(
#     monitor = 'val_accuracy',
#     patience=15,
#     baseline=None),

# TensorBoard logging (graph and per-epoch histograms).
tensorboard_cb = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir, histogram_freq=1, write_graph=True, write_images=False)

callbacks = [lr_schedule_cb, checkpoint_cb, tensorboard_cb]

# Train on augmented batches; validate on the test split after every epoch.
train_batches = DataGenTrain.flow(x_train, y_train, batch_size=batch_size, shuffle=True)
hist = model.fit(
    train_batches,
    epochs=epochs,
    validation_data=(x_test, y_test),
    validation_freq=1,
    callbacks=callbacks,
)

model.summary()

In [None]:
#结果可视化
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
plt.style.use({'figure.figsize':(6,4)})

plt.plot(hist.history['loss'], label='loss')
plt.plot(hist.history['val_loss'], label='val_loss')
plt.legend()
plt.show()
plt.plot(hist.history['val_accuracy'], label='val_accuracy')
plt.legend()
plt.show()

In [None]:
# Export the trained model.
# NOTE(review): './' is the current working directory — confirm this is the
# intended export location.
model.save(filepath='./')

In [None]:
# TensorBoard visualization: run the command below, then open the URL in a browser.
#!tensorboard --logdir=./Model/EfficientNetB0_logs
#http://localhost:6006/

In [None]:
# Report the highest validation accuracy and the 1-based epoch where it first occurred.
val_acc_history = hist.history['val_accuracy']
best_val_acc = max(val_acc_history)
best_epoch = 1 + val_acc_history.index(best_val_acc)
print('best result: {:.2f}%  ({}epochs)'.format(100*best_val_acc, best_epoch))
# best result: