In [None]:
# Simon Yoon, Steven Cho
# DL Midterm Project
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

import keras
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, Activation, MaxPooling2D
from keras.datasets.cifar10 import load_data
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.optimizers import RMSprop
import matplotlib.pyplot as plt

# Micro-batch layout, as specified in paper
MICROBATCH_SIZE = 4
NUM_MICROBATCHES = 400
# Number of samples per training batch handed to model.fit
BATCH_SIZE = MICROBATCH_SIZE * NUM_MICROBATCHES
NUM_EPOCHS = 2

# The data is loaded from keras.datasets.cifar10, which has 10 label classes,
# so the softmax output layer needs 10 units (it was 100 before).
NUM_CLASSES = 10
# Spatial size and channel count of one input image
HEIGHT = 32
WIDTH = 32
NUM_CHANNELS = 3

In [None]:
from keras import backend as K
from keras.layers import Layer

class CustomBatchNorm(Layer):
    """Batch (re)normalization applied per micro-batch on channels-last inputs.

    The incoming 4-D batch is viewed as groups of ``MICROBATCH_SIZE``
    consecutive samples. In training mode every group is normalized with its
    own per-channel mean/variance (taken over the group, height and width
    axes); otherwise the moving statistics are used. When ``renorm`` is true,
    the training-mode output is additionally corrected with the clipped,
    gradient-stopped factors ``r`` and ``d`` computed from the moving
    statistics.

    Args:
        renorm_clipping: dict with optional keys ``"rmax"``, ``"rmin"`` and
            ``"dmax"`` bounding the correction factors. ``None`` or missing
            keys fall back to the defaults described below.
        r_max_value: fallback upper bound for ``r`` when ``"rmax"`` is absent.
        d_max_value: fallback bound for ``|d|`` when ``"dmax"`` is absent.
        t_delta: stored on the layer; not used by the code in this class.
        renorm: whether to apply the ``r``/``d`` correction while training.
        renorm_momentum: momentum of the moving mean / variance.
        **kwargs: forwarded to ``Layer.__init__`` (e.g. ``name``).

    Raises:
        ValueError: if ``renorm_clipping`` contains an unsupported key.
    """

    def __init__(
        self,
        renorm_clipping,
        r_max_value=3,
        d_max_value=5,
        t_delta=1e-3,
        renorm=False,
        renorm_momentum=0.99,
        **kwargs
    ):
        super(CustomBatchNorm, self).__init__(**kwargs)
        self.epsilon = 1e-4
        renorm_clipping = renorm_clipping or {}
        keys = ["rmax", "rmin", "dmax"]
        if set(renorm_clipping) - set(keys):
            raise ValueError(
                "Received invalid keys for `renorm_clipping` argument: "
                f"{renorm_clipping}. Supported values: {keys}."
            )

        self.renorm = renorm
        self.renorm_clipping = renorm_clipping
        self.renorm_momentum = renorm_momentum
        self.r_max_value = r_max_value
        self.d_max_value = d_max_value
        self.t_delta = t_delta

    def build(self, input_shape):
        """Create the per-channel weights; the channel axis is the last one."""
        channels = tf.TensorShape(input_shape)[-1]
        # Pass a real 1-tuple: `(channels)` without the comma is just an int.
        param_shape = (channels,)

        # Learnable shift and scale applied after normalization.
        self.beta = self.add_weight(
            name="beta",
            shape=param_shape,
            initializer="zeros",
            trainable=True,
            dtype=tf.float32,
        )
        self.gamma = self.add_weight(
            name="gamma",
            shape=param_shape,
            initializer="ones",
            trainable=True,
            dtype=tf.float32,
        )

        # Running statistics, updated in `call` while training and used as
        # the normalization statistics otherwise.
        self.moving_mean = self.add_weight(
            name="moving_mean",
            shape=param_shape,
            initializer="zeros",
            trainable=False,
            dtype=tf.float32,
        )
        self.moving_variance = self.add_weight(
            name="moving_variance",
            shape=param_shape,
            initializer="ones",
            trainable=False,
            dtype=tf.float32,
        )

    def get_moving_average(self, statistic, new_value):
        """Return ``statistic`` after one exponential-moving-average step.

        Args:
            statistic: current per-channel moving statistic.
            new_value: 5-D tensor of per-micro-batch statistics (as produced
                with ``keepdims=True`` in ``call``); it is averaged over its
                four leading axes to obtain one value per channel.

        Returns:
            ``momentum * statistic + (1 - momentum) * mean(new_value)``.
        """
        alpha = 1 - self.renorm_momentum
        batch_stat = tf.reduce_mean(new_value, axis=[0, 1, 2, 3])
        # Equivalent to momentum*statistic + alpha*batch_stat. The previous
        # form also multiplied `statistic` by the momentum, which shrank the
        # running value on every step.
        return statistic + (batch_stat - statistic) * alpha

    def call(self, inputs, training=None):
        """Normalize ``inputs`` and return a tensor of the same shape.

        Args:
            inputs: 4-D tensor laid out as [N, H, W, C]. N must be a multiple
                of ``MICROBATCH_SIZE``, otherwise the reshape below fails.
            training: truthy to use micro-batch statistics and update the
                moving averages; falsy (including ``None``) to normalize with
                the moving averages.
        """
        input_shape = tf.shape(inputs)
        # Original image dimension: [N, H, W, C]
        # Reshaped dimension: [N / M, M, H, W, C] with M = MICROBATCH_SIZE.
        # Floor division keeps the dimension an integer; `/` would produce a
        # float, which is not a valid shape entry.
        inputs_shaped = tf.reshape(inputs, [
            input_shape[0] // MICROBATCH_SIZE,
            MICROBATCH_SIZE,
            input_shape[1],
            input_shape[2],
            input_shape[3],
        ])

        # Identity correction unless renorm is active in training mode.
        r, d = 1, 0
        if training:
            # One mean/variance per micro-batch and channel.
            mean, var = tf.nn.moments(inputs_shaped, axes=[1, 2, 3], keepdims=True)

            if self.renorm:
                rmax = self.renorm_clipping.get('rmax', self.r_max_value)
                # Default lower bound is the reciprocal of the upper bound.
                rmin = self.renorm_clipping.get('rmin', 1 / rmax)
                dmax = self.renorm_clipping.get('dmax', self.d_max_value)

                std_batch = tf.sqrt(var + self.epsilon)
                moving_std = K.sqrt(self.moving_variance + self.epsilon)
                # r and d are treated as constants during backpropagation.
                r = K.stop_gradient(K.clip(std_batch / moving_std, rmin, rmax))
                d = K.stop_gradient(
                    K.clip((mean - self.moving_mean) / moving_std, -dmax, dmax)
                )

            # Update the running statistics after r and d were derived from
            # their previous values.
            self.moving_mean.assign(self.get_moving_average(self.moving_mean, mean))
            self.moving_variance.assign(self.get_moving_average(self.moving_variance, var))
        else:
            mean, var = self.moving_mean, self.moving_variance

        x = (inputs_shaped - mean) / tf.sqrt(var + self.epsilon) * r + d
        return tf.reshape(self.gamma * x + self.beta, input_shape)

In [None]:
# Fetch the train / test splits through the cifar10 loader imported above
(x_train, y_train), (x_test, y_test) = load_data()

# Cast the images to float32 and divide by 255 in a single step
# (for 8-bit pixel values this maps them into [0, 1])
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

In [None]:
# For RAM: keep only the first 6400 training images and their labels
x_train, y_train = x_train[:6400], y_train[:6400]

In [None]:
# Number of epochs completed so far; advanced by RenormCallback.
curr_epoch = 0


class RenormCallback(tf.keras.callbacks.Callback):
    """Keras callback that counts finished epochs in the global `curr_epoch`.

    NOTE(review): the model-building cell evaluates get_rmax(curr_epoch) /
    get_dmax(curr_epoch) once, when the layers are constructed, so advancing
    the counter here does not appear to change the clipping bounds of an
    already-built model -- confirm whether that is intended.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Increment the module-level counter; `epoch` and `logs` are unused."""
        global curr_epoch
        curr_epoch += 1

def get_rmax(num_epoch):
    """Return the `rmax` clipping bound to use at epoch `num_epoch`.

    Below the 20-epoch threshold the bound is 1; from the threshold on it is
    1 + 2 * num_epoch / NUM_EPOCHS.

    NOTE(review): with NUM_EPOCHS = 2 the threshold is never reached during
    training -- confirm the intended schedule.
    """
    warmup_epochs = 20
    return 1 if num_epoch < warmup_epochs else 1 + 2*(num_epoch)/(NUM_EPOCHS)


def get_dmax(num_epoch):
    """Return the `dmax` clipping bound to use at epoch `num_epoch`.

    Below the 20-epoch threshold the bound is 0; from the threshold on it is
    6 * num_epoch / NUM_EPOCHS.

    NOTE(review): with NUM_EPOCHS = 2 the threshold is never reached during
    training -- confirm the intended schedule.
    """
    warmup_epochs = 20
    return 0 if num_epoch < warmup_epochs else 6 * (num_epoch / NUM_EPOCHS)

In [None]:
from tensorflow.keras import Input

input_shape = (HEIGHT, WIDTH, NUM_CHANNELS)

# (filters, dropout rate) for each convolutional stage; every stage is
# Conv -> CustomBatchNorm -> Conv -> CustomBatchNorm -> Dropout.
conv_stages = [
    (32, 0.2),
    (64, 0.3),
    # (128, 0.4),  # third stage is disabled in this model
]

model = Sequential()

# Only the very first convolution declares the input shape.
first_conv_kwargs = {'input_shape': input_shape}
for stage_filters, stage_dropout in conv_stages:
    for _ in range(2):
        model.add(Conv2D(
            stage_filters,
            (3, 3),
            activation='elu',
            padding='same',
            kernel_regularizer=tf.keras.regularizers.l2(.0001),
            **first_conv_kwargs
        ))
        first_conv_kwargs = {}

        # Clipping bounds are read from the schedule once, at construction
        # time; each layer gets its own dict.
        rmax = get_rmax(curr_epoch)
        model.add(CustomBatchNorm(
            renorm=True,
            renorm_clipping={'rmax': rmax, 'rmin': 1/rmax, 'dmax': get_dmax(curr_epoch)},
        ))
    model.add(Dropout(stage_dropout))

# Classifier head
model.add(Flatten())
model.add(Dense(NUM_CLASSES, activation=tf.nn.softmax))

# Compile the model
model.compile(
    loss=sparse_categorical_crossentropy,
    optimizer=RMSprop(),
    metrics=['accuracy'],
)

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 32, 32, 32)        896       
                                                                 
 custom_batch_norm_4 (Custom  (None, 32, 32, 32)       131       
 BatchNorm)                                                      
                                                                 
 conv2d_5 (Conv2D)           (None, 32, 32, 32)        9248      
                                                                 
 custom_batch_norm_5 (Custom  (None, 32, 32, 32)       131       
 BatchNorm)                                                      
                                                                 
 dropout_2 (Dropout)         (None, 32, 32, 32)        0         
                                                                 
 conv2d_6 (Conv2D)           (None, 32, 32, 64)       

In [None]:
# Train the current model (the CustomBatchNorm network built above) with
# 20% of the training data held out for validation; RenormCallback advances
# `curr_epoch` at the end of every epoch.
history = model.fit(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    validation_split=0.2,
    verbose=1,
    callbacks=[RenormCallback()],
)

In [None]:
from tensorflow.keras.layers import BatchNormalization

input_shape = (HEIGHT, WIDTH, NUM_CHANNELS)

# Baseline network using the stock BatchNormalization layer.
# NOTE(review): this model has a third (128-filter) stage that is commented
# out in the CustomBatchNorm model, so the two architectures differ --
# confirm that the comparison is meant to be between unequal networks.
model = Sequential()
model.add(Input(shape=input_shape, batch_size=BATCH_SIZE))

# Each stage: Conv -> BatchNorm -> Conv -> BatchNorm -> Dropout
for stage_filters, stage_dropout in ((32, 0.2), (64, 0.3), (128, 0.4)):
    for _ in range(2):
        model.add(Conv2D(
            stage_filters,
            (3, 3),
            activation='elu',
            padding='same',
            kernel_regularizer=tf.keras.regularizers.l2(.0001),
        ))
        model.add(BatchNormalization())
    model.add(Dropout(stage_dropout))

# Classifier head
model.add(Flatten())
model.add(Dense(NUM_CLASSES, activation=tf.nn.softmax))

# Compile the model
model.compile(
    loss=sparse_categorical_crossentropy,
    optimizer=RMSprop(),
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Train the current model (the BatchNormalization baseline built above) with
# the same batch size, epoch count and 20% validation hold-out; no callback.
history2 = model.fit(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    validation_split=0.2,
    verbose=1,
)

In [None]:
# Plot the validation accuracy of both runs on a shared, 1-based epoch axis
y1 = history.history['val_accuracy']
y2 = history2.history['val_accuracy']
x1 = np.arange(1, len(y1) + 1)
# If the two runs have a different number of epochs, stretch the second
# curve so that both end at the same x position. Scaling an integer range
# guarantees exactly len(y2) points, unlike np.arange with a float step.
k = len(y1) / len(y2)
x2 = k * np.arange(1, len(y2) + 1)

fig, ax = plt.subplots()
line1, = ax.plot(x1, y1, 'r', label='Batch Renormalization')
line2, = ax.plot(x2, y2, 'b--', label='Batch Normalization')
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend(loc=4)

# Save before showing: once plt.show() has rendered the figure,
# a following plt.savefig() would write an empty image.
fig.savefig("batchrenorm.png")
plt.show()
