In [1]:
import tensorflow
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Dense layer using the SELU activation, paired with LeCun-normal weight initialization
layer = keras.layers.Dense(
    10,
    activation="selu",
    kernel_initializer="lecun_normal",
)

## Batch Normalization with Keras

In [3]:
# Classifier with a BatchNormalization layer after the flattened input and
# after each hidden ELU layer; layers are appended one at a time.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28,28]))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(10, activation="softmax"))

2022-05-29 22:29:46.162103: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-29 22:29:46.190495: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-29 22:29:46.190665: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-29 22:29:46.191395: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [4]:
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 batch_normalization (BatchN  (None, 784)              3136      
 ormalization)                                                   
                                                                 
 dense_1 (Dense)             (None, 300)               235500    
                                                                 
 batch_normalization_1 (Batc  (None, 300)              1200      
 hNormalization)                                                 
                                                                 
 dense_2 (Dense)             (None, 100)               30100     
                                                                 
 batch_normalization_2 (Batc  (None, 100)              4

In [5]:
# Collect (name, trainable) for every variable of the first BatchNormalization
# layer (index 1, right after Flatten) to see which ones backprop updates.
first_BN_var = [
    (variable.name, variable.trainable)
    for variable in model.layers[1].variables
]
print(first_BN_var)

[('batch_normalization/gamma:0', True), ('batch_normalization/beta:0', True), ('batch_normalization/moving_mean:0', False), ('batch_normalization/moving_variance:0', False)]


In [6]:
# Inspect the update ops attached to the first BatchNormalization layer.
# NOTE(review): the recorded output of this cell is an empty list, and
# `Layer.updates` appears to be deprecated in TF 2.x — confirm before relying on it.
model.layers[1].updates

  model.layers[1].updates


[]

In [7]:
# Batch normalization placed BEFORE the activation function
model1 = keras.models.Sequential([
    # Flatten each 28x28 input into a single vector
    keras.layers.Flatten(input_shape=[28,28]),
    keras.layers.BatchNormalization(),

    # Hidden block 1: the Dense layer has no activation and no bias, because the
    # BatchNormalization layer that follows has its own offset (beta).
    keras.layers.Dense(300, kernel_initializer="he_normal", use_bias=False),
    keras.layers.BatchNormalization(),
    # The activation is applied as a separate layer, after normalization
    keras.layers.Activation("elu"),

    # Hidden block 2: same pattern (linear Dense -> BN -> activation)
    keras.layers.Dense(100, kernel_initializer="he_normal", use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("elu"),

    # Output layer
    keras.layers.Dense(10, activation="softmax")
])

In [8]:
# Print the layer-by-layer summary of the "BN before activation" model
model1.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization_3 (Batc  (None, 784)              3136      
 hNormalization)                                                 
                                                                 
 dense_4 (Dense)             (None, 300)               235200    
                                                                 
 batch_normalization_4 (Batc  (None, 300)              1200      
 hNormalization)                                                 
                                                                 
 activation (Activation)     (None, 300)               0         
                                                                 
 dense_5 (Dense)             (None, 100)              

In [9]:
# Gradient clipping: clipvalue=1.0 asks the optimizer to clip each gradient
# component to the range [-1.0, 1.0] before applying the update.
optimizer = keras.optimizers.SGD(clipvalue=1.0)
model1.compile(
    optimizer=optimizer,
    loss="mse",
)

## Transfer Learning

## Optimizers

In [13]:
# Power scheduling: the learning rate starts at 0.01 and shrinks as training steps accumulate.
# NOTE(review): the `decay` keyword is presumably only accepted by older/legacy
# Keras optimizers — confirm against the installed Keras version.
optimizer = keras.optimizers.SGD(learning_rate=0.01, decay=1e-4)

In [14]:
# Exponential scheduling
def exponential_decay(lr0, s):
    """Build an epoch -> learning-rate function with exponential decay.

    Args:
        lr0: Learning rate at epoch 0.
        s: Number of epochs over which the learning rate drops by a factor of 10.

    Returns:
        A function taking the epoch index and returning ``lr0 * 0.1 ** (epoch / s)``.
    """
    def exponential_decay_fn(epoch):
        # Closure over lr0 and s, so the returned function only needs the epoch.
        exponent = epoch / s
        return lr0 * 0.1 ** exponent

    return exponential_decay_fn

# Concrete schedule: start at 0.01 and divide the learning rate by 10 every 20 epochs
exponential_decay_fn = exponential_decay(lr0=0.01, s=20)

lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn) # the callback calls this function with the epoch index to obtain the learning rate



## Using the learning rate scheduler
* Pass the callback to `fit`: `model.fit(X_train, y_train, callbacks=[lr_scheduler])`

In [15]:
# A scheduling function may also take the current learning rate as a second argument
def exponential_decay_fn(epoch, lr):
    """Return the current learning rate scaled by a fixed per-epoch factor.

    Args:
        epoch: Epoch index (unused; the decay is applied relative to ``lr``).
        lr: Current learning rate.

    Returns:
        ``lr`` multiplied by ``0.1 ** (1 / 20)``, i.e. a tenfold drop every 20 calls.
    """
    per_epoch_factor = 0.1 ** (1 / 20)
    return lr * per_epoch_factor

In [16]:
# Piecewise-constant scheduling
def piecewise_constant_fn(epoch):
    """Return a learning rate that is constant within fixed epoch ranges.

    Epochs below 5 use 0.01, epochs below 15 use 0.005, and every later
    epoch uses 0.001.
    """
    # (exclusive upper epoch bound, learning rate), in ascending bound order
    schedule = ((5, 0.01), (15, 0.005))
    for upper_bound, rate in schedule:
        if epoch < upper_bound:
            return rate
    return 0.001

In [17]:
# Wrap the piecewise-constant schedule in a LearningRateScheduler callback
lr_scheduler = keras.callbacks.LearningRateScheduler(piecewise_constant_fn)

In [18]:
# Performance-based scheduling -> ReduceLROnPlateau
# (multiply the learning rate by 0.5 after 5 epochs without improvement)
lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)

In [20]:
# Random (100, 10) array standing in for the training set
X_train = np.random.randn(100, 10)

# Another way to schedule the learning rate: a schedule object from keras.optimizers.schedules.
# presumably s = number of steps in 20 epochs at batch size 32 — TODO confirm
s = 20 * len(X_train) // 32
learning_rate = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=s,
    decay_rate=0.1,
)
# Configured this way, the schedule is evaluated once per training step
optimizer = keras.optimizers.SGD(learning_rate)

In [21]:
# Dense layer with an L2 penalty on its kernel weights
layer = keras.layers.Dense(
    100,
    activation="elu",
    kernel_initializer="he_normal",  # weight initialization scheme
    kernel_regularizer=keras.regularizers.l2(0.01),  # L2 regularization with factor 0.01
)

## 규제

In [22]:
# functools.partial pre-binds the arguments shared by every regularized Dense
# layer, so each layer below only states what differs.
from functools import partial

RegularizedDense = partial(
    keras.layers.Dense,
    activation="relu",
    kernel_initializer="he_normal",
    kernel_regularizer=keras.regularizers.l2(0.02),
)

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    # Hidden layers use the pre-bound defaults
    RegularizedDense(300),
    RegularizedDense(100),
    # Output layer overrides the activation and the initializer
    RegularizedDense(
        10,
        activation="softmax",
        kernel_initializer="glorot_uniform",
    ),
])

In [25]:
# Dropout: a Dropout layer (rate 0.2) is placed in front of every Dense layer
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28,28]))
model.add(keras.layers.Dropout(rate=0.2))
model.add(RegularizedDense(300))
model.add(keras.layers.Dropout(rate=0.2))
model.add(RegularizedDense(100))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(10, activation="softmax"))

### 몬테 카를로 드롭아웃
```
y_probas = np.stack([ model(X_test, training=True) for sample in range(100)]) # 이렇게 해서 앙상블 추론
y_proba = y_probas.mean(axis=0) # 앙상블 추론한 것 평균!
```


In [26]:
# Layers such as BatchNormalization behave differently during training, so the
# whole model should not be forced into training mode at prediction time.
# Instead, a dedicated Dropout subclass keeps only the dropout active.

class MCDropout(keras.layers.Dropout):  # the constructor is inherited unchanged from Dropout
    """Dropout layer that stays active at inference time (Monte Carlo dropout)."""

    def call(self, inputs, training=None):
        """Apply dropout to ``inputs`` regardless of the requested mode.

        Args:
            inputs: Input tensor, forwarded unchanged to ``Dropout.call``.
            training: Accepted so the signature stays compatible with
                ``Dropout.call`` (a caller may pass ``training=...`` explicitly);
                the value is deliberately ignored.

        Returns:
            The result of ``Dropout.call`` evaluated with ``training=True``.
        """
        return super().call(inputs, training=True)

In [28]:
# Monte Carlo dropout model: every Dense layer is preceded by BatchNormalization
# and an always-active MCDropout layer.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),

    # Input block
    keras.layers.BatchNormalization(),
    MCDropout(rate=0.5),
    RegularizedDense(300),

    # Hidden block
    keras.layers.BatchNormalization(),
    MCDropout(rate=0.5),
    RegularizedDense(100),

    # Output block
    keras.layers.BatchNormalization(),
    MCDropout(rate=0.5),
    keras.layers.Dense(10, activation="softmax")
])

In [29]:
# Max-norm regularization
layer = keras.layers.Dense(
    100,
    activation="elu",
    kernel_initializer="he_normal",
    # kernel_constraint keeps the magnitude of the weights in check
    kernel_constraint=keras.constraints.max_norm(1.),
)