In [101]:
import numpy as np
from tensorflow import keras

# Vanishing/Exploding Gradients Problems

### Non Saturating Activation Function

In [102]:
# Leaky ReLU instance with alpha=0.2, reused as the activation of the layer below.
leaky_relu = keras.layers.LeakyReLU(alpha=0.2)
# Dense layer using the leaky ReLU above together with He-normal initialization.
layer = keras.layers.Dense(
    10,
    activation=leaky_relu,
    kernel_initializer='he_normal',
)

In [103]:
# Dense layer using the SELU activation together with LeCun-normal initialization.
layer = keras.layers.Dense(10, activation='selu', kernel_initializer='lecun_normal')

### Batch Normalization

In [104]:
# Classifier for 28x28 inputs with a BatchNormalization layer after the
# flattened input and after each hidden Dense layer.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(300, activation='elu', kernel_initializer='he_normal'))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(100, activation='elu', kernel_initializer='he_normal'))
model.add(keras.layers.BatchNormalization())
# Output layer: 10 units with softmax.
model.add(keras.layers.Dense(10, activation='softmax'))

In [105]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
batch_normalization (BatchNo (None, 784)               3136      
_________________________________________________________________
dense_2 (Dense)              (None, 300)               235500    
_________________________________________________________________
batch_normalization_1 (Batch (None, 300)               1200      
_________________________________________________________________
dense_3 (Dense)              (None, 100)               30100     
_________________________________________________________________
batch_normalization_2 (Batch (None, 100)               400       
_________________________________________________________________
dense_4 (Dense)              (None, 10)                1010      

In [106]:
# List every variable of the first BatchNormalization layer (model.layers[1])
# together with its `trainable` flag.
[(var.name, var.trainable) for var in model.layers[1].variables]

[('batch_normalization/gamma:0', True),
 ('batch_normalization/beta:0', True),
 ('batch_normalization/moving_mean:0', False),
 ('batch_normalization/moving_variance:0', False)]

In [107]:
# Inspect the `updates` attribute of the first BatchNormalization layer
# (the recorded output below is an empty list).
model.layers[1].updates

[]

### Gradient Clipping

In [109]:
# SGD with value clipping (clipvalue=1.0); this optimizer is the one the model is compiled with.
optimizer = keras.optimizers.SGD(clipvalue=1.0)
model.compile(loss='mse', optimizer=optimizer)

# Alternative: SGD with norm clipping (clipnorm=1.0).
# NOTE(review): this second optimizer is created but never passed to compile() in this cell.
optimizer = keras.optimizers.SGD(clipnorm=1.0)

# Reusing Pretrained Layers

### Transfer Learning with Keras

In [110]:
# Load the model that was previously trained on task A.
model_A = keras.models.load_model("my_model_A.h5")

# Clone model A and copy its weights over explicitly, so that training
# model B does not affect A.
model_A_clone = keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())

# Reuse every layer except the output layer. The layers are taken from the
# clone (not from model_A itself), otherwise model B would share layer objects
# with model A and training B would also modify A.
model_B_on_A = keras.models.Sequential(model_A_clone.layers[:-1])
# New output layer for task B: one sigmoid unit.
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))

OSError: SavedModel file does not exist at: my_model_A.h5/{saved_model.pbtxt|saved_model.pb}

In [None]:
# Freeze every reused layer (all but the last one) before compiling.
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = False # Freeze this reused layer

# Compile after the trainable flags have been set.
model_B_on_A.compile(loss="binary_crossentropy", optimizer="sgd", metrics=["accuracy"])

In [None]:
# Phase 1: train for a few epochs with the current trainable flags.
history = model_B_on_A.fit(X_train_B, y_train_B, epochs=4, validation_data=(X_valid_B, y_valid_B))

# Phase 2: mark every reused layer as trainable again.
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = True

# Lower learning rate to avoid damaging the reused weights.
# `learning_rate` replaces the deprecated `lr` argument name.
optimizer = keras.optimizers.SGD(learning_rate=1e-4)

# Recompile after changing the trainable flags, then continue training.
model_B_on_A.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
history = model_B_on_A.fit(X_train_B, y_train_B, epochs=16, validation_data=(X_valid_B, y_valid_B))

### Unsupervised Pretraining

In [205]:
# Morse table: dot/dash sequence -> lowercase letter or digit.
MORSE = {'.-':    'a', '-...':  'b', '-.-.':  'c',
         '-..':   'd', '.':     'e', '..-.':  'f',
         '--.':   'g', '....':  'h', '..':    'i',
         '.---':  'j', '-.-':   'k', '.-..':  'l',
         '--':    'm', '-.':    'n', '---':   'o',
         '.--.':  'p', '--.-':  'q', '.-.':   'r',
         '...':   's', '-':     't', '..-':   'u',
         '...-':  'v', '.--':   'w', '-..-':  'x',
         '-.--':  'y', '--..':  'z', '-----': '0',
         '.----': '1', '..---': '2', '...--': '3',
         '....-': '4', '.....': '5', '-....': '6',
         '--...': '7', '---..': '8', '----.': '9'
        }


def morse_decoder(code):
    """Decode a Morse-code message into text.

    Symbol groups (letters/digits) are separated by a single space and words
    by three spaces. The result is passed through ``str.capitalize``, so only
    its first character is upper-cased.

    Unlike the previous version, this function does not modify the global
    ``MORSE`` table.

    Args:
        code: Message made of ``.`` and ``-`` groups separated by spaces.

    Returns:
        The decoded text, e.g. ``'... --- ...'`` -> ``'Sos'``.

    Raises:
        ValueError: If a symbol group is not a key of ``MORSE``.
    """
    decoded_words = []
    # Three spaces mark a word boundary.
    for word in code.split('   '):
        letters = []
        # split() without arguments ignores stray/leading/trailing spaces.
        for symbol in word.split():
            letter = MORSE.get(symbol)
            if letter is None:
                # Fail with a clear message instead of a TypeError from join().
                raise ValueError(f"Unknown Morse sequence: {symbol!r}")
            letters.append(letter)
        if letters:  # skip empty words produced by over-long gaps
            decoded_words.append(''.join(letters))
    return ' '.join(decoded_words).capitalize()

# Demo and self-checks for morse_decoder; the guard is true when this cell is
# executed directly (see the recorded output below).
if __name__ == '__main__':
    print("Example:")
    print(morse_decoder('... --- ...'))

    # These asserts are only for self-checking and are not necessary for auto-testing.
    assert morse_decoder("... --- -- .   - . -..- -") == "Some text"
    assert morse_decoder("..--- ----- .---- ---..") == "2018"
    assert morse_decoder(".. -   .-- .- ...   .-   --. --- --- -..   -.. .- -.--") == "It was a good day"
    print("Coding complete? Click 'Check' to earn cool rewards!")


Example:
Sos
Coding complete? Click 'Check' to earn cool rewards!


In [183]:
# Map the empty token to a space; the scratch decoding cell below relies on
# this entry when it looks up the empty string produced between words.
MORSE[''] = ' '

In [203]:
# Sample message: letters separated by one space, words by three spaces.
code = "... --- -- .   - . -..- -"
code

'... --- -- .   - . -..- -'

In [204]:
# Step-by-step version of the decoder. Replacing double spaces turns each
# three-space word gap into two spaces, so split(' ') yields an empty token
# between words; that token is looked up as MORSE[''] (a space).
# NOTE(review): `code` is rebound from str to list here, so this cell cannot
# be re-run without first re-running the cell that defines `code`.
code = code.replace('  ', ' ').split(' ')
code = [MORSE.get(item) for item in code]
str(''.join(code)).capitalize()

'Some text'

In [182]:
# Display the current contents of the Morse table.
MORSE

{'.-': 'a',
 '-...': 'b',
 '-.-.': 'c',
 '-..': 'd',
 '.': 'e',
 '..-.': 'f',
 '--.': 'g',
 '....': 'h',
 '..': 'i',
 '.---': 'j',
 '-.-': 'k',
 '.-..': 'l',
 '--': 'm',
 '-.': 'n',
 '---': 'o',
 '.--.': 'p',
 '--.-': 'q',
 '.-.': 'r',
 '...': 's',
 '-': 't',
 '..-': 'u',
 '...-': 'v',
 '.--': 'w',
 '-..-': 'x',
 '-.--': 'y',
 '--..': 'z',
 '-----': '0',
 '.----': '1',
 '..---': '2',
 '...--': '3',
 '....-': '4',
 '.....': '5',
 '-....': '6',
 '--...': '7',
 '---..': '8',
 '----.': '9',
 ' ': ' '}

# Faster Optimizers

### Momentum Optimization

In [206]:
# SGD with momentum; `learning_rate` replaces the deprecated `lr` argument name.
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)

### Nesterov Accelerated Gradient

In [207]:
# SGD with Nesterov momentum; `learning_rate` replaces the deprecated `lr` argument name.
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)

### RMSProp

In [208]:
# RMSprop optimizer; `learning_rate` replaces the deprecated `lr` argument name.
optimizer = keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)

### Adam optimization

In [209]:
# Adam optimizer; `learning_rate` replaces the deprecated `lr` argument name.
optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)

## Learning Rate Scheduling

### Power Scheduling

In [210]:
# SGD with a `decay` term for power scheduling; `learning_rate` replaces the
# deprecated `lr` argument name.
# NOTE(review): the `decay` argument is also deprecated in newer Keras
# optimizers; confirm it is supported by the installed version.
optimizer = keras.optimizers.SGD(learning_rate=0.01, decay=1e-4)

### Exponential Scheduling

In [211]:
def exponential_decay(lr0, s):
    """Build an exponential learning-rate schedule.

    Args:
        lr0: Learning rate at epoch 0.
        s: Number of epochs over which the rate is multiplied by 0.1.

    Returns:
        A function mapping an epoch index to ``lr0 * 0.1 ** (epoch / s)``.
    """
    def exponential_decay_fn(epoch):
        # Fraction of a "divide by ten" step reached at this epoch.
        decay_factor = 0.1 ** (epoch / s)
        return lr0 * decay_factor

    return exponential_decay_fn


# Schedule starting at 0.01 and shrinking tenfold every 20 epochs.
exponential_decay_fn = exponential_decay(lr0=0.01, s=20)

In [212]:
# Wrap the exponential schedule in a Keras callback and pass it to fit().
lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)
# NOTE(review): `[...]` is a placeholder for the remaining fit() arguments, and
# X_train_scaled / y_train are not defined in the visible part of this notebook
# (hence the NameError recorded below).
history = model.fit(X_train_scaled, y_train, [...], callbacks=[lr_scheduler])

NameError: name 'X_train_scaled' is not defined

# Avoid overfitting through regularization

### L1 and L2 regularization

In [215]:
# Dense layer whose kernel carries an l2 regularizer with factor 0.01.
layer = keras.layers.Dense(100, activation='elu', kernel_initializer='he_normal', 
                           kernel_regularizer=keras.regularizers.l2(0.01))

In [216]:
from functools import partial

# Dense-layer factory with shared defaults (ELU, He-normal init, l2(0.01));
# any of them can be overridden per call.
RegularizedDense = partial(
    keras.layers.Dense,
    activation='elu',
    kernel_initializer='he_normal',
    kernel_regularizer=keras.regularizers.l2(0.01),
)

model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(RegularizedDense(300))
model.add(RegularizedDense(100))
# The output layer overrides the activation and initializer but keeps the l2 regularizer.
model.add(RegularizedDense(10, activation='softmax', kernel_initializer='glorot_uniform'))

### Dropout

In [217]:
# Classifier with a Dropout layer (rate 0.2) placed before every Dense layer.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(10, activation="softmax"))

### Monte-Carlo Dropout

In [None]:
# Monte-Carlo dropout: run 100 forward passes with the model called in
# training mode (training=True keeps Dropout active), then average them.
# This replaces the deprecated keras.backend.learning_phase_scope(1) context
# and fixes the unbalanced parenthesis of the original statement.
# NOTE(review): X_test_scaled is not defined in the visible part of this notebook.
y_probas = np.stack([model(X_test_scaled, training=True) for sample in range(100)])

# Mean over the 100 stochastic passes (axis 0 indexes the passes).
y_proba = y_probas.mean(axis=0)

### Max-Norm Regularization

In [218]:
# Dense layer whose kernel is constrained with max_norm(1).
# The layer is not assigned to a name; the cell only displays its repr.
keras.layers.Dense(100, activation='elu', kernel_initializer='he_normal', 
                   kernel_constraint=keras.constraints.max_norm(1) )

<tensorflow.python.keras.layers.core.Dense at 0x7f3cb42d3110>