### a.
*Exercise: Build a DNN with 20 hidden layers of 100 neurons each (that's too many, but it's the point of this exercise). Use He initialization and the ELU activation function.*

In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import os
import ssl
%load_ext tensorboard

In [2]:
def clear_session():
    """Reset Keras' global state and re-seed TensorFlow and NumPy.

    Both random generators are seeded with the same fixed value (42) so
    that a model built right after this call starts from a repeatable
    random state.
    """
    seed = 42
    keras.backend.clear_session()
    tf.random.set_seed(seed)
    np.random.seed(seed)

In [3]:
from functools import partial

# Factory for the hidden layers: a Dense layer pre-configured with He-normal
# weight initialization and the ELU activation. Despite its name, no kernel
# regularizer is configured here.
RegularizedDense = partial(
    keras.layers.Dense,
    activation=tf.nn.elu,
    kernel_initializer=keras.initializers.he_normal(),
)


In [4]:
# Baseline network: flatten each 32x32x3 image, then stack 20 hidden layers
# of 100 units each. The output layer is appended in a later cell.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
n_hidden_layers = 20
for _ in range(n_hidden_layers):
    hidden_layer = RegularizedDense(100)
    model.add(hidden_layer)



### b.
*Exercise: Using Nadam optimization and early stopping, train the network on the CIFAR10 dataset. You can load it with `keras.datasets.cifar10.load_data()`. The dataset is composed of 60,000 32 × 32–pixel color images (50,000 for training, 10,000 for testing) with 10 classes, so you'll need a softmax output layer with 10 neurons. Remember to search for the right learning rate each time you change the model's architecture or hyperparameters.*


In [5]:
# SECURITY: this swaps the standard library's default HTTPS context factory for
# the unverified one, so certificate verification is turned off for HTTPS
# requests made through `ssl` defaults for the rest of this kernel session.
# NOTE(review): presumably a workaround for certificate errors when downloading
# the dataset in the next cell -- confirm, and prefer fixing the local
# certificate store instead of disabling verification globally.
ssl._create_default_https_context = ssl._create_unverified_context

In [6]:
# Load CIFAR10 and hold out the first 5,000 training examples for validation;
# the remaining examples form the actual training set.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()
n_valid = 5000
X_valid, y_valid = X_train_full[:n_valid], y_train_full[:n_valid]
X_train, y_train = X_train_full[n_valid:], y_train_full[n_valid:]

In [7]:
# Sanity check: the full training images must not contain any NaN value.
assert not np.isnan(X_train_full).any()

In [8]:
# Append the 10-unit softmax output layer to the model built above.
# NOTE: this mutates `model`, so re-running this cell appends another output layer.
output_layer = keras.layers.Dense(10, activation=tf.nn.softmax)
model.add(output_layer)

In [9]:
# Compile with Nadam and the sparse cross-entropy loss, tracking accuracy.
# `learning_rate` is the supported argument name; `lr` is only a deprecated
# alias in tf.keras optimizers.
model.compile(
    optimizer=keras.optimizers.Nadam(learning_rate=5e-5),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

In [10]:
# Callbacks for the baseline run: early stopping, checkpointing of the best
# model to disk, and TensorBoard logging.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "cifar10_model.h5",
    save_best_only=True,
)

# Each run logs into its own numbered sub-directory of ./cifar10_logs.
run_index = 1
run_name = "run_{:03d}".format(run_index)
run_logdir = os.path.join(os.curdir, "cifar10_logs", run_name)
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

callbacks = [
    early_stopping_cb,
    model_checkpoint_cb,
    tensorboard_cb,
]

In [11]:
# Start (or reuse) an inline TensorBoard on port 6006, reading ./cifar10_logs --
# the directory the baseline run's TensorBoard callback writes into.
%tensorboard --logdir=./cifar10_logs --port=6006


Reusing TensorBoard on port 6006 (pid 8724), started 2 days, 17:25:17 ago. (Use '!kill 8724' to kill it.)

In [12]:
# Train the baseline model for up to 100 epochs in mini-batches of 128,
# validating on the held-out split after every epoch.
model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=callbacks,
    workers=0,
    verbose=2,
)


Epoch 1/100
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
352/352 - 5s - loss: 9.3682 - accuracy: 0.1266 - val_loss: 2.6384 - val_accuracy: 0.1542
Epoch 2/100
352/352 - 4s - loss: 2.3691 - accuracy: 0.1826 - val_loss: 2.1942 - val_accuracy: 0.2126
Epoch 3/100
352/352 - 4s - loss: 2.1322 - accuracy: 0.2317 - val_loss: 2.0738 - val_accuracy: 0.2612
Epoch 4/100
352/352 - 5s - loss: 2.0402 - accuracy: 0.2617 - val_loss: 2.0124 - val_accuracy: 0.2760
Epoch 5/100
352/352 - 5s - loss: 1.9837 - accuracy: 0.2797 - val_loss: 1.9597 - val_accuracy: 0.2868
Epoch 6/100
352/352 - 4s - loss: 1.9401 - accuracy: 0.2961 - val_loss: 1.9146 - val_accuracy: 0.3164
Epoch 7/100
352/352 - 4s - loss: 1.9028 - accuracy: 0.3091 - val_loss: 1.8970 - val_accuracy: 0.3098
Epoch 8/100
352/352 - 5s - loss: 1.8707 - accuracy: 0.3199 - val_loss: 1.8566 - val_accuracy: 0.3254
Epoch 9/100
352/352 - 5s - loss: 1.8424 - accuracy: 0.3300 - val_loss: 1.8375 - val_accuracy: 0.3346
Epoch 10/100
352/35

<tensorflow.python.keras.callbacks.History at 0x1bf42be0c08>

In [13]:
# Reload the model saved by the checkpoint callback and score it on the
# validation set (the cell displays the returned [loss, accuracy]).
loaded_model = keras.models.load_model("cifar10_model.h5")
loaded_model.evaluate(x=X_valid, y=y_valid)



[1.5637822151184082, 0.07660000026226044]

In [14]:
# Reset Keras' global state and the random seeds before building the next model.
clear_session()


In [15]:
# Same 20 x 100 architecture, now with Batch Normalization right after the
# input and between every Dense layer and its ELU activation.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
model.add(keras.layers.BatchNormalization())
for _ in range(20):
    hidden_block = (
        keras.layers.Dense(100, kernel_initializer=keras.initializers.he_normal()),
        keras.layers.BatchNormalization(),
        keras.layers.Activation("elu"),
    )
    for layer in hidden_block:
        model.add(layer)
model.add(keras.layers.Dense(10, activation=tf.nn.softmax))

In [16]:
# Compile and train the Batch Normalization model, then evaluate the best checkpoint.
# `learning_rate` replaces the deprecated `lr` alias.
optimizer = keras.optimizers.Nadam(learning_rate=5e-4)
model.compile(optimizer=optimizer,
              loss=keras.losses.SparseCategoricalCrossentropy(),
              metrics=["accuracy"])

early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_stopping_cb = keras.callbacks.ModelCheckpoint("cifar10_bn_model.h5", save_best_only=True)

run_index = 1  # increment every time you train the model
# Log under ./cifar10_logs (not a separate "cifar10_log_dir") so this run shows up
# in the TensorBoard instance started with --logdir=./cifar10_logs, next to the
# other runs.
run_logdir = os.path.join(os.curdir, "cifar10_logs", "run_bn_{:03d}".format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

model.fit(X_train, y_train,
          epochs=100,
          validation_data=(X_valid, y_valid),
          callbacks=[early_stopping_cb, model_stopping_cb, tensorboard_cb],
          workers=0)

# Reload the checkpointed model and report its [loss, accuracy] on the validation set.
best_model = keras.models.load_model("cifar10_bn_model.h5")

best_model.evaluate(X_valid, y_valid)


Epoch 1/100


### d.
*Exercise: Try replacing Batch Normalization with SELU, and make the necessary adjustments to ensure the network self-normalizes (i.e., standardize the input features, use LeCun normal initialization, make sure the DNN contains only a sequence of dense layers, etc.).*

In [None]:
# Reset Keras' global state and the random seeds before building the SELU model.
clear_session()

In [None]:
# Self-normalizing variant: only Dense layers, SELU activations and LeCun-normal
# initialization. The initializer is given by its string identifier rather than
# as the uninstantiated `keras.initializers.lecun_normal` object, so Keras
# builds a proper initializer instance for every layer.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))

for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 kernel_initializer="lecun_normal",
                                 activation=tf.nn.selu))
model.add(keras.layers.Dense(10, activation=tf.nn.softmax))


In [None]:
# Compile the SELU model: Nadam with a learning rate of 7e-4, sparse
# cross-entropy loss, accuracy as the reported metric.
optimizer = keras.optimizers.Nadam(learning_rate=7e-4)
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [None]:
# Callbacks for the SELU run: early stopping, best-model checkpoint, TensorBoard logs.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("cifar10_selu_model.h5", save_best_only=True)
run_index = 1 # increment every time you train the model
run_logdir = os.path.join(os.curdir, "cifar10_logs", "run_selu_{:03d}".format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]

# Standardize every input feature with statistics computed on the training set
# only, and apply the same shift/scale to the validation and test sets.
X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)  # ndarray has `.std`, not `.stds`
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

# `epochs` must be set explicitly: with the default of a single epoch the
# early-stopping patience of 20 could never come into play. 100 matches the
# other training runs in this notebook.
model.fit(X_train_scaled, y_train, epochs=100,
          validation_data=(X_valid_scaled, y_valid),
          callbacks=callbacks, workers=0)

# Reload the checkpointed model and report its [loss, accuracy] on the
# standardized validation set.
loaded_model = keras.models.load_model("cifar10_selu_model.h5")
loaded_model.evaluate(X_valid_scaled, y_valid)

### e.
*Exercise: Try regularizing the model with alpha dropout. Then, without retraining your model, see if you can achieve better accuracy using MC Dropout.*

In [None]:
# SELU network regularized with alpha dropout (rate 0.1) applied once, between
# the last hidden layer and the softmax output layer.
# The initializer is given by its string identifier rather than as the
# uninstantiated `keras.initializers.lecun_normal` object, so Keras builds a
# proper initializer instance for every layer.
# NOTE(review): unlike the previous sections, no clear_session() call precedes
# this model, so its random state depends on what ran before -- confirm intent.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))

for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 kernel_initializer="lecun_normal",
                                 activation=tf.nn.selu))
model.add(keras.layers.AlphaDropout(rate=0.1))
model.add(keras.layers.Dense(10, activation=tf.nn.softmax))

In [None]:

# Compile the alpha-dropout model. `learning_rate` replaces the deprecated `lr` alias.
optimizer = keras.optimizers.Nadam(learning_rate=5e-4)
model.compile(loss=keras.losses.SparseCategoricalCrossentropy(),
              optimizer=optimizer,
              metrics=["accuracy"])

# Callbacks: early stopping, best-model checkpoint, TensorBoard logs.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("cifar10_alpha_dropout_model.h5", save_best_only=True)
run_index = 1 # increment every time you train the model
run_logdir = os.path.join(os.curdir, "cifar10_logs", "run_alpha_dropout_{:03d}".format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]

# Standardize every input feature with training-set statistics; the same
# shift/scale is applied to the validation and test sets.
X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

model.fit(X_train_scaled, y_train, epochs=100,
          validation_data=(X_valid_scaled, y_valid),
          callbacks=callbacks)

# Replace `model` with the checkpointed version and report its
# [loss, accuracy] on the standardized validation set.
model = keras.models.load_model("cifar10_alpha_dropout_model.h5")
model.evaluate(X_valid_scaled, y_valid)

In [None]:
class MCAlphaDropout(keras.layers.AlphaDropout):
    """AlphaDropout variant whose `call` always passes `training=True` to the parent layer."""

    def call(self, inputs):
        # Force the parent's training-mode behaviour no matter how the model is invoked.
        outputs = super().call(inputs, training=True)
        return outputs

In [None]:
mc