In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
import utils

# MNIST dataset params
num_classes = 10 # 0-9 digits
num_features = 784 # img shape: 28*28

# Fix TensorFlow's global random seed before any model is built so that a
# "Restart & Run All" starts every experiment from the same random state
# (hardware-level nondeterminism, e.g. on GPU, may still cause small differences).
SEED = 42
tf.random.set_seed(SEED)

# load data: the Keras MNIST loader returns (train, test) tuples of images and labels
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Project helper; presumably flattens images to `num_features` and encodes labels
# over `num_classes` (the models below use categorical cross-entropy) -- see utils.preprocess.
X_train, y_train, X_test, y_test = utils.preprocess(X_train, y_train, X_test, y_test, num_classes, num_features, print_summary=False)

In [2]:
# network params: unit counts of the two hidden Dense layers shared by every model below
n_hidden_1 = 128  # width of hidden_layer_1
n_hidden_2 = 256  # width of hidden_layer_2

# **$1$. MLP with Adam optimizer**

## $a$. Default Case

In [3]:
# build the baseline MLP through the project helper (no regulariser / dropout arguments)
mlp_adam_1 = utils.create_mlp(
    "MLP_Adam_1",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
)

Model: "MLP_Adam_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 hidden_layer_1 (Dense)      (None, 128)               100480    
                                                                 
 hidden_layer_2 (Dense)      (None, 256)               33024     
                                                                 
 output_layer (Dense)        (None, 10)                2570      
                                                                 
Total params: 136,074
Trainable params: 136,074
Non-trainable params: 0
_________________________________________________________________


In [4]:
# configure training: Adam with its default arguments, categorical cross-entropy loss,
# accuracy as the tracked metric
mlp_adam_1.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_adam_1_history = mlp_adam_1.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
 21/219 [=>............................] - ETA: 0s - loss: 0.0075 - accuracy: 0.9981

KeyboardInterrupt: 

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_adam_1,
    X_train, y_train,
    X_test, y_test,
    mlp_adam_1_history,
)

## $b$. $L2$ regularization

### $i)$ $\;\alpha=0.1$

In [None]:
# L2 regularisation coefficient (alpha) passed to utils.create_mlp for MLP_Adam_2
a_reg = 0.1

In [None]:
# build the MLP with an L2 kernel regulariser whose coefficient is the current `a_reg`
mlp_adam_2 = utils.create_mlp(
    "MLP_Adam_2",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
    kernel_reg="l2",
    a_reg=a_reg,
)

In [None]:
# configure training: Adam with its default arguments, categorical cross-entropy loss,
# accuracy as the tracked metric
mlp_adam_2.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_adam_2_history = mlp_adam_2.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_adam_2,
    X_train, y_train,
    X_test, y_test,
    mlp_adam_2_history,
)

### $ii)$ $\;\alpha=0.01$

In [None]:
# L2 regularisation coefficient (alpha) passed to utils.create_mlp for MLP_Adam_3
a_reg = 0.01

In [None]:
# build the MLP with an L2 kernel regulariser whose coefficient is the current `a_reg`
mlp_adam_3 = utils.create_mlp(
    "MLP_Adam_3",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
    kernel_reg="l2",
    a_reg=a_reg,
)

In [None]:
# configure training: Adam with its default arguments, categorical cross-entropy loss,
# accuracy as the tracked metric
mlp_adam_3.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_adam_3_history = mlp_adam_3.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_adam_3,
    X_train, y_train,
    X_test, y_test,
    mlp_adam_3_history,
)

### $iii)$ $\;\alpha=0.001$

In [None]:
# L2 regularisation coefficient (alpha) passed to utils.create_mlp for MLP_Adam_4
a_reg = 0.001

In [None]:
# build the MLP with an L2 kernel regulariser whose coefficient is the current `a_reg`
mlp_adam_4 = utils.create_mlp(
    "MLP_Adam_4",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
    kernel_reg="l2",
    a_reg=a_reg,
)

In [None]:
# configure training: Adam with its default arguments, categorical cross-entropy loss,
# accuracy as the tracked metric
mlp_adam_4.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_adam_4_history = mlp_adam_4.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_adam_4,
    X_train, y_train,
    X_test, y_test,
    mlp_adam_4_history,
)

## $c$. $L1$ regularization ($\alpha=0.01$) & Dropout ($probability=0.3$)

In [None]:
# L1 regularisation coefficient (alpha) and dropout probability passed to
# utils.create_mlp for MLP_Adam_5
a_reg = 0.01
dropout_prob = 0.3

In [None]:
# build the MLP with an L1 kernel regulariser (coefficient `a_reg`) and dropout
# layers enabled (probability `dropout_prob`)
mlp_adam_5 = utils.create_mlp(
    "MLP_Adam_5",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
    kernel_reg="l1",
    a_reg=a_reg,
    dropout_layers=True,
    dropout_prob=dropout_prob,
)

In [None]:
# configure training: Adam with its default arguments, categorical cross-entropy loss,
# accuracy as the tracked metric
mlp_adam_5.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_adam_5_history = mlp_adam_5.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_adam_5,
    X_train, y_train,
    X_test, y_test,
    mlp_adam_5_history,
)

# **$2$. MLP with RMSProp optimizer**

## $a$. Default Case

### $i)$ $\;\rho=0.01$

In [None]:
# `rho` argument given to tf.keras.optimizers.RMSprop when MLP_RMSProp_1 is compiled
rho = 0.01

In [None]:
# build the baseline MLP through the project helper (no regulariser / dropout arguments)
mlp_rmsprop_1 = utils.create_mlp(
    "MLP_RMSProp_1",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
)

In [None]:
# configure training: RMSprop (learning rate 0.001, current `rho`),
# categorical cross-entropy loss, accuracy as the tracked metric
mlp_rmsprop_1.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=rho),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_rmsprop_1_history = mlp_rmsprop_1.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_rmsprop_1,
    X_train, y_train,
    X_test, y_test,
    mlp_rmsprop_1_history,
)

### $ii)$ $\;\rho=0.99$

In [None]:
# `rho` argument given to tf.keras.optimizers.RMSprop when MLP_RMSProp_2 is compiled
rho = 0.99

In [None]:
# build the baseline MLP through the project helper (no regulariser / dropout arguments)
mlp_rmsprop_2 = utils.create_mlp(
    "MLP_RMSProp_2",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
)

In [None]:
# configure training: RMSprop (learning rate 0.001, current `rho`),
# categorical cross-entropy loss, accuracy as the tracked metric
mlp_rmsprop_2.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=rho),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_rmsprop_2_history = mlp_rmsprop_2.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_rmsprop_2,
    X_train, y_train,
    X_test, y_test,
    mlp_rmsprop_2_history,
)

## $b$. $L2$ regularization ($\alpha=0.01$)

In [None]:
# L2 regularisation coefficient (alpha) shared by MLP_RMSProp_3 and MLP_RMSProp_4
a_reg = 0.01

### $i)$ $\;\rho=0.01$

In [None]:
# `rho` argument given to tf.keras.optimizers.RMSprop when MLP_RMSProp_3 is compiled
rho = 0.01

In [None]:
# build the MLP with an L2 kernel regulariser whose coefficient is the current `a_reg`
mlp_rmsprop_3 = utils.create_mlp(
    "MLP_RMSProp_3",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
    kernel_reg="l2",
    a_reg=a_reg,
)

In [None]:
# configure training: RMSprop (learning rate 0.001, current `rho`),
# categorical cross-entropy loss, accuracy as the tracked metric
mlp_rmsprop_3.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=rho),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_rmsprop_3_history = mlp_rmsprop_3.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_rmsprop_3,
    X_train, y_train,
    X_test, y_test,
    mlp_rmsprop_3_history,
)

### $ii)$ $\;\rho=0.99$

In [None]:
# `rho` argument given to tf.keras.optimizers.RMSprop when MLP_RMSProp_4 is compiled
rho = 0.99

In [None]:
# build the MLP with an L2 kernel regulariser whose coefficient is the current `a_reg`
mlp_rmsprop_4 = utils.create_mlp(
    "MLP_RMSProp_4",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
    kernel_reg="l2",
    a_reg=a_reg,
)

In [None]:
# configure training: RMSprop (learning rate 0.001, current `rho`),
# categorical cross-entropy loss, accuracy as the tracked metric
mlp_rmsprop_4.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=rho),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_rmsprop_4_history = mlp_rmsprop_4.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_rmsprop_4,
    X_train, y_train,
    X_test, y_test,
    mlp_rmsprop_4_history,
)

## $c$. $L1$ regularization ($\alpha=0.01$) & Dropout ($probability=0.3$)

In [None]:
# L1 regularisation coefficient (alpha) and dropout probability shared by
# MLP_RMSProp_5 and MLP_RMSProp_6
a_reg = 0.01
dropout_prob = 0.3

### $i)$ $\;\rho=0.01$

In [None]:
# `rho` argument given to tf.keras.optimizers.RMSprop when MLP_RMSProp_5 is compiled
rho = 0.01

In [None]:
# build the MLP with an L1 kernel regulariser (coefficient `a_reg`) and dropout
# layers enabled (probability `dropout_prob`)
mlp_rmsprop_5 = utils.create_mlp(
    "MLP_RMSProp_5",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
    kernel_reg="l1",
    a_reg=a_reg,
    dropout_layers=True,
    dropout_prob=dropout_prob,
)

In [None]:
# configure training: RMSprop (learning rate 0.001, current `rho`),
# categorical cross-entropy loss, accuracy as the tracked metric
mlp_rmsprop_5.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=rho),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_rmsprop_5_history = mlp_rmsprop_5.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_rmsprop_5,
    X_train, y_train,
    X_test, y_test,
    mlp_rmsprop_5_history,
)

### $ii)$ $\;\rho=0.99$

In [None]:
# `rho` argument given to tf.keras.optimizers.RMSprop when MLP_RMSProp_6 is compiled
rho = 0.99

In [None]:
# build the MLP with an L1 kernel regulariser (coefficient `a_reg`) and dropout
# layers enabled (probability `dropout_prob`)
mlp_rmsprop_6 = utils.create_mlp(
    "MLP_RMSProp_6",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
    kernel_reg="l1",
    a_reg=a_reg,
    dropout_layers=True,
    dropout_prob=dropout_prob,
)

In [None]:
# configure training: RMSprop (learning rate 0.001, current `rho`),
# categorical cross-entropy loss, accuracy as the tracked metric
mlp_rmsprop_6.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=rho),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_rmsprop_6_history = mlp_rmsprop_6.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_rmsprop_6,
    X_train, y_train,
    X_test, y_test,
    mlp_rmsprop_6_history,
)

# **$3$. MLP with SGD optimizer**

***Weight initializer**: Gaussian distribution, mean = 10*

In [None]:
# mean of the Gaussian weight initializer; passed as `gaussian_mean` to every SGD model below
gaussian_mean = 10

## $a$. Default Case

In [None]:
# build the MLP with Gaussian weight initialisation (mean `gaussian_mean`) and
# no regulariser / dropout arguments
mlp_sgd_1 = utils.create_mlp(
    "MLP_SGD_1",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
    gaussian_init=True,
    gaussian_mean=gaussian_mean,
)

In [None]:
# configure training: SGD with learning rate 0.01, categorical cross-entropy loss,
# accuracy as the tracked metric
mlp_sgd_1.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_sgd_1_history = mlp_sgd_1.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_sgd_1,
    X_train, y_train,
    X_test, y_test,
    mlp_sgd_1_history,
)

## $b$. $L2$ regularization

### $i)$ $\;\alpha=0.1$

In [None]:
# L2 regularisation coefficient (alpha) passed to utils.create_mlp for MLP_SGD_2
a_reg = 0.1

In [None]:
# build the MLP with Gaussian weight initialisation (mean `gaussian_mean`) and an
# L2 kernel regulariser whose coefficient is the current `a_reg`
mlp_sgd_2 = utils.create_mlp(
    "MLP_SGD_2",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
    gaussian_init=True,
    gaussian_mean=gaussian_mean,
    kernel_reg="l2",
    a_reg=a_reg,
)

In [None]:
# configure training: SGD with learning rate 0.01, categorical cross-entropy loss,
# accuracy as the tracked metric
mlp_sgd_2.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_sgd_2_history = mlp_sgd_2.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_sgd_2,
    X_train, y_train,
    X_test, y_test,
    mlp_sgd_2_history,
)

### $ii)$ $\;\alpha=0.01$

In [None]:
# L2 regularisation coefficient (alpha) passed to utils.create_mlp for MLP_SGD_3
a_reg = 0.01

In [None]:
# build the MLP with Gaussian weight initialisation (mean `gaussian_mean`) and an
# L2 kernel regulariser whose coefficient is the current `a_reg`
mlp_sgd_3 = utils.create_mlp(
    "MLP_SGD_3",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
    gaussian_init=True,
    gaussian_mean=gaussian_mean,
    kernel_reg="l2",
    a_reg=a_reg,
)

In [None]:
# configure training: SGD with learning rate 0.01, categorical cross-entropy loss,
# accuracy as the tracked metric
mlp_sgd_3.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_sgd_3_history = mlp_sgd_3.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_sgd_3,
    X_train, y_train,
    X_test, y_test,
    mlp_sgd_3_history,
)

## $c$. $L1$ regularization ($\alpha=0.01$) & Dropout ($probability=0.3$)

In [None]:
# L1 regularisation coefficient (alpha) and dropout probability passed to
# utils.create_mlp for MLP_SGD_4
a_reg = 0.01
dropout_prob = 0.3

In [None]:
# build the MLP with Gaussian weight initialisation (mean `gaussian_mean`), an L1
# kernel regulariser (coefficient `a_reg`) and dropout layers (probability `dropout_prob`)
mlp_sgd_4 = utils.create_mlp(
    "MLP_SGD_4",
    n_hidden_1, n_hidden_2,
    num_features, num_classes,
    gaussian_init=True,
    gaussian_mean=gaussian_mean,
    kernel_reg="l1",
    a_reg=a_reg,
    dropout_layers=True,
    dropout_prob=dropout_prob,
)

In [None]:
# configure training: SGD with learning rate 0.01, categorical cross-entropy loss,
# accuracy as the tracked metric
mlp_sgd_4.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# fit for 100 epochs with mini-batches of 256; the test split is supplied as validation data
mlp_sgd_4_history = mlp_sgd_4.fit(
    X_train, y_train,
    batch_size=256,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# hand the model, both data splits and the fit history to the project's results helper
utils.disp_results(
    mlp_sgd_4,
    X_train, y_train,
    X_test, y_test,
    mlp_sgd_4_history,
)