### Part 2: Adult Optimization
* Traing 5 neural network architectures with both ReLU and SGD optimizers.
* Plotting the Test accuracies for all the 5 networks and comparing them.
* The models are trained and the test accuracies are stored in .csv files using CSVLOGGER callbacks.

In [22]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.callbacks import CSVLogger

%matplotlib inline 

%tensorflow_version 2.x

In [23]:
from google.colab import drive
drive.mount('drive')

Drive already mounted at drive; to attempt to forcibly remount, call drive.mount("drive", force_remount=True).


In [24]:
mnist = tf.keras.datasets.mnist

In [25]:
(x_train, y_train), (x_test, y_test) = mnist.load_data() # load data
assert x_train.shape == (60000, 28, 28)
assert x_test.shape == (10000, 28, 28)
assert y_train.shape == (60000,)
assert y_test.shape == (10000,)

In [27]:
# scaling. the data-set

x_train, x_test = x_train/255, x_test/255
# X = pd.concat([x_train, x_test])
CLASSES = 10

In [6]:
# class TestCallback(Callback):
#     def __init__(self, test_data):
#         self.test_data = test_data

#     def on_epoch_end(self, epoch, logs={}):
#         x, y = self.test_data
#         loss, acc = self.model.evaluate(x, y, verbose=0)
#         print('\nTesting loss: {}, acc: {}\n'.format(loss, acc))

Create five different networks that share the same architecture as follows:

a. Activation function: the logistic sigmoid function; initialization: random numbers generated from the normal distribution ($\mu$ = 0, $\sigma$ = 0.01)

  * With Optimizer = SGD


In [7]:
init_normal = tf.keras.initializers.RandomNormal(mean=0, stddev=0.01)
model_a_sgd = tf.keras.models.Sequential([
                                     
    tf.keras.layers.Flatten(input_shape=(28, 28)),

    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer1'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer2'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer3'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer4'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer5'),
    tf.keras.layers.Dense(CLASSES, activation='softmax', name='layer_softmax')

])
model_a_sgd.compile(
  optimizer='sgd',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  # loss='mse',
  metrics=['accuracy'])

In [8]:
csv_log = CSVLogger("a_sgd_results.csv")
a_sgd_csv_logger = model_a_sgd.fit(
    x_train, 
    y_train, 
    epochs=200, 
    validation_data=(x_test, y_test), 
    # batch_size=10, 
    verbose=2,
    callbacks=[csv_log]
)
!cp a_sgd_results.csv "drive/My Drive/"

Epoch 1/200


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


1875/1875 - 6s - loss: 2.3124 - accuracy: 0.1045 - val_loss: 2.3217 - val_accuracy: 0.0974
Epoch 2/200
1875/1875 - 4s - loss: 2.3114 - accuracy: 0.1025 - val_loss: 2.3142 - val_accuracy: 0.1028
Epoch 3/200
1875/1875 - 5s - loss: 2.3110 - accuracy: 0.1051 - val_loss: 2.3084 - val_accuracy: 0.0974
Epoch 4/200
1875/1875 - 4s - loss: 2.3105 - accuracy: 0.1027 - val_loss: 2.3037 - val_accuracy: 0.1032
Epoch 5/200
1875/1875 - 5s - loss: 2.3105 - accuracy: 0.1039 - val_loss: 2.3070 - val_accuracy: 0.0980
Epoch 6/200
1875/1875 - 4s - loss: 2.3095 - accuracy: 0.1059 - val_loss: 2.3077 - val_accuracy: 0.1028
Epoch 7/200
1875/1875 - 4s - loss: 2.3093 - accuracy: 0.1046 - val_loss: 2.3086 - val_accuracy: 0.0982
Epoch 8/200
1875/1875 - 5s - loss: 2.3095 - accuracy: 0.1030 - val_loss: 2.3096 - val_accuracy: 0.1135
Epoch 9/200
1875/1875 - 5s - loss: 2.3091 - accuracy: 0.1015 - val_loss: 2.3090 - val_accuracy: 0.1135
Epoch 10/200
1875/1875 - 4s - loss: 2.3086 - accuracy: 0.1043 - val_loss: 2.3113 - va

In [None]:
# a_sgd_acc = {}
# for i in range(10, 200, 5):

#   model_a_sgd.fit(x=x_train, y=y_train, epochs=i)
#   a_sgd_acc[i] = model_a_sgd.evaluate(x_test,  y_test, verbose=2)

  * With Optimizer = Adam


In [9]:
init_normal = tf.keras.initializers.RandomNormal(mean=0, stddev=0.01)
model_a_adam = tf.keras.models.Sequential([
                                     
    tf.keras.layers.Flatten(input_shape=(28, 28)),

    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer1'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer2'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer3'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer4'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer5'),
    tf.keras.layers.Dense(CLASSES, activation='softmax', name='layer_softmax')

])
model_a_adam.compile(
  optimizer='adam',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  # loss='mse',
  metrics=['accuracy'])

In [10]:
csv_log_a_adam = CSVLogger("a_adam_results.csv")
a_adam_csv_logger = model_a_adam.fit(
    x_train, 
    y_train, 
    epochs=200, 
    validation_data=(x_test, y_test), 
    # batch_size=10, 
    verbose=2,
    callbacks=[csv_log_a_adam]
)
!cp a_adam_results.csv "drive/My Drive/"

Epoch 1/200


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


1875/1875 - 6s - loss: 1.1514 - accuracy: 0.5796 - val_loss: 0.2700 - val_accuracy: 0.9252
Epoch 2/200
1875/1875 - 5s - loss: 0.2039 - accuracy: 0.9422 - val_loss: 0.1603 - val_accuracy: 0.9538
Epoch 3/200
1875/1875 - 5s - loss: 0.1307 - accuracy: 0.9622 - val_loss: 0.1324 - val_accuracy: 0.9618
Epoch 4/200
1875/1875 - 5s - loss: 0.0984 - accuracy: 0.9714 - val_loss: 0.1196 - val_accuracy: 0.9671
Epoch 5/200
1875/1875 - 5s - loss: 0.0775 - accuracy: 0.9778 - val_loss: 0.1062 - val_accuracy: 0.9720
Epoch 6/200
1875/1875 - 5s - loss: 0.0637 - accuracy: 0.9813 - val_loss: 0.1176 - val_accuracy: 0.9700
Epoch 7/200
1875/1875 - 5s - loss: 0.0508 - accuracy: 0.9848 - val_loss: 0.0884 - val_accuracy: 0.9771
Epoch 8/200
1875/1875 - 5s - loss: 0.0418 - accuracy: 0.9876 - val_loss: 0.0946 - val_accuracy: 0.9756
Epoch 9/200
1875/1875 - 5s - loss: 0.0346 - accuracy: 0.9898 - val_loss: 0.1027 - val_accuracy: 0.9744
Epoch 10/200
1875/1875 - 5s - loss: 0.0294 - accuracy: 0.9917 - val_loss: 0.0912 - va

---
b .Activation function: the logistic sigmoid function; initialization: Xavier initializer
  * With Optimizer = SGD


In [None]:
init_xavier = tf.initializers.GlorotUniform()
model_b_sgd = tf.keras.models.Sequential([
                                     
    tf.keras.layers.Flatten(input_shape=(28, 28)),

    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_xavier, name='layer1'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_xavier, name='layer2'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_xavier, name='layer3'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_xavier, name='layer4'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_xavier, name='layer5'),
    tf.keras.layers.Dense(CLASSES, activation='softmax', name='layer_softmax')

])
model_b_sgd.compile(
  optimizer='sgd',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  # loss='mse',
  metrics=['accuracy'])

In [None]:
csv_log_model_b_sgd = CSVLogger("b_sgd_results.csv")
b_sgd_csv_logger = model_b_sgd.fit(
    x_train, 
    y_train, 
    epochs=200, 
    validation_data=(x_test, y_test), 
    # batch_size=10, 
    verbose=2,
    callbacks=[csv_log_model_b_sgd]
)
!cp b_sgd_results.csv "drive/My Drive/"

Epoch 1/200


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


1875/1875 - 8s - loss: 2.3121 - accuracy: 0.1036 - val_loss: 2.3084 - val_accuracy: 0.1135
Epoch 2/200
1875/1875 - 5s - loss: 2.3119 - accuracy: 0.1022 - val_loss: 2.3088 - val_accuracy: 0.1135
Epoch 3/200
1875/1875 - 5s - loss: 2.3106 - accuracy: 0.1013 - val_loss: 2.3044 - val_accuracy: 0.1028
Epoch 4/200
1875/1875 - 4s - loss: 2.3101 - accuracy: 0.1041 - val_loss: 2.3034 - val_accuracy: 0.1135
Epoch 5/200
1875/1875 - 4s - loss: 2.3096 - accuracy: 0.1072 - val_loss: 2.3058 - val_accuracy: 0.0982
Epoch 6/200
1875/1875 - 5s - loss: 2.3094 - accuracy: 0.1061 - val_loss: 2.3074 - val_accuracy: 0.1010
Epoch 7/200
1875/1875 - 5s - loss: 2.3082 - accuracy: 0.1068 - val_loss: 2.3140 - val_accuracy: 0.1135
Epoch 8/200
1875/1875 - 4s - loss: 2.3086 - accuracy: 0.1054 - val_loss: 2.3051 - val_accuracy: 0.1032
Epoch 9/200
1875/1875 - 5s - loss: 2.3079 - accuracy: 0.1023 - val_loss: 2.3210 - val_accuracy: 0.0958
Epoch 10/200
1875/1875 - 4s - loss: 2.3076 - accuracy: 0.1053 - val_loss: 2.3066 - va

  * With Optimizer = Adam


In [11]:
init_xavier = tf.initializers.GlorotUniform()
model_b_adam = tf.keras.models.Sequential([
                                     
    tf.keras.layers.Flatten(input_shape=(28, 28)),

    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_xavier, name='layer1'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_xavier, name='layer2'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_xavier, name='layer3'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_xavier, name='layer4'),
    tf.keras.layers.Dense(512, activation=tf.nn.sigmoid, kernel_initializer=init_xavier, name='layer5'),
    tf.keras.layers.Dense(CLASSES, activation='softmax', name='layer_softmax')

])
model_b_adam.compile(
  optimizer='adam',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  # loss='mse',
  metrics=['accuracy'])

In [12]:
csv_log_b_adam = CSVLogger("b_adam_results.csv")
b_adam_csv_logger = model_b_adam.fit(
    x_train, 
    y_train, 
    epochs=200, 
    validation_data=(x_test, y_test), 
    # batch_size=10, 
    verbose=2,
    callbacks=[csv_log_b_adam]
)
!cp b_adam_results.csv "drive/My Drive/"

Epoch 1/200


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


1875/1875 - 6s - loss: 0.5305 - accuracy: 0.8281 - val_loss: 0.1952 - val_accuracy: 0.9453
Epoch 2/200
1875/1875 - 5s - loss: 0.1714 - accuracy: 0.9503 - val_loss: 0.1387 - val_accuracy: 0.9600
Epoch 3/200
1875/1875 - 5s - loss: 0.1179 - accuracy: 0.9654 - val_loss: 0.1291 - val_accuracy: 0.9639
Epoch 4/200
1875/1875 - 5s - loss: 0.0876 - accuracy: 0.9742 - val_loss: 0.0922 - val_accuracy: 0.9731
Epoch 5/200
1875/1875 - 5s - loss: 0.0699 - accuracy: 0.9795 - val_loss: 0.0936 - val_accuracy: 0.9732
Epoch 6/200
1875/1875 - 5s - loss: 0.0546 - accuracy: 0.9834 - val_loss: 0.0794 - val_accuracy: 0.9788
Epoch 7/200
1875/1875 - 5s - loss: 0.0442 - accuracy: 0.9864 - val_loss: 0.0912 - val_accuracy: 0.9756
Epoch 8/200
1875/1875 - 5s - loss: 0.0371 - accuracy: 0.9889 - val_loss: 0.0950 - val_accuracy: 0.9766
Epoch 9/200
1875/1875 - 5s - loss: 0.0320 - accuracy: 0.9903 - val_loss: 0.0930 - val_accuracy: 0.9776
Epoch 10/200
1875/1875 - 5s - loss: 0.0265 - accuracy: 0.9922 - val_loss: 0.0939 - va

---
c. Activation function: ReLU; initialization: random numbers generated from the normal distribution ($\mu$ = 0, $\sigma$ = 0:01)
  * With Optimizer = SGD


In [13]:
init_normal = tf.keras.initializers.RandomNormal(mean=0, stddev=0.01)
model_c_sgd = tf.keras.models.Sequential([
                                     
    tf.keras.layers.Flatten(input_shape=(28, 28)),

    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer1'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer2'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer3'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer4'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer5'),
    tf.keras.layers.Dense(CLASSES, activation='softmax', name='layer_softmax')


])
model_c_sgd.compile(
  optimizer='sgd',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  # loss='mse',
  metrics=['accuracy'])

In [14]:
csv_log_model_c_sgd = CSVLogger("c_sgd_results.csv")
c_sgd_csv_logger = model_c_sgd.fit(
    x_train, 
    y_train, 
    epochs=200, 
    validation_data=(x_test, y_test), 
    # batch_size=10, 
    verbose=2,
    callbacks=[csv_log_model_c_sgd]
)
!cp c_sgd_results.csv "drive/My Drive/"

Epoch 1/200


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


1875/1875 - 5s - loss: 2.3015 - accuracy: 0.1120 - val_loss: 2.3009 - val_accuracy: 0.1135
Epoch 2/200
1875/1875 - 5s - loss: 2.3012 - accuracy: 0.1124 - val_loss: 2.3010 - val_accuracy: 0.1135
Epoch 3/200
1875/1875 - 4s - loss: 2.3011 - accuracy: 0.1124 - val_loss: 2.3007 - val_accuracy: 0.1135
Epoch 4/200
1875/1875 - 4s - loss: 2.3011 - accuracy: 0.1124 - val_loss: 2.3008 - val_accuracy: 0.1135
Epoch 5/200
1875/1875 - 4s - loss: 2.3009 - accuracy: 0.1124 - val_loss: 2.3006 - val_accuracy: 0.1135
Epoch 6/200
1875/1875 - 4s - loss: 2.3006 - accuracy: 0.1124 - val_loss: 2.3001 - val_accuracy: 0.1135
Epoch 7/200
1875/1875 - 5s - loss: 2.2998 - accuracy: 0.1124 - val_loss: 2.2984 - val_accuracy: 0.1135
Epoch 8/200
1875/1875 - 4s - loss: 2.2882 - accuracy: 0.1374 - val_loss: 2.2380 - val_accuracy: 0.2105
Epoch 9/200
1875/1875 - 4s - loss: 1.8271 - accuracy: 0.2797 - val_loss: 1.5234 - val_accuracy: 0.4012
Epoch 10/200
1875/1875 - 4s - loss: 1.2512 - accuracy: 0.5166 - val_loss: 1.0051 - va

  * With optimizer = Adam

In [15]:
init_normal = tf.keras.initializers.RandomNormal(mean=0, stddev=0.01)
model_c_adam = tf.keras.models.Sequential([
                                     
    tf.keras.layers.Flatten(input_shape=(28, 28)),

    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer1'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer2'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer3'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer4'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_normal, bias_initializer=tf.keras.initializers.Zeros(), name='layer5'),
    tf.keras.layers.Dense(CLASSES, activation='softmax', name='layer_softmax')

])
model_c_adam.compile(
  optimizer='adam',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  # loss='mse',
  metrics=['accuracy'])

In [16]:
csv_log_model_c_adam = CSVLogger("c_adam_results.csv")
c_adam_csv_logger = model_c_adam.fit(
    x_train, 
    y_train, 
    epochs=200, 
    validation_data=(x_test, y_test), 
    # batch_size=10, 
    verbose=2,
    callbacks=[csv_log_model_c_adam]
)
!cp c_adam_results.csv "drive/My Drive/"

Epoch 1/200


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


1875/1875 - 6s - loss: 0.3353 - accuracy: 0.8964 - val_loss: 0.1804 - val_accuracy: 0.9511
Epoch 2/200
1875/1875 - 5s - loss: 0.1289 - accuracy: 0.9656 - val_loss: 0.1319 - val_accuracy: 0.9650
Epoch 3/200
1875/1875 - 5s - loss: 0.0928 - accuracy: 0.9752 - val_loss: 0.1153 - val_accuracy: 0.9726
Epoch 4/200
1875/1875 - 5s - loss: 0.0743 - accuracy: 0.9797 - val_loss: 0.1288 - val_accuracy: 0.9674
Epoch 5/200
1875/1875 - 5s - loss: 0.0603 - accuracy: 0.9835 - val_loss: 0.0919 - val_accuracy: 0.9771
Epoch 6/200
1875/1875 - 5s - loss: 0.0496 - accuracy: 0.9863 - val_loss: 0.0985 - val_accuracy: 0.9772
Epoch 7/200
1875/1875 - 5s - loss: 0.0473 - accuracy: 0.9874 - val_loss: 0.1025 - val_accuracy: 0.9769
Epoch 8/200
1875/1875 - 5s - loss: 0.0381 - accuracy: 0.9894 - val_loss: 0.1026 - val_accuracy: 0.9806
Epoch 9/200
1875/1875 - 5s - loss: 0.0374 - accuracy: 0.9903 - val_loss: 0.1166 - val_accuracy: 0.9782
Epoch 10/200
1875/1875 - 5s - loss: 0.0330 - accuracy: 0.9915 - val_loss: 0.1382 - va

d. Activation function: ReLU; initialization: Xavier initializer
  * With Optimizer = SGD


In [17]:
init_xavier = tf.initializers.GlorotUniform()
model_d_sgd = tf.keras.models.Sequential([
                                     
    tf.keras.layers.Flatten(input_shape=(28, 28)),

    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_xavier, name='layer1'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_xavier, name='layer2'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_xavier, name='layer3'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_xavier, name='layer4'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_xavier, name='layer5'),
    tf.keras.layers.Dense(CLASSES, activation='softmax', name='layer_softmax')

])
model_d_sgd.compile(
  optimizer='sgd',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  # loss='mse',
  metrics=['accuracy'])

In [18]:
csv_log_d_sgd = CSVLogger("d_sgd_results.csv")
d_sgd_csv_logger = model_d_sgd.fit(
    x_train, 
    y_train, 
    epochs=200, 
    validation_data=(x_test, y_test), 
    # batch_size=10, 
    verbose=2,
    callbacks=[csv_log_d_sgd]
)
!cp d_sgd_results.csv "drive/My Drive/"

Epoch 1/200


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


1875/1875 - 5s - loss: 2.3137 - accuracy: 0.1013 - val_loss: 2.3133 - val_accuracy: 0.1135
Epoch 2/200
1875/1875 - 5s - loss: 2.3121 - accuracy: 0.1052 - val_loss: 2.3234 - val_accuracy: 0.1135
Epoch 3/200
1875/1875 - 5s - loss: 2.3117 - accuracy: 0.1036 - val_loss: 2.3107 - val_accuracy: 0.1028
Epoch 4/200
1875/1875 - 5s - loss: 2.3108 - accuracy: 0.1035 - val_loss: 2.3093 - val_accuracy: 0.1135
Epoch 5/200
1875/1875 - 4s - loss: 2.3104 - accuracy: 0.1046 - val_loss: 2.3155 - val_accuracy: 0.1009
Epoch 6/200
1875/1875 - 4s - loss: 2.3100 - accuracy: 0.1039 - val_loss: 2.3086 - val_accuracy: 0.1135
Epoch 7/200
1875/1875 - 4s - loss: 2.3094 - accuracy: 0.1066 - val_loss: 2.3107 - val_accuracy: 0.0958
Epoch 8/200
1875/1875 - 4s - loss: 2.3088 - accuracy: 0.1045 - val_loss: 2.3081 - val_accuracy: 0.1009
Epoch 9/200
1875/1875 - 4s - loss: 2.3082 - accuracy: 0.1064 - val_loss: 2.3058 - val_accuracy: 0.0974
Epoch 10/200
1875/1875 - 4s - loss: 2.3080 - accuracy: 0.1062 - val_loss: 2.3116 - va

  * With Optimizer = Adam


In [19]:
init_xavier = tf.initializers.GlorotUniform()
model_d_adam = tf.keras.models.Sequential([
                                     
    tf.keras.layers.Flatten(input_shape=(28, 28)),

    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_xavier, name='layer1'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_xavier, name='layer2'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_xavier, name='layer3'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_xavier, name='layer4'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_xavier, name='layer5'),
    tf.keras.layers.Dense(CLASSES, activation='softmax', name='layer_softmax')

])
model_d_adam.compile(
  optimizer='adam',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  # loss='mse',
  metrics=['accuracy'])

In [20]:
csv_log_model_d_adam = CSVLogger("d_adam_results.csv")
d_sgd_csv_logger = model_d_adam.fit(
    x_train, 
    y_train, 
    epochs=200, 
    validation_data=(x_test, y_test), 
    # batch_size=10, 
    verbose=2,
    callbacks=[csv_log_model_d_adam]
)
!cp d_adam_results.csv "drive/My Drive/"

Epoch 1/200


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


1875/1875 - 6s - loss: 0.5054 - accuracy: 0.8328 - val_loss: 0.2179 - val_accuracy: 0.9339
Epoch 2/200
1875/1875 - 5s - loss: 0.1711 - accuracy: 0.9505 - val_loss: 0.1355 - val_accuracy: 0.9613
Epoch 3/200
1875/1875 - 5s - loss: 0.1182 - accuracy: 0.9658 - val_loss: 0.1135 - val_accuracy: 0.9668
Epoch 4/200
1875/1875 - 5s - loss: 0.0897 - accuracy: 0.9736 - val_loss: 0.1167 - val_accuracy: 0.9679
Epoch 5/200
1875/1875 - 5s - loss: 0.0700 - accuracy: 0.9796 - val_loss: 0.1245 - val_accuracy: 0.9643
Epoch 6/200
1875/1875 - 5s - loss: 0.0558 - accuracy: 0.9835 - val_loss: 0.0915 - val_accuracy: 0.9749
Epoch 7/200
1875/1875 - 5s - loss: 0.0455 - accuracy: 0.9861 - val_loss: 0.0907 - val_accuracy: 0.9788
Epoch 8/200
1875/1875 - 5s - loss: 0.0383 - accuracy: 0.9888 - val_loss: 0.0869 - val_accuracy: 0.9782
Epoch 9/200
1875/1875 - 5s - loss: 0.0313 - accuracy: 0.9908 - val_loss: 0.1082 - val_accuracy: 0.9732
Epoch 10/200
1875/1875 - 5s - loss: 0.0291 - accuracy: 0.9913 - val_loss: 0.0981 - va

e. Activation function: ReLU; initialization: Kaiming He's initializer
  * With Optimizer = SGD

In [None]:
init_he = tf.initializers.HeUniform()
model_e_sgd = tf.keras.models.Sequential([
                                     
    tf.keras.layers.Flatten(input_shape=(28, 28)),

    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_he, name='layer1'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_he, name='layer2'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_he, name='layer3'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_he, name='layer4'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_he, name='layer5'),
    tf.keras.layers.Dense(CLASSES, activation='softmax', name='layer_softmax')


])
model_e_sgd.compile(
  optimizer='sgd',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  # loss='mse',
  metrics=['accuracy'])

In [None]:
csv_log_model_e_sgd = CSVLogger("e_sgd_results.csv")
e_sgd_csv_logger = model_e_sgd.fit(
    x_train, 
    y_train, 
    epochs=200, 
    validation_data=(x_test, y_test), 
    # batch_size=10, 
    verbose=2,
    callbacks=[csv_log_model_e_sgd]
)
!cp e_sgd_results.csv "drive/My Drive/"

Epoch 1/200


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


1875/1875 - 6s - loss: 0.3733 - accuracy: 0.8917 - val_loss: 0.1808 - val_accuracy: 0.9462
Epoch 2/200
1875/1875 - 5s - loss: 0.1574 - accuracy: 0.9531 - val_loss: 0.1284 - val_accuracy: 0.9611
Epoch 3/200
1875/1875 - 4s - loss: 0.1108 - accuracy: 0.9673 - val_loss: 0.1184 - val_accuracy: 0.9646
Epoch 4/200
1875/1875 - 4s - loss: 0.0852 - accuracy: 0.9744 - val_loss: 0.0942 - val_accuracy: 0.9722
Epoch 5/200
1875/1875 - 4s - loss: 0.0681 - accuracy: 0.9805 - val_loss: 0.0923 - val_accuracy: 0.9705
Epoch 6/200
1875/1875 - 4s - loss: 0.0542 - accuracy: 0.9840 - val_loss: 0.0841 - val_accuracy: 0.9739
Epoch 7/200
1875/1875 - 4s - loss: 0.0438 - accuracy: 0.9874 - val_loss: 0.0785 - val_accuracy: 0.9755
Epoch 8/200
1875/1875 - 4s - loss: 0.0357 - accuracy: 0.9900 - val_loss: 0.0779 - val_accuracy: 0.9757
Epoch 9/200
1875/1875 - 4s - loss: 0.0283 - accuracy: 0.9923 - val_loss: 0.0736 - val_accuracy: 0.9780
Epoch 10/200
1875/1875 - 4s - loss: 0.0224 - accuracy: 0.9943 - val_loss: 0.0751 - va

  * With Optimizer = *Adam*

In [None]:
init_he = tf.initializers.HeUniform()
model_e_adam = tf.keras.models.Sequential([
            
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_he, name='layer1'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_he, name='layer2'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_he, name='layer3'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_he, name='layer4'),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer=init_he, name='layer5'),
    tf.keras.layers.Dense(CLASSES, activation='softmax', name='layer_softmax')

])
model_e_adam.compile(
  optimizer='adam',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  # loss='mse',
  metrics=['accuracy'])

In [None]:
csv_log_model_e_adam = CSVLogger("e_adam_results.csv")
e_adam_csv_logger = model_e_adam.fit(
    x_train, 
    y_train, 
    epochs=200, 
    validation_data=(x_test, y_test), 
    # batch_size=10, 
    verbose=2,
    callbacks=[csv_log_model_e_adam]
)
!cp e_adam_results.csv "drive/My Drive/"

Epoch 1/200


  '"`sparse_categorical_crossentropy` received `from_logits=True`, but '


1875/1875 - 6s - loss: 0.2262 - accuracy: 0.9336 - val_loss: 0.1476 - val_accuracy: 0.9584
Epoch 2/200
1875/1875 - 5s - loss: 0.1171 - accuracy: 0.9672 - val_loss: 0.1000 - val_accuracy: 0.9685
Epoch 3/200
1875/1875 - 6s - loss: 0.0867 - accuracy: 0.9758 - val_loss: 0.1120 - val_accuracy: 0.9725
Epoch 4/200
1875/1875 - 5s - loss: 0.0723 - accuracy: 0.9809 - val_loss: 0.1308 - val_accuracy: 0.9682
Epoch 5/200
1875/1875 - 5s - loss: 0.0615 - accuracy: 0.9830 - val_loss: 0.1100 - val_accuracy: 0.9740
Epoch 6/200
1875/1875 - 5s - loss: 0.0495 - accuracy: 0.9867 - val_loss: 0.1496 - val_accuracy: 0.9719
Epoch 7/200
1875/1875 - 5s - loss: 0.0468 - accuracy: 0.9870 - val_loss: 0.1029 - val_accuracy: 0.9793
Epoch 8/200
1875/1875 - 5s - loss: 0.0372 - accuracy: 0.9898 - val_loss: 0.1173 - val_accuracy: 0.9777
Epoch 9/200
1875/1875 - 5s - loss: 0.0414 - accuracy: 0.9890 - val_loss: 0.1042 - val_accuracy: 0.9810
Epoch 10/200
1875/1875 - 5s - loss: 0.0334 - accuracy: 0.9918 - val_loss: 0.1155 - va

In [21]:
# class TestCallback(tf.keras.callbacks.Callback):
#     def __init__(self, test_data):
#         self.test_data = test_data

#     def on_epoch_end(self, epoch, logs={}):
#         x, y = self.test_data
#         loss, acc = self.model.evaluate(x, y, verbose=0)
#         print('\nTesting loss: {}, acc: {}\n'.format(loss, acc))


---
#### Citations:
* *CSVLogger callbacks:* https://towardsdatascience.com/a-practical-introduction-to-keras-callbacks-in-tensorflow-2-705d0c584966
* *Callbacks:* https://github.com/keras-team/keras/issues/2548 

---