### Small Tensorflow project using MNIST dataset.

In [48]:
import tensorflow as tf

from tensorflow.keras.losses import SparseCategoricalCrossentropy, MeanAbsoluteError, MeanSquaredError
from tensorflow.keras.metrics import Mean, SparseCategoricalAccuracy
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam, SGD, Adagrad
from tensorflow.keras import Model

import pandas as pd

import time


##### Load MNIST dataset

In [38]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Rescale to range 0..1
x_train = x_train / 255.0
x_test = x_test / 255.0

# Add channels
x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")

##### Shuffle dataset and prepare batches

In [39]:
training_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
training_dataset = training_dataset.shuffle(buffer_size=training_dataset.cardinality()).batch(32)

testing_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

##### Create model class

In [40]:
class model_1conv(Model):
    # Constructor
    def __init__(self, n_of_filters, n_of_dense_neurons):
        super(model_1conv, self).__init__()

        self.conv = Conv2D(filters=n_of_filters,
                           kernel_size=3,
                           activation="relu")
        self.flatten = Flatten()
        self.dense_1 = Dense(n_of_dense_neurons, activation="relu")
        self.dense_2 = Dense(10)

    # Call method
    def call(self, x):
        x = self.conv(x)
        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dense_2(x)

        return x

In [41]:
class model_2conv(Model):
    # Constructor
    def __init__(self, n_of_filters, n_of_dense_neurons):
        super(model_2conv, self).__init__()

        self.conv_1 = Conv2D(filters=n_of_filters[0],
                             kernel_size=3,
                             activation="relu")
        self.conv_1 = Conv2D(filters=n_of_filters[1],
                             kernel_size=3,
                             activation="relu")
        self.flatten = Flatten()
        self.dense_1 = Dense(n_of_dense_neurons, activation="relu")
        self.dense_2 = Dense(10)

    # Call method
    def call(self, x):
        x = self.conv_1(x)
        x = self.conv_2(x)
        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dense_2(x)

        return x

##### Create instances of models

In [42]:
# Different model parameters
filters = [4, 8, 16, 32, 64]
dense_neurons = [16, 32, 64, 128, 256]

# Models with one Convolutional Layer
conv1_var_f = [model_1conv(n_of_filters=f, n_of_dense_neurons=128) for f in filters]            # Variated filters
conv1_var_d = [model_1conv(n_of_filters=32, n_of_dense_neurons=dn) for dn in dense_neurons]     # Variated number of neurons in Dense layer

# Model with two Convolutional Layers
conv2_var_f = [model_2conv(n_of_filters=(f, f), n_of_dense_neurons=128) for f in filters]               # Variated filters
conv2_var_f = [model_2conv(n_of_filters=(32, 32), n_of_dense_neurons=dn) for dn in dense_neurons]       # Variated number of neurons in Dense layer

##### Loss functions and optimizers for training

In [43]:
loss_objects = [MeanAbsoluteError(), MeanSquaredError(), SparseCategoricalCrossentropy(from_logits=True)]
optimizers = [Adam(), SGD(), Adagrad()]

In [44]:
# Metrics for measurement of model's loss and accuracy
training_loss = Mean(name="train_loss")
testing_loss = Mean(name="test_loss")

training_accuracy = SparseCategoricalAccuracy(name="train_acc")
testing_accuracy = SparseCategoricalAccuracy(name="test_acc")

##### Model's training and testing functions

In [45]:
# Define function as Tensorflow Graph
# @tf.function
def train_step(model, loss_object, optimizer, images, labels):

    # Automatic differentiation
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    training_loss(loss)
    training_accuracy(labels, predictions)

In [46]:
# Define function as Tensorflow Graph
# @tf.function
def test_step(model, loss_object, images, labels):
    predictions = model(images, training=False)
    loss = loss_object(labels, predictions)

    testing_loss(loss)
    testing_accuracy(labels, predictions)

##### Training and model metrics acquisition

The deafult model has one Convolutional Layer with 32 filters and a Dense Layer with 128 neurons, uses SparseCategoricalCrossentropy to calculate loss and uses Adam optimizer.

I decided to compare models after changing one of the following aspects:

- Number of filters used in Convolutional Layers
- Number of neurons used in Dense Layers
- What impact will adding one additional Convolutional Layer have (and if previously mentions aspects matter after adding it)
- Type of Loss Function used
- Type of optimizer used 

In [51]:
N_OF_EPOCHS = 5

metrics = pd.DataFrame(columns=["train_loss", "train_acc", "train_time" , "test_loss", "test_acc"])

# Training and testing of models with one Convolutional Layer and variated number of filters
for i, model in enumerate(conv1_var_f):       # [4, 8, 16, 32, 64]
    print(f"Model with {filters[i]} filters:")

    for epoch in range(N_OF_EPOCHS):

        # Metrics reset
        training_loss.reset_states()
        testing_loss.reset_states()
        training_accuracy.reset_states()
        testing_accuracy.reset_states()

        # Training
        t_start = time.time()
        for train_images, train_labels in training_dataset:
            train_step(model=model,
                       loss_object=SparseCategoricalCrossentropy(from_logits=True),
                       optimizer=Adam(),
                       images=train_images,
                       labels=train_labels)
        t_stop = time.time()

        # Testing
        for test_images, test_labels in testing_dataset:
            test_step(model=model,
                      loss_object=SparseCategoricalCrossentropy(from_logits=True),
                      images=test_images,
                      labels=test_labels)

        record = {"train_loss": training_loss.result(),
                  "train_acc": training_accuracy.result(),
                  "train_time": t_stop - t_start,
                  "test_loss": testing_loss.result(),
                  "test_acc": testing_accuracy.result()}
        metrics = pd.concat([metrics, pd.DataFrame([record])], ignore_index=True)

        print(f"    Epoch {epoch + 1}, "
              f"    Loss: {training_loss.result()}, "
              f"    Accuracy: {training_accuracy.result() * 100}, "
              f"    Test Loss: {testing_loss.result()}, "
              f"    Test Accuracy: {testing_accuracy.result() * 100}")

    print("\n")


Model with 4 filters:


  metrics = pd.concat([metrics, pd.DataFrame([record])], ignore_index=True)


    Epoch 1,     Loss: 0.21204976737499237,     Accuracy: 95.03499603271484,     Test Loss: 0.17134815454483032,     Test Accuracy: 95.69000244140625
    Epoch 2,     Loss: 0.23069767653942108,     Accuracy: 94.9000015258789,     Test Loss: 0.1959061324596405,     Test Accuracy: 95.36000061035156
    Epoch 3,     Loss: 0.2200690060853958,     Accuracy: 95.37666320800781,     Test Loss: 0.2592508792877197,     Test Accuracy: 95.44000244140625
    Epoch 4,     Loss: 0.20304836332798004,     Accuracy: 95.83833312988281,     Test Loss: 0.18744854629039764,     Test Accuracy: 96.18000030517578
    Epoch 5,     Loss: 0.19112274050712585,     Accuracy: 96.1683349609375,     Test Loss: 0.2564159035682678,     Test Accuracy: 95.22000122070312
Model with 8 filters:
    Epoch 1,     Loss: 0.2497825026512146,     Accuracy: 92.79500579833984,     Test Loss: 0.1670392006635666,     Test Accuracy: 95.59000396728516
    Epoch 2,     Loss: 0.1685982197523117,     Accuracy: 95.68499755859375,     Test L