### DNN example - MNIST dataset

#### Load the data

In [1]:
from tensorflow.keras.datasets import mnist

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

print(train_images.shape, test_images.shape)
print(len(train_labels), len(test_labels))

(60000, 28, 28) (10000, 28, 28)
60000 10000


#### Pre-processing
Reshaping, standardizing inputs and discretizing output (classification problem)

In [2]:
# Each image will be linearized and be represented in a line in a matrix.
# For this it is needed to adjust the dimensions for each entry to a 1D vector.
# There are going to be 28*28 (784, for each digit pixel) input nodes and 10 output nodes (for each digit class).
# Data is divided by its standard deviation for standarization

In [3]:
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype('float32') / 255
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype('float32') / 255

In [4]:
# Categorical transformation performs one-hot encoding.
# The label matrix is going to have 10 columns where each entry is binary.

In [5]:
from tensorflow.keras.utils import to_categorical

train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

#### Defining model structure (feedforward DNN)

In [None]:
# Groups a linear stack of layer into a Model and forms the models computational graph.
# This computational graph is sequential, so it means each layer completes each other.
# The number of layers and the number of nodes of each layer is the most important hyperparameter of Neural Networks.
# If we add one layer we call only test the number of nodes of the only intermediate layer.

In [None]:
# The model has 1 intermediate layer with 512 nodes that are activated by the RelU function.
# The output layer has 10 output nodes that represent each class and for the Multi-class classification problem, the activating function is the SoftMax.

In [6]:
from tensorflow.keras import models
from tensorflow.keras import layers

network = models.Sequential()
network.add(layers.Dense(512, activation='relu', input_shape=(28 * 28,)))
network.add(layers.Dense(10, activation='softmax'))

network.summary()
# Summary gives an idea of the model's complexity

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               401920    
                                                                 
 dense_1 (Dense)             (None, 10)                5130      
                                                                 
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


#### Training the DNN

In [None]:
# The compiler allows us to define the models' optimizer function, loss function and error metrics.
# As the training is done by batches we can see how the model is evolving as we are receiving each metric for each batch.
# Epochs -> number of times the dataset is worked through completely.

In [7]:
network.compile(optimizer='rmsprop',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

network.fit(train_images, train_labels, epochs=5, batch_size=128)
# After the first epoch the accuracy for image identification is 92%
# If the learning rate is too high the training can be unstable and not converge.
# Batch size -> defines the number of samples that will be propagated through the network.

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1c93427bd30>

## Exercise: Test the capacity
Build on layers and change number of nodes

In [8]:
network2 = models.Sequential()
network2.add(layers.Dense(300, activation='relu', input_shape=(28 * 28,)))
network2.add(layers.Dense(200, activation='relu'))
network2.add(layers.Dense(10, activation='softmax'))

network2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 300)               235500    
                                                                 
 dense_3 (Dense)             (None, 200)               60200     
                                                                 
 dense_4 (Dense)             (None, 10)                2010      
                                                                 
Total params: 297,710
Trainable params: 297,710
Non-trainable params: 0
_________________________________________________________________


In [9]:
network2.compile(optimizer='rmsprop',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

network2.fit(train_images, train_labels, epochs=5, batch_size=128)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1c9348d9210>

In [12]:
# Final accuracy of the second model is slightly higher than the first model.

In [10]:
network3 = models.Sequential()
network3.add(layers.Dense(500, activation='relu', input_shape=(28 * 28,)))
network3.add(layers.Dense(300, activation='relu'))
network3.add(layers.Dense(200, activation='relu'))
network3.add(layers.Dense(10, activation='softmax'))

network3.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_5 (Dense)             (None, 500)               392500    
                                                                 
 dense_6 (Dense)             (None, 300)               150300    
                                                                 
 dense_7 (Dense)             (None, 200)               60200     
                                                                 
 dense_8 (Dense)             (None, 10)                2010      
                                                                 
Total params: 605,010
Trainable params: 605,010
Non-trainable params: 0
_________________________________________________________________


In [11]:
network3.compile(optimizer='rmsprop',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

network3.fit(train_images, train_labels, epochs=5, batch_size=128)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1c934b9bd90>

In [None]:
# The third model has the highest value for accuracy.

#### Using the DNN to predict outputs for test set and calculating error

In [None]:
# Predicts the probability for each class

In [14]:
test_preds = network.predict(test_images)
test_preds



array([[3.52697953e-07, 1.05442766e-08, 3.23828608e-05, ...,
        9.99651790e-01, 1.23705249e-07, 1.98044700e-06],
       [6.33557278e-08, 3.67835048e-04, 9.99502778e-01, ...,
        3.72200708e-11, 1.50239775e-05, 8.78704390e-13],
       [2.14414101e-07, 9.98934925e-01, 2.97317420e-05, ...,
        8.19017529e-04, 1.46146704e-04, 3.61686034e-07],
       ...,
       [1.51808077e-13, 3.05466263e-10, 8.07607616e-13, ...,
        4.89016941e-07, 1.38808730e-07, 5.38404402e-06],
       [1.88790708e-07, 9.84396742e-09, 2.94454128e-10, ...,
        1.02248681e-08, 1.19096476e-04, 5.09961240e-10],
       [8.28736461e-07, 2.17906422e-11, 2.79393575e-08, ...,
        1.78340537e-11, 1.81671667e-09, 1.06463831e-10]], dtype=float32)

In [15]:
# Row 3 from Column 0, indicates that the models has a high accuracy (9,99)

In [16]:
import numpy as np

test_classes = np.argmax(network.predict(test_images), axis=-1)



In [18]:
# Class predictions for each example
test_classes

array([7, 2, 1, ..., 4, 5, 6], dtype=int64)

In [19]:
# Calculate the loss and accuracy of the test data
test_loss, test_acc = network.evaluate(test_images, test_labels)
print(test_loss, test_acc)

0.0633784756064415 0.9810000061988831


In [20]:
# We can attest that the model is not overfitting as the accuracy stays similar in both the training and testing dataset.