# Exercise 14.9
## Hands-on Machine Learning with Scikit-Learn, Keras and Tensorflow, 2nd Ed

__Andrés Felipe García Albarracín <br>
Feb 8, 2021__ <br>

Build your own CNN from scratch and try to achieve the highest possible accuracy on MNIST.

## 1. Libraries

In [1]:
import numpy as np
import time
import pandas as pd

In [2]:
import tensorflow as tf
# Show which GPUs TensorFlow can see (stable alias of the experimental call).
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

## 2. Load input data

In [3]:
# Fetch MNIST through Keras and unpack the train / test pairs separately.
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
x_train, y_train = mnist_train
x_test, y_test = mnist_test

In [4]:
# Split the original test set in two: first half -> validation, second half -> test.
# NOTE: x_test / y_test are overwritten here, so re-running this cell without
# reloading the data halves the test set again.
mitad = x_test.shape[0] // 2  # "mitad" = half
x_val, x_test = x_test[:mitad], x_test[mitad:]
y_val, y_test = y_test[:mitad], y_test[mitad:]

In [5]:
# Per-sample shape of the images (everything after the batch axis).
x_test[0].shape

(28, 28)

In [6]:
# Column layout of the table that collects one row of results per model.
colResults = [
    "Model_name",
    "Description",
    "Loss",
    "Accuracy",
]
# Start with an empty table; rows are added after each model is evaluated.
dfResults = pd.DataFrame(columns=colResults)

## 3. Model proposals

### a) A model very similar to LeNet-5

In [7]:
# Model A: conv / average-pooling stack, flattened into a 10-way softmax.
inputs = tf.keras.Input(shape=(28, 28, 1))

# Layers are listed in the order they are applied to the input tensor.
stack = [
    tf.keras.layers.Conv2D(8, 5, strides=2, padding="same", activation="relu"),
    tf.keras.layers.AveragePooling2D(pool_size=2, strides=2, padding="valid"),
    tf.keras.layers.Conv2D(16, 3, strides=1, padding="same", activation="relu"),
    tf.keras.layers.AveragePooling2D(pool_size=3, strides=2, padding="valid"),
    tf.keras.layers.Conv2D(64, 2, strides=1, padding="same", activation="relu"),
    tf.keras.layers.Conv2D(128, 3, strides=1, padding="valid", activation="relu"),
    tf.keras.layers.Conv2D(32, 1, strides=1, padding="valid", activation="relu"),
    tf.keras.layers.Flatten(),
]

x = inputs
for layer in stack:
    x = layer(x)

# One output unit per digit class.
outputs = tf.keras.layers.Dense(10, activation="softmax")(x)

In [8]:
# Wrap the layer graph (input tensor -> softmax output) into a trainable model.
model = tf.keras.Model(inputs, outputs)

In [9]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 14, 14, 8)         208       
_________________________________________________________________
average_pooling2d (AveragePo (None, 7, 7, 8)           0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 7, 7, 16)          1168      
_________________________________________________________________
average_pooling2d_1 (Average (None, 3, 3, 16)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 3, 64)          4160      
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 1, 1, 128)        

In [10]:
# Labels are integer class ids, hence the sparse cross-entropy loss.
# No optimizer is passed, so Keras uses its default one.
model.compile(loss="sparse_categorical_crossentropy", metrics=["accuracy"])

__Model training__

In [11]:
# --- Training parameters ---------------------------------------------------
epochs = 100
patience = 10

# --- Callbacks ---------------------------------------------------------------
# TensorBoard logs are written to a directory stamped with the launch time.
log_dir = "./Results_tensorboard/Ex_14_9/" + time.strftime("model_A_%Y_%m_%d_%H_%M_%S")
tb_cb = tf.keras.callbacks.TensorBoard(log_dir)
# Early stopping uses `patience` and restores the best weights seen.
es_cb = tf.keras.callbacks.EarlyStopping(
    patience=patience,
    restore_best_weights=True,
)

# --- Training ----------------------------------------------------------------
model.fit(
    x_train,
    y_train,
    epochs=epochs,
    callbacks=[tb_cb, es_cb],
    validation_data=(x_val, y_val),
)

Epoch 1/100
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


<tensorflow.python.keras.callbacks.History at 0x7fb52005bc10>

In [12]:
# Score the model on the held-out test half; kept in resultAct for the results table.
resultAct = model.evaluate(x_test, y_test)
print(resultAct)

[0.04961315169930458, 0.9882000088691711]


In [13]:
# Build the table row as a new list instead of inserting into resultAct in
# place: re-running the cell then no longer stacks duplicate labels onto the
# evaluation result.
rowA = ['A', 'Similar to LeNet', *resultAct]
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
dfResults = pd.concat(
    [dfResults, pd.DataFrame(columns=colResults, data=[rowA])],
    ignore_index=True)
dfResults

Unnamed: 0,Model_name,Description,Loss,Accuracy
0,A,Similar to LeNet,0.049613,0.9882


### b) Similar to LeNet, including dropout

In [14]:
# Model B: same stack as model A, with dropout after each of the last two convolutions.
inputs = tf.keras.Input(shape=(28, 28, 1))

# Layers are listed in the order they are applied to the input tensor.
stack = [
    tf.keras.layers.Conv2D(8, 5, strides=2, padding="same", activation="relu"),
    tf.keras.layers.AveragePooling2D(pool_size=2, strides=2, padding="valid"),
    tf.keras.layers.Conv2D(16, 3, strides=1, padding="same", activation="relu"),
    tf.keras.layers.AveragePooling2D(pool_size=3, strides=2, padding="valid"),
    tf.keras.layers.Conv2D(64, 2, strides=1, padding="same", activation="relu"),
    tf.keras.layers.Conv2D(128, 3, strides=1, padding="valid", activation="relu"),
    tf.keras.layers.Dropout(0.45),
    tf.keras.layers.Conv2D(32, 1, strides=1, padding="valid", activation="relu"),
    tf.keras.layers.Dropout(0.45),
    tf.keras.layers.Flatten(),
]

x = inputs
for layer in stack:
    x = layer(x)

# One output unit per digit class.
outputs = tf.keras.layers.Dense(10, activation="softmax")(x)

In [15]:
# Wrap the layer graph into a model and show its layer table.
model = tf.keras.Model(inputs = inputs, outputs = outputs)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 14, 14, 8)         208       
_________________________________________________________________
average_pooling2d_2 (Average (None, 7, 7, 8)           0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 7, 7, 16)          1168      
_________________________________________________________________
average_pooling2d_3 (Average (None, 3, 3, 16)          0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 3, 3, 64)          4160      
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 1, 1, 128)        

In [16]:
# --- Compile -----------------------------------------------------------------
# Labels are integer class ids, hence the sparse cross-entropy loss.
# No optimizer is passed, so Keras uses its default one.
model.compile(loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# --- Training parameters -----------------------------------------------------
epochs = 100
patience = 10

# --- Callbacks ---------------------------------------------------------------
# TensorBoard logs are written to a directory stamped with the launch time.
log_dir = "./Results_tensorboard/Ex_14_9/" + time.strftime("model_B_%Y_%m_%d_%H_%M_%S")
tb_cb = tf.keras.callbacks.TensorBoard(log_dir)
# Early stopping uses `patience` and restores the best weights seen.
es_cb = tf.keras.callbacks.EarlyStopping(
    patience=patience,
    restore_best_weights=True,
)

# --- Training ----------------------------------------------------------------
model.fit(
    x_train,
    y_train,
    epochs=epochs,
    callbacks=[tb_cb, es_cb],
    validation_data=(x_val, y_val),
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


<tensorflow.python.keras.callbacks.History at 0x7fb555d34af0>

In [17]:
# Report [loss, accuracy] on the training set and then on the validation set.
for features, labels in ((x_train, y_train), (x_val, y_val)):
    print(model.evaluate(x=features, y=labels))

[0.12778420746326447, 0.9652500152587891]
[0.1573275476694107, 0.9562000036239624]


In [18]:
# Score the model on the held-out test half; kept in resultAct for the results table.
resultAct = model.evaluate(x_test, y_test)
print(resultAct)

[0.07679233700037003, 0.9775999784469604]


In [19]:
# Build the table row as a new list instead of inserting into resultAct in
# place: re-running the cell then no longer stacks duplicate labels onto the
# evaluation result.
rowB = ['B', 'Similar to LeNet including dropout at the final layers', *resultAct]
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
dfResults = pd.concat(
    [dfResults, pd.DataFrame(columns=colResults, data=[rowB])],
    ignore_index=True)
dfResults

Unnamed: 0,Model_name,Description,Loss,Accuracy
0,A,Similar to LeNet,0.049613,0.9882
1,B,Similar to LeNet including dropout at the fina...,0.076792,0.9776


### c) Local response normalization
Including some local response normalization layers at the beginning of the network.

In [20]:
# Model C: model A's stack with a local response normalization (LRN) step
# after each of the two average-pooling layers.
inputs = tf.keras.Input(shape=(28, 28, 1))

# Layers are listed in the order they are applied to the input tensor.
stack = [
    tf.keras.layers.Conv2D(8, 5, strides=2, padding="same", activation="relu"),
    tf.keras.layers.AveragePooling2D(pool_size=2, strides=2, padding="valid"),
    tf.keras.layers.Lambda(
        lambda t: tf.nn.local_response_normalization(
            t, depth_radius=2, bias=1, alpha=0.00002, beta=0.75)),
    tf.keras.layers.Conv2D(16, 3, strides=1, padding="same", activation="relu"),
    tf.keras.layers.AveragePooling2D(pool_size=3, strides=2, padding="valid"),
    tf.keras.layers.Lambda(
        lambda t: tf.nn.local_response_normalization(
            t, depth_radius=2, bias=1, alpha=0.00002, beta=0.75)),
    tf.keras.layers.Conv2D(64, 2, strides=1, padding="same", activation="relu"),
    tf.keras.layers.Conv2D(128, 3, strides=1, padding="valid", activation="relu"),
    tf.keras.layers.Conv2D(32, 1, strides=1, padding="valid", activation="relu"),
    tf.keras.layers.Flatten(),
]

x = inputs
for layer in stack:
    x = layer(x)

# One output unit per digit class.
outputs = tf.keras.layers.Dense(10, activation="softmax")(x)

In [21]:
# Wrap the layer graph into a model and show its layer table.
model = tf.keras.Model(inputs = inputs, outputs = outputs)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 14, 14, 8)         208       
_________________________________________________________________
average_pooling2d_4 (Average (None, 7, 7, 8)           0         
_________________________________________________________________
lambda (Lambda)              (None, 7, 7, 8)           0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 7, 7, 16)          1168      
_________________________________________________________________
average_pooling2d_5 (Average (None, 3, 3, 16)          0         
_________________________________________________________________
lambda_1 (Lambda)            (None, 3, 3, 16)         

In [22]:
# --- Compile -----------------------------------------------------------------
# Labels are integer class ids, hence the sparse cross-entropy loss.
# No optimizer is passed, so Keras uses its default one.
model.compile(loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# --- Training parameters -----------------------------------------------------
epochs = 100
patience = 10

# --- Callbacks ---------------------------------------------------------------
# TensorBoard logs are written to a directory stamped with the launch time.
log_dir = "./Results_tensorboard/Ex_14_9/" + time.strftime("model_C_%Y_%m_%d_%H_%M_%S")
tb_cb = tf.keras.callbacks.TensorBoard(log_dir)
# Early stopping uses `patience` and restores the best weights seen.
es_cb = tf.keras.callbacks.EarlyStopping(
    patience=patience,
    restore_best_weights=True,
)

# --- Training ----------------------------------------------------------------
model.fit(
    x_train,
    y_train,
    epochs=epochs,
    callbacks=[tb_cb, es_cb],
    validation_data=(x_val, y_val),
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100


<tensorflow.python.keras.callbacks.History at 0x7fb555d45d00>

In [23]:
# Report [loss, accuracy] on the training set and then on the validation set.
for features, labels in ((x_train, y_train), (x_val, y_val)):
    print(model.evaluate(x=features, y=labels))

[0.05024096742272377, 0.9860666394233704]
[0.07750502228736877, 0.9783999919891357]


In [24]:
# Score the model on the held-out test half; kept in resultAct for the results table.
resultAct = model.evaluate(x_test, y_test)
print(resultAct)

[0.045939572155475616, 0.9872000217437744]


In [25]:
# Build the table row as a new list instead of inserting into resultAct in
# place: re-running the cell then no longer stacks duplicate labels onto the
# evaluation result.
rowC = ['C', 'Including local responde normalization in the initial layers.', *resultAct]
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
dfResults = pd.concat(
    [dfResults, pd.DataFrame(columns=colResults, data=[rowC])],
    ignore_index=True)
dfResults

Unnamed: 0,Model_name,Description,Loss,Accuracy
0,A,Similar to LeNet,0.049613,0.9882
1,B,Similar to LeNet including dropout at the fina...,0.076792,0.9776
2,C,Including local responde normalization in the ...,0.04594,0.9872


### d) Going deeper
Including more convolutional layers with local response normalization

In [26]:
# Identifier reused for the TensorBoard run name and the results-table row.
modelName = 'D'

# Model D: deeper variant with extra convolutions before each pooling step
# and a local response normalization (LRN) step after each pooling layer.
inputs = tf.keras.Input(shape = (28,28,1))
# Block 1: two 5x5 convolutions (the first with stride 2), then pooling + LRN.
x = tf.keras.layers.Conv2D(filters = 8, kernel_size = 5, strides = 2, padding = "same", activation = "relu")(inputs)
x = tf.keras.layers.Conv2D(filters = 16, kernel_size = 5, strides = 1, padding = "same", activation = "relu")(x)
x = tf.keras.layers.AveragePooling2D(pool_size = 2, strides = 2, padding = "valid")(x)
x = tf.keras.layers.Lambda(lambda x: tf.nn.local_response_normalization(x, depth_radius=2, bias=1, alpha=0.00002, beta=0.75))(x)
# Block 2: two 3x3 convolutions, then pooling + LRN.
x = tf.keras.layers.Conv2D(filters = 32, kernel_size = 3, strides = 1, padding = "same", activation = "relu")(x)
x = tf.keras.layers.Conv2D(filters = 32, kernel_size = 3, strides = 1, padding = "same", activation = "relu")(x)
x = tf.keras.layers.AveragePooling2D(pool_size = 3, strides = 2, padding = "valid")(x)
x = tf.keras.layers.Lambda(lambda x: tf.nn.local_response_normalization(x, depth_radius=2, bias=1, alpha=0.00002, beta=0.75))(x)
# Head: two 2x2 convolutions, a 3x3 "valid" convolution, a 1x1 convolution,
# then flatten into the 10-way softmax classifier.
x = tf.keras.layers.Conv2D(filters = 64, kernel_size = 2, strides = 1, padding = "same", activation = "relu")(x)
x = tf.keras.layers.Conv2D(filters = 64, kernel_size = 2, strides = 1, padding = "same", activation = "relu")(x)
x = tf.keras.layers.Conv2D(filters = 128, kernel_size = 3, strides = 1, padding = "valid", activation = "relu")(x)
x = tf.keras.layers.Conv2D(filters = 32, kernel_size = 1, strides = 1, padding = "valid", activation = "relu")(x)
x = tf.keras.layers.Flatten()(x)
outputs = tf.keras.layers.Dense(units = 10, activation = "softmax")(x)

model = tf.keras.Model(inputs = inputs, outputs = outputs)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

Model: "functional_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 14, 14, 8)         208       
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 14, 14, 16)        3216      
_________________________________________________________________
average_pooling2d_6 (Average (None, 7, 7, 16)          0         
_________________________________________________________________
lambda_2 (Lambda)            (None, 7, 7, 16)          0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 7, 7, 32)          4640      
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 7, 7, 32)         

In [27]:
# --- Compile -----------------------------------------------------------------
# Labels are integer class ids, hence the sparse cross-entropy loss.
# No optimizer is passed, so Keras uses its default one.
model.compile(loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# --- Training parameters -----------------------------------------------------
epochs = 100
patience = 10

# --- Callbacks ---------------------------------------------------------------
# TensorBoard logs are written to a directory named after the model and stamped
# with the launch time.
log_dir = "./Results_tensorboard/Ex_14_9/" + time.strftime(f"model_{modelName}_%Y_%m_%d_%H_%M_%S")
tb_cb = tf.keras.callbacks.TensorBoard(log_dir)
# Early stopping uses `patience` and restores the best weights seen.
es_cb = tf.keras.callbacks.EarlyStopping(
    patience=patience,
    restore_best_weights=True,
)

# --- Training ----------------------------------------------------------------
model.fit(
    x_train,
    y_train,
    epochs=epochs,
    callbacks=[tb_cb, es_cb],
    validation_data=(x_val, y_val),
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100


<tensorflow.python.keras.callbacks.History at 0x7fb555d45e50>

In [28]:
# Report [loss, accuracy] on the training set and then on the validation set.
for features, labels in ((x_train, y_train), (x_val, y_val)):
    print(model.evaluate(x=features, y=labels))

# Test-set score; kept in resultAct for the results table.
resultAct = model.evaluate(x_test, y_test)
print(resultAct)

[0.04548601433634758, 0.9865166544914246]
[0.07245191186666489, 0.9783999919891357]
[0.03557169437408447, 0.9901999831199646]


In [29]:
# Build the table row as a new list instead of inserting into resultAct in
# place: re-running the cell then no longer stacks duplicate labels onto the
# evaluation result.
rowD = [modelName, 'Including more layers in the LeNet architecture.', *resultAct]
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
dfResults = pd.concat(
    [dfResults, pd.DataFrame(columns=colResults, data=[rowD])],
    ignore_index=True)
dfResults

Unnamed: 0,Model_name,Description,Loss,Accuracy
0,A,Similar to LeNet,0.049613,0.9882
1,B,Similar to LeNet including dropout at the fina...,0.076792,0.9776
2,C,Including local responde normalization in the ...,0.04594,0.9872
3,D,Including more layers in the LeNet architecture.,0.035572,0.9902


### e) Including inception blocks

In [None]:
class inceptionLayer(tf.keras.layers.Layer):
    def __init__(self, filters = [8, 16, 4, 4, 12, 2], kw)