In [2]:
%matplotlib inline

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard

# 02 Training and Improving NNs - Lecture Demos

### Prepare MNIST

In [3]:
# Load MNIST as a (train, test) pair of (images, labels) tuples.
# NOTE(review): the images are fed to the models exactly as loaded; no rescaling step
# is visible in this notebook. Confirm whether pixel values should be scaled
# (e.g. to [0, 1]) before training.
(train_data, train_classes), (test_data, test_classes) = mnist.load_data()

In [4]:
# Sanity check of the training split: shape of the image array and of the label array.
train_data.shape, train_classes.shape

((60000, 28, 28), (60000,))

In [5]:
# Same check for the test split: shape of the image array and of the label array.
test_data.shape, test_classes.shape

((10000, 28, 28), (10000,))

In [6]:
# Shape of a single sample, taken from the first training image, and the number
# of distinct label values present in the training labels. Both are reused by
# every model definition in this notebook.
input_shape = train_data[0].shape
n_classes = np.unique(train_classes).size

print(f"{input_shape} {n_classes}")

(28, 28) 10


### Model 1 - baseline

In [7]:
# Baseline network: flatten each image, pass it through three hidden Dense layers
# (40, 30 and 20 units, created without an `activation` argument), then a softmax
# output layer with one unit per class.
model = Sequential([
    Input(shape=input_shape),
    Flatten(),
    *[Dense(units) for units in (40, 30, 20)],
    Dense(units=n_classes, activation='softmax'),
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 40)                31400     
                                                                 
 dense_1 (Dense)             (None, 30)                1230      
                                                                 
 dense_2 (Dense)             (None, 20)                620       
                                                                 
 dense_3 (Dense)             (None, 10)                210       
                                                                 
Total params: 33,460
Trainable params: 33,460
Non-trainable params: 0
_________________________________________________________________


In [8]:
# The labels are integer class ids and the network ends in a softmax layer, hence
# sparse categorical cross-entropy as the loss.
# Accuracy is tracked with SparseCategoricalAccuracy, which compares each integer
# label with the argmax of the predicted probabilities. The generic
# `tf.keras.metrics.Accuracy()` checks element-wise equality between labels and
# predictions, so it is not meaningful for probability outputs.
model.compile(
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    optimizer=tf.keras.optimizers.Adam(),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')],
)

In [9]:
# Train the baseline with Keras' default number of epochs (`epochs` is not
# passed); 10% of the training samples are held out for validation.
model.fit(
    x=train_data,
    y=train_classes,
    validation_split=0.1,
)



<keras.callbacks.History at 0x228b2529c70>

### Model 2 - activation

In [10]:
# Same layer sizes as the baseline, but every hidden Dense layer now uses a ReLU
# activation; the output layer stays a softmax over the classes.
model = Sequential([
    Input(shape=input_shape),
    Flatten(),
    *[Dense(units, activation='relu') for units in (40, 30, 20)],
    Dense(units=n_classes, activation='softmax'),
])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_4 (Dense)             (None, 40)                31400     
                                                                 
 dense_5 (Dense)             (None, 30)                1230      
                                                                 
 dense_6 (Dense)             (None, 20)                620       
                                                                 
 dense_7 (Dense)             (None, 10)                210       
                                                                 
Total params: 33,460
Trainable params: 33,460
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Sparse categorical cross-entropy loss with an Adam optimizer in its default
# configuration, both given by their Keras string identifiers.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)

In [12]:
# Train the ReLU model; `epochs` is left at the Keras default and 10% of the
# training samples are held out for validation.
model.fit(
    x=train_data,
    y=train_classes,
    validation_split=0.1,
)



<keras.callbacks.History at 0x228b62c32b0>

### Model 3 - regularizers

In [15]:
# ReLU network with a combined L1 + L2 penalty on the kernel of the first hidden
# layer only; the remaining layers are unregularized.
model = Sequential([
    Input(shape=input_shape),
    Flatten(),
    Dense(
        units=40,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001),
    ),
    *[Dense(units, activation='relu') for units in (30, 20)],
    Dense(units=n_classes, activation='softmax'),
])

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_3 (Flatten)         (None, 784)               0         
                                                                 
 dense_8 (Dense)             (None, 40)                31400     
                                                                 
 dense_9 (Dense)             (None, 30)                1230      
                                                                 
 dense_10 (Dense)            (None, 20)                620       
                                                                 
 dense_11 (Dense)            (None, 10)                210       
                                                                 
Total params: 33,460
Trainable params: 33,460
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Default-configured Adam and sparse categorical cross-entropy, selected through
# their Keras string identifiers.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)

In [17]:
# Train the regularized model; `epochs` is left at the Keras default and 10% of
# the training samples are held out for validation.
model.fit(
    x=train_data,
    y=train_classes,
    validation_split=0.1,
)



<keras.callbacks.History at 0x228b60c0a60>

### Model 4 - dropout

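In practice, dropout is often used instead of weight regularizers such as L1/L2.

Each `Dropout` layer can have its own `rate`, i.e. the fraction of its inputs that is randomly set to zero during training.

A `Dropout` layer acts on the output of the layer placed directly before it.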

In [19]:
# ReLU network with a Dropout layer after each of the first two hidden layers
# (rates 0.1 and 0.05); the last hidden layer and the output have no dropout.
model = Sequential([
    Input(shape=input_shape),
    Flatten(),
    Dense(units=40, activation='relu'),
    Dropout(0.1),
    Dense(units=30, activation='relu'),
    Dropout(0.05),
    Dense(units=20, activation='relu'),
    Dense(units=n_classes, activation='softmax'),
])

model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_4 (Flatten)         (None, 784)               0         
                                                                 
 dense_12 (Dense)            (None, 40)                31400     
                                                                 
 dropout (Dropout)           (None, 40)                0         
                                                                 
 dense_13 (Dense)            (None, 30)                1230      
                                                                 
 dropout_1 (Dropout)         (None, 30)                0         
                                                                 
 dense_14 (Dense)            (None, 20)                620       
                                                                 
 dense_15 (Dense)            (None, 10)               

In [20]:
# Adam with default settings and sparse categorical cross-entropy, referenced by
# their Keras string identifiers.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)

In [21]:
# Train the dropout model; `epochs` is left at the Keras default and 10% of the
# training samples are held out for validation.
model.fit(
    x=train_data,
    y=train_classes,
    validation_split=0.1,
)



<keras.callbacks.History at 0x228b7868f70>

### Model 5 - tensorboard

In [19]:
# Same dropout architecture as the previous section, assembled layer by layer;
# this copy is the one trained with the TensorBoard callback.
model = Sequential()
model.add(Input(shape=input_shape))
model.add(Flatten())
model.add(Dense(units=40, activation='relu'))
model.add(Dropout(rate=0.1))
model.add(Dense(units=30, activation='relu'))
model.add(Dropout(rate=0.05))
model.add(Dense(units=20, activation='relu'))
model.add(Dense(units=n_classes, activation='softmax'))

model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_4 (Flatten)         (None, 784)               0         
                                                                 
 dense_12 (Dense)            (None, 40)                31400     
                                                                 
 dropout (Dropout)           (None, 40)                0         
                                                                 
 dense_13 (Dense)            (None, 30)                1230      
                                                                 
 dropout_1 (Dropout)         (None, 30)                0         
                                                                 
 dense_14 (Dense)            (None, 20)                620       
                                                                 
 dense_15 (Dense)            (None, 10)               

In [20]:
# Loss and optimizer given as Keras string identifiers: sparse categorical
# cross-entropy and Adam with its default configuration.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)

In [None]:
# Train for 10 epochs with 10% of the training samples held out for validation.
# The TensorBoard callback is constructed without arguments, so it logs to its
# default location.
model.fit(
    train_data,
    train_classes,
    epochs=10,
    validation_split=0.1,
    callbacks=[TensorBoard()],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
  76/1688 [>.............................] - ETA: 5s - loss: 1.0221

### Model 6 - weight initializers

In [25]:
# Small stand-alone network used only to inspect freshly initialized weights:
# 20 input features feeding Dense layers of 5, 10 and 2 units.
new_model = Sequential(
    [Input(shape=(20,))] + [Dense(units) for units in (5, 10, 2)]
)

In [29]:
new_model.layers[0].kernel  # Shape is (20, 5): (input features, units of this layer)

<tf.Variable 'dense_16/kernel:0' shape=(20, 5) dtype=float32, numpy=
array([[ 0.08362398,  0.09778807,  0.08567849,  0.23466697, -0.13509059],
       [-0.32277614, -0.3444387 ,  0.34227124,  0.11765483,  0.33557817],
       [-0.1997306 ,  0.37902853,  0.2853416 ,  0.30499277,  0.35001746],
       [-0.27017736,  0.04396906,  0.02318951, -0.43664822, -0.01208434],
       [ 0.0422965 , -0.45406416, -0.24506332,  0.46997055,  0.4052585 ],
       [ 0.14840695, -0.46192193, -0.0698027 , -0.18631172,  0.46106836],
       [-0.28739658,  0.1136981 ,  0.352779  ,  0.27175334,  0.47735342],
       [ 0.4694228 , -0.06244263,  0.1153473 , -0.35899884, -0.36681843],
       [-0.22347516,  0.26761857, -0.38983718, -0.12819925,  0.34945753],
       [ 0.29177126, -0.2604078 , -0.34890962, -0.33208993, -0.19314656],
       [ 0.4157466 , -0.22383934, -0.4123483 ,  0.06362841, -0.0196321 ],
       [-0.17551044,  0.4265884 , -0.42637515,  0.19248441, -0.35184914],
       [ 0.22357723, -0.02982366, -0.102280

In [31]:
new_model.layers[1].kernel  # Shape is (5, 10): (units of the previous layer, units of this layer)

<tf.Variable 'dense_17/kernel:0' shape=(5, 10) dtype=float32, numpy=
array([[ 0.06221592, -0.16700593,  0.36394572,  0.0834235 , -0.09763598,
        -0.53136575, -0.3250643 , -0.08648252,  0.00359237,  0.40301114],
       [-0.22884741, -0.09620994,  0.11201131,  0.49379295,  0.59470063,
        -0.33178845, -0.19935095, -0.42648253,  0.07278019, -0.38036323],
       [ 0.06149679, -0.50385845, -0.4469502 ,  0.47943097, -0.3826065 ,
         0.5969114 ,  0.37790126, -0.30654302,  0.42641062,  0.53824633],
       [ 0.47752255, -0.2794031 , -0.53202873,  0.3784588 , -0.5498869 ,
         0.54257685, -0.10970753,  0.13454676,  0.12194788, -0.32304752],
       [-0.2873153 , -0.0114215 , -0.34724373,  0.4577499 , -0.2520647 ,
         0.27173352,  0.46544188, -0.06075174, -0.61788535, -0.11282951]],
      dtype=float32)>

In [32]:
new_model.layers[0].kernel_initializer  # Glorot uniform; no initializer was passed to Dense when the model was built

<keras.initializers.initializers_v2.GlorotUniform at 0x228b57626a0>

In [33]:
# NOTE(review): same expression as the previous cell. Possibly meant to inspect a
# different layer or attribute; confirm.
new_model.layers[0].kernel_initializer

<keras.initializers.initializers_v2.GlorotUniform at 0x228b57626a0>

### Model 7 - random search

### Model 8 - batch normalization

In [None]:
# Dropout network from the earlier section with one BatchNormalization layer
# inserted after the first Dropout; all other layers are unchanged.
model = Sequential([
    Input(shape=input_shape),
    Flatten(),
    Dense(units=40, activation='relu'),
    Dropout(0.1),
    BatchNormalization(),  # the only layer added relative to the dropout model
    Dense(units=30, activation='relu'),
    Dropout(0.05),
    Dense(units=20, activation='relu'),
    Dense(units=n_classes, activation='softmax'),
])

model.summary()