# Train Deep Neural Networks on MNIST dataset

In [1]:
import numpy as np
import tensorflow as tf
import os

In [2]:
# Scratch directory that receives the checkpoint files written during training.
output_dir = '.tmp'
# exist_ok=True makes this a no-op when the directory is already present, so the
# cell can be re-run safely and there is no gap between "check" and "create".
os.makedirs(output_dir, exist_ok=True)

In [3]:
# Seed NumPy's and TensorFlow's global random generators with the same value.
SEED = 2019
np.random.seed(SEED)
tf.random.set_seed(SEED)

## Load and preprocess data

In [4]:
# Fetch the MNIST train/test splits through the Keras dataset helper.
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a single row of 28*28 values and divide by 255.
n_pixels = 28 * 28
X_train = X_train.reshape(-1, n_pixels) / 255.0
X_test = X_test.reshape(-1, n_pixels) / 255.0

In [5]:
def split_by_digits(X, y):
    """Split samples into two groups according to their label.

    Args:
        X: Array of samples, indexable by a boolean mask built from ``y``.
        y: Array of labels aligned with the first axis of ``X``.

    Returns:
        A pair ``((X_low, y_low), (X_high, y_high))`` where the first group
        holds the entries with ``y < 5`` and the second those with ``y >= 5``.
    """
    below_five = y < 5
    five_and_up = y >= 5
    low_group = (X[below_five], y[below_five])
    high_group = (X[five_and_up], y[five_and_up])
    return low_group, high_group

In [6]:
# Partition each split by label: the `_0` variables hold the samples with
# labels below 5, the `_5` variables hold the samples with labels 5 and above.
train_low, train_high = split_by_digits(X_train, y_train)
test_low, test_high = split_by_digits(X_test, y_test)

X_train_0, y_train_0 = train_low
X_train_5, y_train_5 = train_high
X_test_0, y_test_0 = test_low
X_test_5, y_test_5 = test_high

## Learn on digits 0-4

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

def build_model(n_inputs, n_outputs, params=None):
    """Build and compile a fully connected softmax classifier.

    The network stacks five hidden ``Dense`` layers (100 units, ELU
    activation, He-uniform kernel initialisation) followed by a softmax
    output layer. It is compiled with the Adam optimizer, sparse categorical
    cross-entropy loss and an accuracy metric.

    Args:
        n_inputs: Shape tuple passed to the first layer as ``input_shape``.
        n_outputs: Number of units in the softmax output layer.
        params: Optional dict of keyword arguments forwarded to ``Adam``.
            ``None`` (the default) or an empty dict uses Adam's own defaults.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Use a fresh dict instead of a mutable default argument.
    params = {} if params is None else params
    model = Sequential()
    layer_params = {
        'units': 100,
        'kernel_initializer': 'he_uniform',
        'activation': 'elu'
    }
    # Only the first layer declares the input shape.
    model.add(Dense(input_shape=n_inputs, **layer_params))
    for _ in range(4):
        model.add(Dense(**layer_params))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(optimizer=Adam(**params),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [8]:
# Baseline network: flattened 28*28 inputs, 5 output classes, default Adam settings.
model_0 = build_model(n_inputs=(28 * 28,), n_outputs=5, params={})
model_0.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               78500     
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 5)                 505       
Total params: 119,405
Trainable params: 119,405
Non-trainable params: 0
__________________________________________________

In [9]:
# checkpoints
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# File that receives the weights whenever validation accuracy reaches a new maximum.
filepath = os.path.join(output_dir, 'model-weights.hdf5')
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
# Stop once validation accuracy has gone 10 epochs without improving by at
# least 1e-4, and roll the model back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    min_delta=1e-4,
    patience=10,
    restore_best_weights=True,
)

In [10]:
# Train for at most 100 epochs; verbose=0 hides Keras' own progress output, so
# the only log lines come from the checkpoint callback.
# NOTE(review): the test split doubles as the validation set here.
model_0.fit(
    x=X_train_0,
    y=y_train_0,
    epochs=100,
    verbose=0,
    validation_data=(X_test_0, y_test_0),
    callbacks=[checkpoint, early_stopping],
)


Epoch 00001: val_accuracy improved from -inf to 0.98093, saving model to .tmp\model-weights.hdf5

Epoch 00002: val_accuracy improved from 0.98093 to 0.98599, saving model to .tmp\model-weights.hdf5

Epoch 00003: val_accuracy improved from 0.98599 to 0.98871, saving model to .tmp\model-weights.hdf5

Epoch 00004: val_accuracy improved from 0.98871 to 0.99047, saving model to .tmp\model-weights.hdf5

Epoch 00005: val_accuracy did not improve from 0.99047

Epoch 00006: val_accuracy improved from 0.99047 to 0.99280, saving model to .tmp\model-weights.hdf5

Epoch 00007: val_accuracy did not improve from 0.99280

Epoch 00008: val_accuracy improved from 0.99280 to 0.99319, saving model to .tmp\model-weights.hdf5

Epoch 00009: val_accuracy did not improve from 0.99319

Epoch 00010: val_accuracy did not improve from 0.99319

Epoch 00011: val_accuracy improved from 0.99319 to 0.99436, saving model to .tmp\model-weights.hdf5

Epoch 00012: val_accuracy did not improve from 0.99436

Epoch 00013: va

<tensorflow.python.keras.callbacks.History at 0x257e6648c50>

### Tuning hyperparameters

In [11]:
%%time 

# Manual toggle: True re-runs the search over Adam settings, False skips it and
# falls back to the hard-coded settings in the else branch.
if True:
    # Each entry is a dict of keyword arguments forwarded to Adam by build_model.
    params_grid = [{},
                  {'decay': 1e-10},
                  {'decay': 1e-5, 'lr': 1e-3},
                  {'beta_1': 0.89, 'beta_2': 0.99}]

    best_params = None
    best_acc = -1
    for params in params_grid:
        model = build_model((28*28,), 5, params)
        # A fresh callback per candidate so no early-stopping state carries over.
        early_stopping = EarlyStopping(monitor='val_accuracy', mode='max', min_delta=1e-4, patience=10, restore_best_weights=True)
        model.fit(X_train_0, y_train_0,
                  epochs=100,
                  verbose=0,
                  validation_data=(X_test_0, y_test_0),
                  callbacks = [early_stopping])
        # NOTE(review): candidates are ranked on the same split that is used
        # for early stopping.
        _, acc = model.evaluate(X_test_0, y_test_0, verbose=0)
        print(params, acc)
        if acc > best_acc:
            best_acc = acc
            best_params = params

    print('best accuracy:', best_acc)
    # Report the winning configuration, not the last loop variable.
    print('best_params:', best_params)
else:
    best_params =  {'beta_1': 0.89, 'beta_2': 0.99}

{} 0.9945515
{'decay': 1e-10} 0.9943569
{'decay': 1e-05, 'lr': 0.001} 0.99513525
{'beta_1': 0.89, 'beta_2': 0.99} 0.99532986
best accuracy: 0.99532986
best_params: {'beta_1': 0.89, 'beta_2': 0.99}
Wall time: 3min 45s


### Using batch normalization

#### Learning with one normalization layer

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam

def build_model_batch_1(n_inputs, n_outputs, n_layers = 4, params = None):
    """Build and compile a dense classifier with one batch-normalization layer.

    The model has a first hidden ``Dense`` layer that declares the input
    shape, then ``n_layers`` further hidden ``Dense`` layers (all 100 units,
    ELU, He-uniform), then a softmax output layer. One ``BatchNormalization``
    layer is inserted directly after the hidden layer added at loop index
    ``n_layers // 2``; with ``n_layers == 0`` the loop does not run and no
    normalization layer is added.

    Args:
        n_inputs: Shape tuple passed to the first layer as ``input_shape``.
        n_outputs: Number of units in the softmax output layer.
        n_layers: Number of hidden layers added after the first one.
        params: Optional dict of keyword arguments forwarded to ``Adam``.

    Returns:
        The compiled ``Sequential`` model.
    """
    if params is None:
        params = {}

    dense_kwargs = dict(units=100, kernel_initializer='he_uniform', activation='elu')
    norm_after = n_layers // 2

    net = Sequential()
    net.add(Dense(input_shape=n_inputs, **dense_kwargs))
    for idx in range(n_layers):
        net.add(Dense(**dense_kwargs))
        if idx == norm_after:
            net.add(BatchNormalization())
    net.add(Dense(n_outputs, activation='softmax'))

    net.compile(
        optimizer=Adam(**params),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

In [13]:
# Single-normalization-layer variant with the builder's default n_layers and Adam settings.
model_0_batch_1 = build_model_batch_1(n_inputs=(28 * 28,), n_outputs=5)
model_0_batch_1.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_30 (Dense)             (None, 100)               78500     
_________________________________________________________________
dense_31 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_32 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_33 (Dense)             (None, 100)               10100     
_________________________________________________________________
batch_normalization_v2 (Batc (None, 100)               400       
_________________________________________________________________
dense_34 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_35 (Dense)             (None, 5)                

In [14]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# File that receives the weights whenever validation accuracy reaches a new maximum.
filepath = os.path.join(output_dir, 'model-weights-batch-1.hdf5')
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
# Stop once validation accuracy has gone 10 epochs without improving by at
# least 1e-4, and roll the model back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    min_delta=1e-4,
    patience=10,
    restore_best_weights=True,
)

In [15]:
%%time

# Train for at most 100 epochs; verbose=0 hides Keras' own progress output, so
# the only log lines come from the checkpoint callback.
model_0_batch_1.fit(
    x=X_train_0,
    y=y_train_0,
    epochs=100,
    verbose=0,
    validation_data=(X_test_0, y_test_0),
    callbacks=[checkpoint, early_stopping],
)


Epoch 00001: val_accuracy improved from -inf to 0.98541, saving model to .tmp\model-weights-batch-1.hdf5

Epoch 00002: val_accuracy did not improve from 0.98541

Epoch 00003: val_accuracy improved from 0.98541 to 0.99222, saving model to .tmp\model-weights-batch-1.hdf5

Epoch 00004: val_accuracy did not improve from 0.99222

Epoch 00005: val_accuracy did not improve from 0.99222

Epoch 00006: val_accuracy did not improve from 0.99222

Epoch 00007: val_accuracy did not improve from 0.99222

Epoch 00008: val_accuracy improved from 0.99222 to 0.99358, saving model to .tmp\model-weights-batch-1.hdf5

Epoch 00009: val_accuracy improved from 0.99358 to 0.99397, saving model to .tmp\model-weights-batch-1.hdf5

Epoch 00010: val_accuracy did not improve from 0.99397

Epoch 00011: val_accuracy did not improve from 0.99397

Epoch 00012: val_accuracy did not improve from 0.99397

Epoch 00013: val_accuracy did not improve from 0.99397

Epoch 00014: val_accuracy did not improve from 0.99397

Epoch 

<tensorflow.python.keras.callbacks.History at 0x2579f27d710>

#### Training with 1 normalization layer and 10 inner layers

In [16]:
# Same builder as the single-normalization model, but with n_layers raised to 10.
model_0_batch_1_10 = build_model_batch_1(n_inputs=(28 * 28,), n_outputs=5, n_layers=10)
model_0_batch_1_10.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_36 (Dense)             (None, 100)               78500     
_________________________________________________________________
dense_37 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_38 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_39 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_40 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_41 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_42 (Dense)             (None, 100)              

In [17]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# File that receives the weights whenever validation accuracy reaches a new maximum.
filepath = os.path.join(output_dir, 'model-weights-batch-1-10.hdf5')
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
# Stop once validation accuracy has gone 10 epochs without improving by at
# least 1e-4, and roll the model back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    min_delta=1e-4,
    patience=10,
    restore_best_weights=True,
)

In [18]:
# Train for at most 100 epochs; verbose=0 hides Keras' own progress output, so
# the only log lines come from the checkpoint callback.
model_0_batch_1_10.fit(
    x=X_train_0,
    y=y_train_0,
    epochs=100,
    verbose=0,
    validation_data=(X_test_0, y_test_0),
    callbacks=[checkpoint, early_stopping],
)


Epoch 00001: val_accuracy improved from -inf to 0.96517, saving model to .tmp\model-weights-batch-1-10.hdf5

Epoch 00002: val_accuracy improved from 0.96517 to 0.98112, saving model to .tmp\model-weights-batch-1-10.hdf5

Epoch 00003: val_accuracy improved from 0.98112 to 0.98541, saving model to .tmp\model-weights-batch-1-10.hdf5

Epoch 00004: val_accuracy improved from 0.98541 to 0.98969, saving model to .tmp\model-weights-batch-1-10.hdf5

Epoch 00005: val_accuracy did not improve from 0.98969

Epoch 00006: val_accuracy did not improve from 0.98969

Epoch 00007: val_accuracy improved from 0.98969 to 0.99163, saving model to .tmp\model-weights-batch-1-10.hdf5

Epoch 00008: val_accuracy did not improve from 0.99163

Epoch 00009: val_accuracy improved from 0.99163 to 0.99319, saving model to .tmp\model-weights-batch-1-10.hdf5

Epoch 00010: val_accuracy improved from 0.99319 to 0.99358, saving model to .tmp\model-weights-batch-1-10.hdf5

Epoch 00011: val_accuracy did not improve from 0.9

<tensorflow.python.keras.callbacks.History at 0x257a0caada0>

#### Learning with 4 inner layers and 4 normalization layers

In [19]:
def build_model_batch_n(n_inputs, n_outputs, n_layers = 4, params = None):
    """Build and compile a dense classifier with batch normalization after each inner layer.

    The model has a first hidden ``Dense`` layer that declares the input
    shape, then ``n_layers`` pairs of (hidden ``Dense`` layer,
    ``BatchNormalization`` layer), then a softmax output layer. All hidden
    layers use 100 units, ELU activation and He-uniform initialisation. The
    first hidden layer is not followed by a normalization layer.

    Args:
        n_inputs: Shape tuple passed to the first layer as ``input_shape``.
        n_outputs: Number of units in the softmax output layer.
        n_layers: Number of (Dense, BatchNormalization) pairs to add.
        params: Optional dict of keyword arguments forwarded to ``Adam``.

    Returns:
        The compiled ``Sequential`` model.
    """
    if params is None:
        params = {}

    dense_kwargs = dict(units=100, kernel_initializer='he_uniform', activation='elu')

    net = Sequential()
    net.add(Dense(input_shape=n_inputs, **dense_kwargs))
    for _ in range(n_layers):
        net.add(Dense(**dense_kwargs))
        net.add(BatchNormalization())
    net.add(Dense(n_outputs, activation='softmax'))

    net.compile(
        optimizer=Adam(**params),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

In [20]:
# Normalization-after-every-inner-layer variant with the builder's default n_layers.
model_0_batch_n_4 = build_model_batch_n(n_inputs=(28 * 28,), n_outputs=5)
model_0_batch_n_4.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_48 (Dense)             (None, 100)               78500     
_________________________________________________________________
dense_49 (Dense)             (None, 100)               10100     
_________________________________________________________________
batch_normalization_v2_2 (Ba (None, 100)               400       
_________________________________________________________________
dense_50 (Dense)             (None, 100)               10100     
_________________________________________________________________
batch_normalization_v2_3 (Ba (None, 100)               400       
_________________________________________________________________
dense_51 (Dense)             (None, 100)               10100     
_________________________________________________________________
batch_normalization_v2_4 (Ba (None, 100)              

In [21]:
# File that receives the weights whenever validation accuracy reaches a new maximum.
filepath = os.path.join(output_dir, 'model-weights-batch-n-4.hdf5')
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
# Stop once validation accuracy has gone 10 epochs without improving by at
# least 1e-4, and roll the model back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    min_delta=1e-4,
    patience=10,
    restore_best_weights=True,
)

In [22]:
%%time

# Train for at most 100 epochs; verbose=0 hides Keras' own progress output, so
# the only log lines come from the checkpoint callback.
model_0_batch_n_4.fit(
    x=X_train_0,
    y=y_train_0,
    epochs=100,
    verbose=0,
    validation_data=(X_test_0, y_test_0),
    callbacks=[checkpoint, early_stopping],
)


Epoch 00001: val_accuracy improved from -inf to 0.98755, saving model to .tmp\model-weights-batch-n-4.hdf5

Epoch 00002: val_accuracy improved from 0.98755 to 0.98910, saving model to .tmp\model-weights-batch-n-4.hdf5

Epoch 00003: val_accuracy did not improve from 0.98910

Epoch 00004: val_accuracy improved from 0.98910 to 0.99066, saving model to .tmp\model-weights-batch-n-4.hdf5

Epoch 00005: val_accuracy improved from 0.99066 to 0.99144, saving model to .tmp\model-weights-batch-n-4.hdf5

Epoch 00006: val_accuracy did not improve from 0.99144

Epoch 00007: val_accuracy improved from 0.99144 to 0.99299, saving model to .tmp\model-weights-batch-n-4.hdf5

Epoch 00008: val_accuracy did not improve from 0.99299

Epoch 00009: val_accuracy did not improve from 0.99299

Epoch 00010: val_accuracy did not improve from 0.99299

Epoch 00011: val_accuracy improved from 0.99299 to 0.99397, saving model to .tmp\model-weights-batch-n-4.hdf5

Epoch 00012: val_accuracy did not improve from 0.99397



<tensorflow.python.keras.callbacks.History at 0x257a41ddcf8>

#### Learning with 10 inner layers and 10 normalization layers

In [23]:
# Normalization-after-every-inner-layer variant with n_layers raised to 10.
model_0_batch_n_10 = build_model_batch_n(n_inputs=(28 * 28,), n_outputs=5, n_layers=10)
model_0_batch_n_10.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_54 (Dense)             (None, 100)               78500     
_________________________________________________________________
dense_55 (Dense)             (None, 100)               10100     
_________________________________________________________________
batch_normalization_v2_6 (Ba (None, 100)               400       
_________________________________________________________________
dense_56 (Dense)             (None, 100)               10100     
_________________________________________________________________
batch_normalization_v2_7 (Ba (None, 100)               400       
_________________________________________________________________
dense_57 (Dense)             (None, 100)               10100     
_________________________________________________________________
batch_normalization_v2_8 (Ba (None, 100)              

In [24]:
# File that receives the weights whenever validation accuracy reaches a new maximum.
filepath = os.path.join(output_dir, 'model-weights-batch-n-10.hdf5')
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
# Stop once validation accuracy has gone 10 epochs without improving by at
# least 1e-4, and roll the model back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    min_delta=1e-4,
    patience=10,
    restore_best_weights=True,
)

In [25]:
%%time

# Train for at most 100 epochs; verbose=0 hides Keras' own progress output, so
# the only log lines come from the checkpoint callback.
model_0_batch_n_10.fit(
    x=X_train_0,
    y=y_train_0,
    epochs=100,
    verbose=0,
    validation_data=(X_test_0, y_test_0),
    callbacks=[checkpoint, early_stopping],
)


Epoch 00001: val_accuracy improved from -inf to 0.98229, saving model to .tmp\model-weights-batch-n-10.hdf5

Epoch 00002: val_accuracy improved from 0.98229 to 0.98618, saving model to .tmp\model-weights-batch-n-10.hdf5

Epoch 00003: val_accuracy improved from 0.98618 to 0.99027, saving model to .tmp\model-weights-batch-n-10.hdf5

Epoch 00004: val_accuracy did not improve from 0.99027

Epoch 00005: val_accuracy did not improve from 0.99027

Epoch 00006: val_accuracy improved from 0.99027 to 0.99144, saving model to .tmp\model-weights-batch-n-10.hdf5

Epoch 00007: val_accuracy improved from 0.99144 to 0.99241, saving model to .tmp\model-weights-batch-n-10.hdf5

Epoch 00008: val_accuracy did not improve from 0.99241

Epoch 00009: val_accuracy did not improve from 0.99241

Epoch 00010: val_accuracy did not improve from 0.99241

Epoch 00011: val_accuracy did not improve from 0.99241

Epoch 00012: val_accuracy did not improve from 0.99241

Epoch 00013: val_accuracy improved from 0.99241 to

<tensorflow.python.keras.callbacks.History at 0x257ab5343c8>