In [1]:
# 8) Deep Learning
#     a) Build a  DNN with five hidden layers of 100 neurons
#     each, He initialization, 
#     and the ELU activation function.
    
#     b) Using the Adam optimization and early stopping, 
#     try training it on MNIST but only on digits 0 to 4,
#     as we will use transfer learning for digits 5 to 9
#     in the next exercise. You will need a 
#     softmax output layer with 5 neurons, 
#     and as always make sure to 
#     save checkpoints at regular intervals and save 
#     the final model so you can reuse it later.


In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
import numpy as np
from sklearn.model_selection import GridSearchCV

In [3]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

In [4]:
X_train, X_test = X_train / 255.0, X_test / 255.0

In [5]:
X_train_0_to_4, y_train_0_to_4 = X_train[y_train <= 4], y_train[y_train <= 4]

X_test_0_to_4, y_test_0_to_4 = X_test[y_test <= 4], y_test[y_test <= 4]

In [6]:
def create_model():
    he_initializer = tf.keras.initializers.he_normal(seed=None)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(5, activation='softmax')    
    ])

    model.compile(optimizer='adam', 
                  loss='sparse_categorical_crossentropy', 
                  metrics=['accuracy'])
    return model

In [7]:
import os
checkpoint_path = "training_0_4/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

In [8]:
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

In [213]:
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0, patience=20, verbose=2)

In [36]:
model = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model)
# model = create_model()

# Applying Grid Search from scikit-learn
# https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/

In [None]:
# START HERE

In [35]:
param_grid = dict(epochs=[10, 20, 30], batch_size=[10, 20, 40])
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, error_score='raise', verbose=2)

grid_result = grid.fit(X_train_0_to_4, y_train_0_to_4)

Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] batch_size=10, epochs=10 ........................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Train on 20397 samples
Epoch 1/10

KeyboardInterrupt: 

In [69]:
model = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model)
model.fit(X_train_0_to_4, y_train_0_to_4, **best_params, callbacks=[early_stopping], validation_data=(X_test_0_to_4,y_test_0_to_4))

Train on 30596 samples, validate on 5139 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x1678d1610>

In [38]:
def create_model_with_batch_norm():
    he_initializer = tf.keras.initializers.he_normal(seed=None)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(5, activation='softmax')    
    ])

    model.compile(optimizer='adam', 
                  loss='sparse_categorical_crossentropy', 
                  metrics=['accuracy'])
    return model


In [67]:
model_bn = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=create_model_with_batch_norm)


In [68]:
model_bn.fit(X_train_0_to_4, y_train_0_to_4, **best_params, callbacks=[early_stopping], validation_data=(X_test_0_to_4,y_test_0_to_4))

Train on 30596 samples, validate on 5139 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 00029: early stopping


<tensorflow.python.keras.callbacks.History at 0x16ed43390>

In [23]:
param_grid = dict(epochs=[10, 20, 30], batch_size=[10, 20, 40])
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, error_score='raise', verbose=2)

grid_result = grid.fit(X_train_0_to_4, y_train_0_to_4)

Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] batch_size=10, epochs=10 ........................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Train on 20397 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[CV] ......................... batch_size=10, epochs=10, total= 1.0min
[CV] batch_size=10, epochs=10 ........................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.0min remaining:    0.0s


Train on 20397 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[CV] ......................... batch_size=10, epochs=10, total=  59.8s
[CV] batch_size=10, epochs=10 ........................................
Train on 20398 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[CV] ......................... batch_size=10, epochs=10, total= 1.0min
[CV] batch_size=10, epochs=20 ........................................
Train on 20397 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[CV] ......................... batch_size=10, epochs=20, total= 2.1min
[CV] batch_size=10, epochs=20 ........................................
Train on 20397 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[CV] ......................... batch_size=10, epochs=20, total= 2.1min
[CV] batch_size=10, epochs=20 ........................................
Train on 20398 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[CV] ......................... batch_size=10, epochs=20, total= 2.0min
[CV] batch_size=10, epochs=30 ........................................
Train on 20397 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[CV] ......................... batch_size=10, epochs=30, total= 3.1min
[CV] batch_size=10, epochs=30 ........................................
Train on 20397 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[CV] ......................... batch_size=10, epochs=30, total= 3.0min
[CV] batch_size=10, epochs=30 ........................................
Train on 20398 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[CV] ......................... batch_size=10, epochs=30, total= 3.0min
[CV] batch_size=20, epochs=10 ........................................
Train on 20397 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[CV] ......................... batch_size=20, epochs=10, total=  34.7s
[CV] batch_size=20, epochs=10 ........................................
Train on 20397 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[CV] ......................... batch_size=20, epochs=10, total=  32.4s
[CV] batch_size=20, epochs=10 ........................................
Train on 20398 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[CV] ......................... batch_size=20, epochs=10, total=36.1min
[CV] batch_size=20, epochs=20 ........................................
Train on 20397 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[CV] ......................... batch_size=20, epochs=20, total= 1.6min
[CV] batch_size=20, epochs=20 ........................................
Train on 20397 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[CV] ......................... batch_size=20, epochs=20, total=36.8min
[CV] batch_size=20, epochs=20 ........................................
Train on 20398 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[CV] ........................ batch_size=20, epochs=20, total=121.3min
[CV] batch_size=20, epochs=30 ........................................
Train on 20397 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[CV] ......................... batch_size=20, epochs=30, total= 2.2min
[CV] batch_size=20, epochs=30 ........................................
Train on 20397 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[CV] ........................ batch_size=20, epochs=30, total=122.1min
[CV] batch_size=20, epochs=30 ........................................
Train on 20398 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[CV] ........................ batch_size=20, epochs=30, total=122.1min
[CV] batch_size=40, epochs=10 ........................................
Train on 20397 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[CV] ......................... batch_size=40, epochs=10, total=  29.9s
[CV] batch_size=40, epochs=10 ........................................
Train on 20397 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[CV] ......................... batch_size=40, epochs=10, total=  25.4s
[CV] batch_size=40, epochs=10 ........................................
Train on 20398 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[CV] ......................... batch_size=40, epochs=10, total=66.6min
[CV] batch_size=40, epochs=20 ........................................
Train on 20397 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[CV] ......................... batch_size=40, epochs=20, total= 1.1min
[CV] batch_size=40, epochs=20 ........................................
Train on 20397 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[CV] ......................... batch_size=40, epochs=20, total= 1.1min
[CV] batch_size=40, epochs=20 ........................................
Train on 20398 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[CV] ......................... batch_size=40, epochs=20, total= 1.6min
[CV] batch_size=40, epochs=30 ........................................
Train on 20397 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[CV] ......................... batch_size=40, epochs=30, total= 2.0min
[CV] batch_size=40, epochs=30 ........................................
Train on 20397 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[CV] ......................... batch_size=40, epochs=30, total= 1.2min
[CV] batch_size=40, epochs=30 ........................................
Train on 20398 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[CV] ......................... batch_size=40, epochs=30, total= 1.2min


[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed: 537.3min finished


Train on 30596 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [49]:
best_params = grid_result.best_params_

In [50]:
best_params

{'batch_size': 40, 'epochs': 30}

In [None]:
grid_bn = create_model_bn

In [None]:
# Run grid search on model_bn, take best params and use those to compare model and model_bn
# Don't bother running GridSearch on model for now

In [51]:
model.fit(X_train_0_to_4, y_train_0_to_4, callbacks=[early_stopping], validation_data=(X_test_0_to_4,y_test_0_to_4))

Train on 30596 samples, validate on 5139 samples


<tensorflow.python.keras.callbacks.History at 0x1667f1050>

In [47]:
model_bn.fit(X_train_0_to_4, y_train_0_to_4, callbacks=[early_stopping], validation_data=(X_test_0_to_4,y_test_0_to_4))

Train on 30596 samples, validate on 5139 samples


<tensorflow.python.keras.callbacks.History at 0x162ce6550>

In [None]:
# TODO: add 5-9
def create_model_():
    he_initializer = tf.keras.initializers.he_normal(seed=None)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(5, activation='softmax')    
    ])

    model.compile(optimizer='adam', 
                  loss='sparse_categorical_crossentropy', 
                  metrics=['accuracy'])
    return model

In [None]:
############################

In [52]:
def create_model_with_batch_norm_and_dropout():
    he_initializer = tf.keras.initializers.he_normal(seed=None)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(5, activation='softmax')    
    ])

    model.compile(optimizer='adam', 
                  loss='sparse_categorical_crossentropy', 
                  metrics=['accuracy'])
    return model


In [53]:
def create_model_with_dropout():
    he_initializer = tf.keras.initializers.he_normal(seed=None)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(100, activation='elu', kernel_initializer=he_initializer),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(5, activation='softmax')    
    ])

    model.compile(optimizer='adam', 
                  loss='sparse_categorical_crossentropy', 
                  metrics=['accuracy'])
    return model


In [64]:
model_bn_do = create_model_with_batch_norm_and_dropout()
model_bn_do_result = model_bn_do.fit(X_train_0_to_4, y_train_0_to_4, epochs=30, batch_size=40, callbacks=[early_stopping], validation_data=(X_test_0_to_4,y_test_0_to_4))

Train on 30596 samples, validate on 5139 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [76]:
model_bn_do_result.history.keys()

dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

In [84]:
model_bn_do_result.history['val_accuracy']

[0.9832652,
 0.9850165,
 0.98871374,
 0.98910296,
 0.9867678,
 0.9867678,
 0.98910296,
 0.9898813,
 0.98890835,
 0.9900759,
 0.9918272,
 0.99143803,
 0.99143803,
 0.99085426,
 0.99241096,
 0.99065965,
 0.9922164,
 0.99085426,
 0.99260557,
 0.99065965,
 0.9912434,
 0.9918272,
 0.9896867,
 0.9920218,
 0.9918272,
 0.99338394,
 0.99338394,
 0.99260557,
 0.98910296,
 0.99416226]

In [60]:
model_do = create_model_with_dropout()
model_do_result = model_do.fit(X_train_0_to_4, y_train_0_to_4, epochs=30, batch_size=40, callbacks=[early_stopping], validation_data=(X_test_0_to_4,y_test_0_to_4))

Train on 30596 samples, validate on 5139 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
############################

In [None]:
# 11.9 Transfer Learning

In [None]:
############################

In [111]:
# 11.9.A) Create new DNN and add pretrained hidden layers from model_bn_do

In [110]:
from tensorflow.keras import layers

In [85]:
model_bn_do.summary()

Model: "sequential_58"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_58 (Flatten)         (None, 784)               0         
_________________________________________________________________
batch_normalization_46 (Batc (None, 784)               3136      
_________________________________________________________________
dense_348 (Dense)            (None, 100)               78500     
_________________________________________________________________
dropout_86 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_349 (Dense)            (None, 100)               10100     
_________________________________________________________________
dropout_87 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_350 (Dense)            (None, 100)             

In [105]:
new_model = tf.keras.models.Sequential()

In [107]:
# https://stackoverflow.com/questions/55335228/how-to-remove-the-last-layer-from-a-pre-trained-model-i-have-tried-model-layers
# Add all layers except softmax layer (output layer)

for layer in model_bn_do.layers[:-1]:
    new_model.add(layer)

In [112]:
# Freeze the layers
for layer in new_model.layers:
    layer.trainable = False

In [114]:
# Add new softmax layer with 10 output nodes
new_model.add(layers.Dense(10, activation='softmax'))

In [115]:
new_model.summary()

Model: "sequential_65"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_58 (Flatten)         (None, 784)               0         
_________________________________________________________________
batch_normalization_46 (Batc (None, 784)               3136      
_________________________________________________________________
dense_348 (Dense)            (None, 100)               78500     
_________________________________________________________________
dropout_86 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_349 (Dense)            (None, 100)               10100     
_________________________________________________________________
dropout_87 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_350 (Dense)            (None, 100)             

In [116]:
# 11.9.B) Train new DNN on digits 5-9 using only 100 images per digit, and time how long it takes

In [117]:
X_train_5_to_9, y_train_5_to_9 = X_train[y_train >= 5], y_train[y_train >= 5]
X_test_5_to_9, y_test_5_to_9 = X_test[y_test >= 5], y_test[y_test >= 5]

In [118]:
X_train_all_5s, y_train_all_5s = X_train[y_train == 5], X_train[y_train == 5]

In [152]:
X_train_all_5s, y_train_all_5s = X_train[y_train == 5][:100], X_train[y_train == 5][:100]
len(X_train_all_5s)

100

In [153]:
X_test_all_5s, y_test_all_5s = X_test[y_test == 5][:100], X_test[y_test == 5][:100]
len(X_test_all_5s)

100

In [205]:
X_train_5_to_9_100, y_train_5_to_9_100 = X_train_all_5s, y_train_all_5s

for num in range(6, 10):
    next_X_train, next_y_train = X_train[y_train == num][:100], X_train[y_train == num][:100]
    X_train_5_to_9_100 = np.concatenate((X_train_5_to_9_100, next_X_train), axis=0)
    y_train_5_to_9_100 = np.concatenate((y_train_5_to_9_100, next_y_train))
    
print('total length X: ', X_train_5_to_9_100.shape)
print('total length y: ', y_train_5_to_9_100.shape)

total length X:  (500, 28, 28)
total length y:  (500, 28, 28)


In [210]:
X_test_5_to_9_100, y_test_5_to_9_100 = X_test_all_5s, y_test_all_5s

for num in range(6, 10):
    next_X_test, next_y_test = X_test[y_test == num][:100], X_test[y_test == num][:100]
    X_test_5_to_9_100 = np.concatenate((X_test_5_to_9_100, next_X_test), axis=0)
    y_test_5_to_9_100 = np.concatenate((y_test_5_to_9_100, next_y_test))
    
print('total length X: ', X_test_5_to_9_100.shape)
print('total length y: ', y_test_5_to_9_100.shape)
print('type y: ', type(y_test_5_to_9_100))

total length X:  (500, 28, 28)
total length y:  (500, 28, 28)
type y:  <class 'numpy.ndarray'>


In [209]:
print('total length X: ', X_test_0_to_4.shape)
print('total length y: ', y_test_0_to_4.shape)
print('type y: ', type(y_test_0_to_4))

total length X:  (5139, 28, 28)
total length y:  (5139,)
type y:  <class 'numpy.ndarray'>


In [195]:
new_model.compile(optimizer='adam', 
              loss='sparse_categorical_crossentropy', 
              metrics=['accuracy'])

In [199]:
# new_model.summary()

In [215]:
new_model.fit(X_train_5_to_9, y_train_5_to_9, epochs=50, batch_size=40, validation_data=(X_test_5_to_9, y_test_5_to_9))

Train on 29404 samples, validate on 4861 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x194905850>

In [174]:
print(y_train_0_to_4.shape)
print(y_train_5_to_9_100.shape)

()
(28, 28)


In [178]:
# X_train_0_to_4, y_train_0_to_4 = X_train[y_train <= 4], y_train[y_train <= 4]
y_train_0_to_4.shape

(30596,)

In [217]:
# 11.9.C) Try caching the frozen layers, and train the model again: how much faster is it now?

In [229]:
cached_frozen_model = tf.keras.models.Sequential()
cached_frozen_model.add(new_model)

In [230]:
cached_frozen_model.summary()

Model: "sequential_69"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_65 (Sequential)   (None, 10)                123046    
Total params: 123,046
Trainable params: 1,010
Non-trainable params: 122,036
_________________________________________________________________


In [232]:
cached_frozen_model.compile(optimizer='adam', 
                  loss='sparse_categorical_crossentropy', 
                  metrics=['accuracy'])

In [233]:
cached_frozen_model.fit(X_train_5_to_9, y_train_5_to_9, epochs=50, batch_size=40, validation_data=(X_test_5_to_9, y_test_5_to_9))

Train on 29404 samples, validate on 4861 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x19516ec10>