<small>
Copyright (c) 2017 Andrew Glassner

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
</small>



# Deep Learning From Basics to Practice
## by Andrew Glassner, https://dlbasics.com, http://glassner.com
------
## Chapter 23: Keras
### Notebook 9: Improving the model

In [None]:
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils
from keras.models import load_model
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from keras.layers import Dropout
from keras.constraints import maxnorm
import h5py
import numpy as np

from keras import backend as keras_backend
# Set the Keras backend's image data format to 'channels_last'.
keras_backend.set_image_data_format('channels_last')

In [None]:
# Build the File_Helper object used for saving and loading files.

save_files = True

import os
import sys
import inspect

# Work out the directory that holds this code, then put its parent
# directory at the front of the module search path before importing
# File_Helper.
this_source_file = inspect.getfile(inspect.currentframe())
current_dir = os.path.dirname(os.path.abspath(this_source_file))
parent_dir = os.path.dirname(current_dir)
sys.path.insert(0, parent_dir)
from DLBasics_Utilities import File_Helper

file_helper = File_Helper(save_files)

In [None]:
# Fetch the MNIST data and get it ready for training

random_seed = 42

# pull down the train/test split and record the image geometry
(X_train, y_train), (X_test, y_test) = mnist.load_data()
image_height, image_width = X_train.shape[1:3]
number_of_pixels = image_height * image_width

# convert the samples to the current Keras floating-point type,
# then divide in place by 255 to scale them to the range [0, 1]
X_train = keras_backend.cast_to_floatx(X_train)
X_test = keras_backend.cast_to_floatx(X_test)
X_train /= 255.0
X_test /= 255.0

# keep copies of the integer labels before they are replaced
# by their one-hot encoded versions
original_y_train = y_train.copy()
original_y_test = y_test.copy()

# the class count is one more than the largest label in either split
number_of_classes = 1 + np.max(np.append(y_train, y_test))
y_train = np_utils.to_categorical(y_train, number_of_classes)
y_test = np_utils.to_categorical(y_test, number_of_classes)

# flatten each image so every sample is one row of pixels
X_train = X_train.reshape(X_train.shape[0], number_of_pixels)
X_test = X_test.reshape(X_test.shape[0], number_of_pixels)

In [None]:
# The pre-processing step normally assigns these two variables.
# For this demonstration we simply set them by hand.
number_of_pixels = 28 * 28  # size of an MNIST image
number_of_classes = 10      # MNIST images are digits 0 to 9

def make_one_hidden_layer_model():
    """Return a compiled model: one hidden ReLU layer, then a softmax output layer."""
    # hidden layer is fully connected, with #nodes = #pixels
    hidden_layer = Dense(number_of_pixels, activation='relu',
                         input_shape=[number_of_pixels])
    # output layer has one node per class
    output_layer = Dense(number_of_classes, activation='softmax')
    # hand the layers to the model in order, input side first
    network = Sequential([hidden_layer, output_layer])
    # compile the model to turn it from specification to code
    network.compile(loss='categorical_crossentropy', optimizer='adam',
                    metrics=['accuracy'])
    return network

In [None]:
def make_two_hidden_layers_model():
    """Return a compiled model: two hidden ReLU layers of #pixels nodes, then softmax."""
    network = Sequential([
        # first hidden layer also declares the input shape
        Dense(number_of_pixels, input_shape=[number_of_pixels],
              activation='relu'),
        Dense(number_of_pixels, activation='relu'),
        # output layer: one node per class
        Dense(number_of_classes, activation='softmax'),
    ])
    network.compile(loss='categorical_crossentropy', optimizer='adam',
                    metrics=['accuracy'])
    return network

def make_hidden_layer_n_neurons(num_neurons):
    """Return a compiled model with a single hidden ReLU layer of `num_neurons` nodes.

    The hidden layer takes `number_of_pixels` inputs and is followed by a
    softmax output layer with `number_of_classes` nodes.
    """
    hidden_layer = Dense(num_neurons, input_shape=[number_of_pixels],
                         activation='relu')
    output_layer = Dense(number_of_classes, activation='softmax')
    network = Sequential([hidden_layer, output_layer])
    network.compile(loss='categorical_crossentropy', optimizer='adam',
                    metrics=['accuracy'])
    return network

# Make a general model of any number of dense layers, each optionally followed by dropout
def make_layers_model(neurons_per_layer, dropout_per_layer, learning_rate=0.001,
                      dropout_rate=0.2, max_norm=3):
    """Build and compile a fully-connected classifier with configurable hidden layers.

    Args:
        neurons_per_layer: sequence giving the number of neurons in each
            hidden Dense layer, in order from the input side.
        dropout_per_layer: sequence of truthy/falsy flags, indexed in step
            with `neurons_per_layer`. A truthy entry puts a max-norm kernel
            constraint on that Dense layer and follows it with a Dropout layer.
        learning_rate: learning rate handed to the Adam optimizer.
        dropout_rate: rate given to every Dropout layer that is added.
        max_norm: value given to the max-norm kernel constraint.

    Returns:
        The compiled Sequential model, ending in a softmax layer with
        `number_of_classes` nodes.

    Raises:
        IndexError: if `dropout_per_layer` has fewer entries than
            `neurons_per_layer`.
    """
    model = Sequential()
    for i, num_neurons in enumerate(neurons_per_layer):
        use_dropout = dropout_per_layer[i]
        kwargs = { 'kernel_initializer': 'normal', 'activation': 'relu' }
        if i == 0:
            # only the first layer declares the size of the input
            kwargs['input_dim'] = number_of_pixels
        if use_dropout:
            kwargs['kernel_constraint'] = maxnorm(max_norm)
        model.add(Dense(num_neurons, **kwargs))
        if use_dropout:
            model.add(Dropout(dropout_rate))

    output_kwargs = { 'activation': 'softmax' }
    if len(neurons_per_layer) == 0:
        # with no hidden layers the output layer is the first layer,
        # so it is the one that has to declare the input size
        output_kwargs['input_dim'] = number_of_pixels
    model.add(Dense(number_of_classes, **output_kwargs))

    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=learning_rate),
                  metrics=['accuracy'])
    return model

In [None]:
# A little utility to draw accuracy and loss plots
import matplotlib.pyplot as plt

def plot_accuracy_and_loss(history, plot_title, filename):
    """Draw side-by-side accuracy and loss curves for one training run.

    Args:
        history: object whose `.history` dict holds per-epoch lists for
            training/validation accuracy and for 'loss'/'val_loss'.
            Accuracy is read from 'acc'/'val_acc' when present, otherwise
            from 'accuracy'/'val_accuracy'.
        plot_title: text placed in front of ', Accuracy' and ', Loss' in the
            panel titles. If None, the titles are just 'Accuracy' and 'Loss'.
        filename: name passed to `file_helper.save_figure` for the figure.
    """
    metrics = history.history
    # Keras versions differ in the key used for the accuracy metric,
    # so accept either spelling.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'
    # Callers such as run_and_report default the title to None; avoid
    # concatenating None with a string.
    title_prefix = '' if plot_title is None else plot_title + ', '
    xs = range(len(metrics[acc_key]))

    # manually change legend location to 'best' when the locations
    # here don't look good.
    panels = [
        # (subplot index, train key, validation key, legend loc, y label, title)
        (1, acc_key, 'val_' + acc_key, 'lower left', 'accuracy', 'Accuracy'),
        (2, 'loss', 'val_loss', 'upper left', 'loss', 'Loss'),
    ]

    plt.figure(figsize=(10,3))
    for position, train_key, val_key, legend_loc, y_label, title in panels:
        plt.subplot(1, 2, position)
        plt.plot(xs, metrics[train_key], label='train')
        plt.plot(xs, metrics[val_key], label='validation')
        plt.legend(loc=legend_loc)
        plt.xlabel('epochs')
        plt.ylabel(y_label)
        plt.title(title_prefix + title)

    file_helper.save_figure(filename)
    plt.show()

In [None]:
# A utility to let us try out lots of variations of models,
# batch size, and early stopping, and plot and save the results.
#
# We use Python's kwargs mechanism as a way to package up the parameters
# to a function in a dictionary. It's a little fancy but it saves a huge
# amount of code. See the Python docs for how this feature works.
#   


def run_and_report(model, plot_title=None, filename='generic-filename', 
                   epochs=100, batch_size=256, verbosity=2, stop_early=False):
    """Train `model`, plot its accuracy and loss curves, and save it to disk.

    The model is fit on X_train/y_train with X_test/y_test as validation
    data, after seeding NumPy's random number generator with `random_seed`.

    Args:
        model: compiled model to train.
        plot_title: title passed on to plot_accuracy_and_loss.
        filename: base name used for the saved figure and the saved model.
        epochs: number of epochs passed to fit().
        batch_size: batch size passed to fit().
        verbosity: value passed to fit() as `verbose`.
        stop_early: when true, add an EarlyStopping callback that monitors
            'val_loss' with a patience of 10.

    Returns:
        The history object returned by model.fit().
    """
    np.random.seed(random_seed)

    # collect the fit() arguments; callbacks are only added on request
    fit_options = dict(validation_data=(X_test, y_test), epochs=epochs,
                       batch_size=batch_size, verbose=verbosity)
    if stop_early:
        fit_options['callbacks'] = [
            EarlyStopping(monitor='val_loss', patience=10, verbose=1)
        ]

    training_history = model.fit(X_train, y_train, **fit_options)
    plot_accuracy_and_loss(training_history, plot_title, filename)

    # save the trained model under the checkpoint folder, with the
    # requested epoch count as part of the file name
    checkpoint_folder = 'NB09-checkpointed-models'
    file_helper.check_for_directory(checkpoint_folder)
    model_file = filename + '-' + str(epochs) + '-epochs.h5'
    model.save(checkpoint_folder + '/' + model_file)
    return training_history

In [None]:
#
# WARNING: As batch sizes decrease, running time increases FAST.
# Without a GPU, be careful with batch sizes smaller than 256.
#
# Train a fresh one-hidden-layer model at each batch size, keeping the
# training history and the wall-clock time of every run.
import time
history_list = []
time_list = []
for batchsize in (2048, 1024, 512, 256, 128, 64, 32, 16, 8):
    size_text = str(batchsize)
    plot_title = 'Two layers, batchsize=' + size_text
    filename = 'two-layers-batch-' + size_text
    model = make_one_hidden_layer_model()
    # the timed span covers everything run_and_report does
    start_time = time.time()
    history = run_and_report(model, plot_title, filename,
                             batch_size=batchsize, verbosity=0)
    end_time = time.time()
    elapsed_time = end_time - start_time
    history_list.append(history)
    time_list.append(elapsed_time)
    print("batchsize ",batchsize," elapsed time=",elapsed_time)

In [None]:
# Re-draw the accuracy/loss plots for every batch size, in the order
# the histories were stored in history_list.
for history_index, plotted_batch_size in enumerate([2048, 1024, 512, 256, 128, 64, 32, 16, 8]):
    plot_accuracy_and_loss(history_list[history_index],
                           'Two layers, batchsize=' + str(plotted_batch_size),
                           'two-layers-batch-' + str(plotted_batch_size))

In [None]:
# Plot the training time recorded for each batch size
plt.plot(range(len(time_list)), time_list)
plt.xlabel('batch size')
plt.ylabel('time in seconds')
# label the evenly spaced x positions with the batch sizes that were run
plt.xticks(range(len(time_list)), [2048, 1024, 512, 256, 128, 64, 32, 16, 8])
# Save through the File_Helper object, the same way plot_accuracy_and_loss
# does; no bare save_figure() is defined or imported in the visible code,
# so calling it directly raises NameError.
file_helper.save_figure('batch-timing')
plt.show()

In [None]:
# Train and report on the model with two hidden layers
model = make_two_hidden_layers_model()
history_2HLM = run_and_report(model,
                              plot_title='Two hidden layers',
                              filename='two-hidden-layers',
                              batch_size=256,
                              verbosity=2)

In [None]:
# Train and report on a model whose single hidden layer has 64 neurons
model = make_hidden_layer_n_neurons(64)
history_fc_64 = run_and_report(model,
                               plot_title='64-neuron hidden layer',
                               filename='64-neuron-hidden-layer',
                               batch_size=256,
                               verbosity=2)

In [None]:
# Two hidden layers of 32 neurons each, no dropout
model = make_layers_model(neurons_per_layer=[32, 32],
                          dropout_per_layer=[False, False])
history_32_32 = run_and_report(model,
                               plot_title='2 layers, 32 neurons each',
                               filename='32-32-layers',
                               batch_size=256,
                               verbosity=0)

In [None]:
# Two hidden layers of 32 neurons each, both followed by dropout
model = make_layers_model(neurons_per_layer=[32, 32],
                          dropout_per_layer=[True, True])
history_32_DO_32_DO = run_and_report(model,
                                     plot_title='32,DO,32,DO',
                                     filename='32-DO-32-DO',
                                     batch_size=256,
                                     verbosity=0)

In [None]:
# Same 32/dropout/32/dropout model, trained with a learning rate of 0.1
model = make_layers_model(neurons_per_layer=[32, 32],
                          dropout_per_layer=[True, True],
                          learning_rate=0.1)
history_32_DO_32_DO_LR_p1 = run_and_report(model,
                                           plot_title='32-DO-32-DO, lr=0.1',
                                           filename='32-DO-32-DO-LR-p1',
                                           batch_size=256,
                                           verbosity=0)

In [None]:
# Same 32/dropout/32/dropout model, trained with a learning rate of 0.01
model = make_layers_model(neurons_per_layer=[32, 32],
                          dropout_per_layer=[True, True],
                          learning_rate=0.01)
history_32_DO_32_DO_LR_p01 = run_and_report(model,
                                            plot_title='32-DO-32-DO, lr=0.01',
                                            filename='32-DO-32-DO-LR-p01',
                                            batch_size=256,
                                            verbosity=0)