# Exercise 8: Training Neural Networks with Keras

In [None]:
# Load packages we need
import sys
import os

import datetime

import numpy as np
import sklearn

import scipy as sp
import pandas as pd

import tensorflow as tf

# we'll use keras for neural networks
import tensorflow.keras as keras
from tensorflow.keras.datasets import fashion_mnist

# import layers we will use
from tensorflow.keras.layers import Input, Flatten, Dense, Conv2D, MaxPooling2D, Concatenate, Dropout

# import callbacks we will use
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard

# Load the TensorBoard notebook extension
%load_ext tensorboard

%matplotlib inline
from matplotlib import pyplot as plt
plt.rcParams.update({'font.size': 18})

# Report the interpreter and library versions this notebook is running against
print(f'### Python version: {sys.version}')
print(f'### Numpy version: {np.__version__}')
print(f'### Scikit-learn version: {sklearn.__version__}')
print(f'### Tensorflow version: {tf.__version__}')
print('------------')


# load our packages / code
sys.path.insert(1, '../common/')
import utils
import plots

In [None]:
# global parameters to control behavior of the pre-processing, ML, analysis, etc.

seed = 42 # deterministic seed
# seed both NumPy's and TensorFlow's global random generators with the same value
np.random.seed(seed) 
tf.random.set_seed(seed)

# NOTE(review): not referenced by any of the cells visible here; presumably
# train/val/test proportions for a helper in ../common -- confirm or remove
prop_vec = [24, 2, 2]

## Let's use Fashion MNIST

In [None]:
def load_preprocess_fashion_mnist(minmax_normalize=True):
    """Load Fashion-MNIST and split its held-out set into validation and test halves.

    Args:
        minmax_normalize: when true, the image arrays of every split are
            divided by 255.0.

    Returns:
        The 7-tuple ``(train_x, train_y, test_x, test_y, val_x, val_y, labels)``.
        Note that the test split comes *before* the validation split.
        ``labels`` is a NumPy array holding the ten class names.
    """
    class_names = ['top', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']

    (train_x, train_y), (holdout_x, holdout_y) = fashion_mnist.load_data()

    if minmax_normalize:
        train_x, holdout_x = train_x / 255.0, holdout_x / 255.0

    # first half of the held-out data -> validation, second half -> test
    half = len(holdout_x) // 2
    val_x, test_x = holdout_x[:half], holdout_x[half:]
    val_y, test_y = holdout_y[:half], holdout_y[half:]

    return train_x, train_y, test_x, test_y, val_x, val_y, np.array(class_names)

In [None]:
# NOTE: the loader returns the test split before the validation split
train_x, train_y, test_x, test_y, val_x, val_y, labels = load_preprocess_fashion_mnist()

### What does the data look like?

In [None]:
# preview the first `num_images` training samples
num_images = 25
label_idx = train_y[:num_images].astype(int)
# index the class-name array with the integer class ids to get one name per image
titles = labels[label_idx]
plots.plot_images(train_x[:num_images].reshape(-1, 28, 28), dim_x=28, dim_y=28, fig_size=(9,9), titles=titles)

In [None]:
def create_model_functional(input_shape=(28, 28)):
    """Build (without compiling) a fully connected classifier with the functional API.

    Architecture:
        Input -> Flatten -> Dense(300, relu) -> Dense(100, relu) -> Dense(10, softmax)

    Args:
        input_shape: shape of a single input sample, forwarded to ``Input``.

    Returns:
        An uncompiled ``keras.Model`` named ``'FC-model'``.
    """
    inputs = Input(shape=input_shape, name='Input')

    # thread one tensor through the stack of layers
    x = Flatten(name='Flatten')(inputs)
    x = Dense(300, name='FC1', activation='relu')(x)
    x = Dense(100, name='FC2', activation='relu')(x)
    outputs = Dense(10, name='Output', activation='softmax')(x)

    return keras.Model(name='FC-model', inputs=[inputs], outputs=[outputs])

In [None]:
model = create_model_functional()

In [None]:
# what does the model look like?
model.summary()

### We can examine the layers of the model

In [None]:
model.layers

In [None]:
# we can also query specific layers by name and get their weights
fc1 = model.get_layer('FC1')

# get_weights() is unpacked here into two arrays: the weight matrix and the bias vector
weights, biases = fc1.get_weights()

In [None]:
# first 10 biases of FC1, then the first 10 entries of row index 10 of its weight matrix
print(biases[:10])
print(weights[10,:10])

## Let's compile and train the model

In [None]:
# compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

In [None]:
# set up an early stopping callback
# NOTE(review): restore_best_weights is not passed, so `model` presumably ends
# training with its last-epoch weights rather than the best ones -- confirm
early_stop_cb = EarlyStopping(monitor='val_accuracy', patience=3)

# set up a model checkpointing callback
# only the best model seen so far (highest val_accuracy) is written to `fp`
fp = "./mymodel-bestweights.tf"
checkpoint_cb = ModelCheckpoint(fp, monitor='val_accuracy', save_best_only=True, mode='max')

# max_epochs is only an upper bound: the early-stopping callback can end training sooner
max_epochs = 100
batch_size = 64
history = model.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=max_epochs, batch_size=batch_size, 
                     shuffle=True, callbacks=[early_stop_cb, checkpoint_cb])

### Load the model

In [None]:
loaded_model = tf.keras.models.load_model(fp)

In [None]:
# is it different from the previous model?
# NOTE(review): this displays the in-memory `model`, not `loaded_model`;
# to answer the question above, `loaded_model` is probably what should be shown -- confirm
model

In [None]:
# score the in-memory model on the test split
loss, acc = model.evaluate(test_x, test_y, verbose=0)
print(f'[Model] Test accuracy: {100*acc:.2f}%')

# score the model reloaded from the checkpoint on the same split
loss, acc = loaded_model.evaluate(test_x, test_y, verbose=0)
print(f'[Loaded Model] Test accuracy: {100*acc:.2f}%')

### We can (of course) save the model directly (i.e., without checkpointing)

In [None]:
model.save('mymodel.tf')

### Skip some layers with concatenation

In [None]:
def create_model_skip(input_shape=(28, 28)):
    """Build (without compiling) a fully connected classifier with a skip connection.

    The flattened input is concatenated with the output of the second hidden
    layer, so the output layer sees both the learned features and the raw input.

    Args:
        input_shape: shape of a single input sample, forwarded to ``Input``.

    Returns:
        An uncompiled ``keras.Model`` named ``'FC-model-with-skip'``.
    """
    inputs = Input(shape=input_shape, name='Input')
    flat = Flatten(name='Flatten')(inputs)

    hidden = Dense(324, activation='relu', name='FC1')(flat)
    hidden = Dense(128, activation='relu', name='FC2')(hidden)

    # skip connection: join the FC2 output with the flattened input
    merged = Concatenate(name='Concat')([hidden, flat])
    outputs = Dense(10, activation='softmax', name='Output')(merged)

    return keras.Model(name='FC-model-with-skip', inputs=[inputs], outputs=[outputs])

In [None]:
model = create_model_skip()
model.summary()

In [None]:
# compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

# this run stops early on validation loss (the earlier run monitored val_accuracy)
early_stop_cb = EarlyStopping(monitor='val_loss', patience=5)

# max_epochs is only an upper bound: the early-stopping callback can end training sooner
max_epochs = 100
batch_size = 128
history = model.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=max_epochs, batch_size=batch_size, 
                     shuffle=True, callbacks=[early_stop_cb])

In [None]:
# create and compile model
model = create_model_skip()
model.compile(loss='sparse_categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])


# set up tensorboard log directory and callback
# each run gets its own timestamped sub-directory under ./logs/fit/
log_dir = './logs/fit/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_cb = TensorBoard(log_dir=log_dir, histogram_freq=1)

# let's write some of the images data to logs
# NOTE(review): built from the same prefix and timestamp format as log_dir, so it
# names the same directory unless the clock moves to the next second between the
# two now() calls -- confirm whether a separate directory was intended
new_log_dir = './logs/fit/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
fw = tf.summary.create_file_writer(new_log_dir)
with fw.as_default():
    for step in range(1, 11):
        # log training images 0..9, one per summary step, each reshaped to (1, 28, 28, 1)
        tf.summary.image('train_images_{}'.format(step), train_x[(step-1)].reshape(-1, 28, 28, 1), step=step)
        

# no early-stopping callback is passed here, only the TensorBoard one
max_epochs = 50
batch_size = 128
history = model.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=max_epochs, batch_size=batch_size, 
                     shuffle=True, callbacks=[tensorboard_cb])

In [None]:
# Start tensorboard (notebook experience)
%tensorboard --logdir ./logs/fit

## How can we tune hyperparameters?

### Let's use sklearn!

In [None]:
def instantiate_model_with_hyperparams(skip=True, num_hidden=1, hidden_units=96, 
                                       activation_func='relu', eta=0.001, input_shape=(28, 28), verbose=False):
    """Build and compile a fully connected classifier from a set of hyperparameters.

    The first hidden layer (``FC1``) always has 324 units. ``hidden_units``
    only sizes the additional hidden layers ``FC2`` .. ``FC<num_hidden>``, so
    it has no effect when ``num_hidden`` is 1.

    Args:
        skip: when true, concatenate the flattened input with the output of
            the last hidden layer before the output layer (skip connection).
        num_hidden: total number of hidden layers, including ``FC1``.
        hidden_units: number of units in every hidden layer after ``FC1``.
        activation_func: activation used by all hidden layers.
        eta: learning rate handed to the Adam optimizer.
        input_shape: shape of a single input sample.
        verbose: when true, print the hyperparameters being used.

    Returns:
        A ``keras.Model`` compiled with sparse categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    if verbose:
        print('Hyperparameters: ', num_hidden, hidden_units, activation_func, eta, skip)

    # define architecture
    input_layer = Input(shape=input_shape, name='Input')
    flatten_layer = Flatten(name='Flatten')(input_layer)

    fclast = Dense(324, activation=activation_func, name='FC1')(flatten_layer)

    # Additional hidden layers are numbered contiguously after FC1 (FC2, FC3, ...).
    # The previous naming ('FC{}'.format(i+2) with i starting at 1) skipped FC2.
    for layer_idx in range(2, num_hidden + 1):
        fclast = Dense(hidden_units, activation=activation_func,
                       name='FC{}'.format(layer_idx))(fclast)

    if skip:
        concat_layer = Concatenate(name='Concat')([fclast, flatten_layer])
        output_layer = Dense(10, activation='softmax', name='Output')(concat_layer)
    else:
        output_layer = Dense(10, activation='softmax', name='Output')(fclast)

    model = keras.Model(inputs=[input_layer], outputs=[output_layer])

    # compile the model
    opt = keras.optimizers.Adam(learning_rate=eta)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    return model

### We can use the SciKeras wrapper to turn the model into a scikit-learn estimator (so we can call fit, predict, etc.)

In [None]:
from scikeras.wrappers import KerasClassifier
# the first argument is the model-building function; the keyword arguments mirror its parameters
# NOTE(review): `verbose` is presumably consumed by KerasClassifier itself rather than
# forwarded to the build function -- confirm against the SciKeras docs
wrapped_model = KerasClassifier(instantiate_model_with_hyperparams, skip=True, num_hidden=1, hidden_units=96, 
                                       activation_func='relu', eta=0.001, verbose=0)

In [None]:
# We can train the model using the wrapped model. For example:
# NOTE(review): scikit-learn style fit() conventionally returns the estimator itself, so
# `hist` is presumably the wrapper rather than a Keras History object -- confirm
hist = wrapped_model.fit(train_x, train_y, epochs=2, validation_data=(val_x, val_y))

In [None]:
# we can call functions like score
acc = wrapped_model.score(test_x, test_y)

### Let's do a hyperparameter search

In [None]:
# Let's do randomized search
from sklearn.model_selection import RandomizedSearchCV

# tuples otherwise there is a bug
# the keys match the keyword parameters of instantiate_model_with_hyperparams
hyperparams_dist = {
    'eta': (0.0001, 0.001, 0.01),
    'skip': (True, False),
    'num_hidden': (1, 2, 3, 4),
    'hidden_units': (32, 64, 128, 256),
    'activation_func': ('tanh', 'relu', 'selu')
}

# cv folds and n_iter sampled configurations are passed to the search below
cv=2
n_iter=3
random_search = RandomizedSearchCV(wrapped_model, hyperparams_dist, cv=cv, n_iter=n_iter)

In [None]:
# run the actual search; observe the extra parameters we are passing to fit()
# warning: this is the slow step of the search
# set up an early stopping callback
# no `monitor` is given, so the callback's default monitored quantity is used
early_stop_cb = EarlyStopping(patience=3)

#verb=1
verb=0
_ = random_search.fit(train_x, train_y, epochs=10, validation_data=(val_x, val_y), callbacks=[early_stop_cb], verbose=verb)

In [None]:
# inspect the best hyperparameter combination found by the search and its score
random_search.best_params_, random_search.best_score_

In [None]:
# SciKeras keeps the fitted Keras model in `model_`; `model` is only the
# build function that was handed to the KerasClassifier constructor.
best_model = random_search.best_estimator_.model_

In [None]:
def create_compile_cnn(input_shape=(28, 28, 1), num_outputs=10, verbose=False):
    """Build and compile a small convolutional classifier.

    Architecture: one 7x7 convolution (64 filters) and max-pool, then two
    blocks of two 3x3 convolutions (128 and 256 filters) each followed by a
    max-pool, then two dense layers (128 and 64 units) with 50% dropout after
    each, and a softmax output layer.

    Args:
        input_shape: shape of a single input sample, including the channel
            axis. A tuple is used as the default to avoid a shared mutable
            default argument.
        num_outputs: number of units in the softmax output layer.
        verbose: when true, print the model summary.

    Returns:
        A ``(name, model)`` tuple: the model name (``'CNN'``) and the
        ``Sequential`` model compiled with sparse categorical cross-entropy
        loss, the Adam optimizer (learning rate 0.002) and the accuracy metric.
    """
    name = 'CNN'
    model = keras.models.Sequential(name=name)

    model.add(Conv2D(64, kernel_size=(7,7), input_shape=input_shape,
                     padding='same', activation='relu', name='conv1'))
    model.add(MaxPooling2D(2, name='maxpool1'))

    model.add(Conv2D(128, kernel_size=(3,3), activation='relu', padding='same', name='conv2'))
    model.add(Conv2D(128, kernel_size=(3,3), activation='relu', padding='same', name='conv3'))
    model.add(MaxPooling2D(2, name='maxpool2'))

    model.add(Conv2D(256, kernel_size=(3,3), activation='relu', padding='same', name='conv4'))
    model.add(Conv2D(256, kernel_size=(3,3), activation='relu', padding='same', name='conv5'))
    model.add(MaxPooling2D(2, name='maxpool3'))

    model.add(Flatten(name='flatten'))

    model.add(Dense(128, activation='relu', name='fc1'))
    model.add(Dropout(0.5, name='dropout1'))
    model.add(Dense(64, activation='relu', name='fc2'))
    model.add(Dropout(0.5, name='dropout2'))

    model.add(Dense(num_outputs, activation="softmax", name='output'))

    # `lr` is a deprecated alias; use `learning_rate`, as the other
    # optimizer construction in this notebook already does.
    opt = keras.optimizers.Adam(learning_rate=0.002)

    if verbose:
        model.summary()

    model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    return name, model

In [None]:
name, model = create_compile_cnn(verbose=True)