# Exercise 9: Convolutional Neural Networks with Keras

In [None]:
# Load packages we need
import sys
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import datetime

import numpy as np
import sklearn

import scipy as sp
import pandas as pd

import tensorflow as tf

# we'll use keras for neural networks
import tensorflow.keras as keras
from tensorflow.keras.datasets import fashion_mnist

# import layers we will use
from tensorflow.keras.layers import Input, Flatten, Dense, Conv2D, MaxPooling2D, Concatenate, Dropout

# import callbacks we will use
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard

# Load the TensorBoard notebook extension
%load_ext tensorboard

%matplotlib inline
from matplotlib import pyplot as plt
plt.rcParams.update({'font.size': 18})

# Let's check our software versions
print('### Python version: ' + sys.version)
print('### Numpy version: ' + np.__version__)
print('### Scikit-learn version: ' + sklearn.__version__)
print('### Tensorflow version: ' + tf.__version__)
print('------------')


# load our packages / code
sys.path.insert(1, '../common/')
import utils
import plots

In [None]:
# global parameters to control behavior of the pre-processing, ML, analysis, etc.

seed = 42 # deterministic seed
# seed both NumPy's and TensorFlow's global random generators with the same value
np.random.seed(seed) 
tf.random.set_seed(seed)

# forwarded as `prop_vec` to utils.load_preprocess_mnist_data when the MNIST data is loaded;
# presumably the relative sizes of the data splits -- TODO confirm against utils
prop_vec = [24, 2, 2]

## Let's use Fashion MNIST

In [None]:
def load_preprocess_fashion_mnist(minmax_normalize=True):
    """Load Fashion MNIST and split the official test set into validation and test halves.

    When `minmax_normalize` is true, the pixel values of all images are divided by 255.0.
    The first half of the official test set becomes the validation set, the rest the test set.

    Returns the tuple (train_x, train_y, test_x, test_y, val_x, val_y, labels), where
    `labels` is a NumPy array holding the ten class names.
    """
    class_names = ['top', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']

    (train_x, train_y), (holdout_x, holdout_y) = fashion_mnist.load_data()

    if minmax_normalize:
        train_x, holdout_x = train_x / 255.0, holdout_x / 255.0

    # split test - val: first half -> validation, second half -> test
    half = holdout_x.shape[0] // 2
    val_x, test_x = holdout_x[:half], holdout_x[half:]
    val_y, test_y = holdout_y[:half], holdout_y[half:]

    return train_x, train_y, test_x, test_y, val_x, val_y, np.array(class_names)

In [None]:
# load Fashion MNIST (min-max normalized by default) together with the array of class names
train_x, train_y, test_x, test_y, val_x, val_y, labels = load_preprocess_fashion_mnist()

### What does the data look like?

In [None]:
# look at the first `num_images` training images, each titled with its class name
num_images = 25
# integer class ids of those images, used to index into the `labels` array
label_idx = train_y[:num_images].astype(int)
titles = labels[label_idx]
plots.plot_images(train_x[:num_images].reshape(-1, 28, 28), dim_x=28, dim_y=28, fig_size=(9,9), titles=titles)

In [None]:
def create_model_functional(input_shape=(28, 28)):
    """Build (without compiling) a fully connected classifier with the Keras functional API.

    Architecture: Input(`input_shape`) -> Flatten -> Dense(300, relu) -> Dense(100, relu)
    -> Dense(10, softmax).

    Returns the resulting keras.Model, named 'FC-model'.
    """
    inputs = Input(shape=input_shape, name='Input')

    # thread a single tensor through the stack of layers
    x = Flatten(name='Flatten')(inputs)
    for layer_name, units in (('FC1', 300), ('FC2', 100)):
        x = Dense(units, name=layer_name, activation='relu')(x)
    outputs = Dense(10, name='Output', activation='softmax')(x)

    return keras.Model(name='FC-model', inputs=[inputs], outputs=[outputs])

In [None]:
# build the fully connected model with its default (28, 28) input shape
model = create_model_functional()

In [None]:
# what does the model look like? print its summary
model.summary()

## Let's compile and train the model

In [None]:
# compile the model: the sparse variant of categorical cross-entropy takes the class labels
# as integers (no one-hot encoding needed); accuracy is tracked as an additional metric
model.compile(loss='sparse_categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

In [None]:
# stop training once the validation accuracy has failed to improve for 3 epochs
early_stop_cb = EarlyStopping(monitor='val_accuracy', patience=3)

# write a checkpoint whenever the validation accuracy reaches a new maximum
fp = "./mymodel-bestweights.tf"
checkpoint_cb = ModelCheckpoint(fp, monitor='val_accuracy', save_best_only=True, mode='max')

# max_epochs is only an upper bound: early stopping may end training sooner
max_epochs = 100
batch_size = 64
history = model.fit(
    train_x,
    train_y,
    validation_data=(val_x, val_y),
    epochs=max_epochs,
    batch_size=batch_size,
    shuffle=True,
    callbacks=[early_stop_cb, checkpoint_cb],
)

## Can we train a CNN for Fashion MNIST?

In [None]:
def create_compile_cnn(input_shape=[28, 28, 1], num_outputs=10, verbose=False):
    """Build and compile the convolutional classifier.

    Architecture: a 7x7 convolution (64 filters) followed by two pairs of 3x3 convolutions
    (128 and 256 filters), each stage ending in 2x2 max pooling; then Flatten, two dense
    layers (128 and 64 units) each followed by 50% dropout, and a softmax output layer
    with `num_outputs` units.

    The model is compiled with sparse categorical cross-entropy, Adam (learning rate 0.002)
    and the accuracy metric. If `verbose` is true the model summary is printed.

    Returns the tuple (name, model).
    """
    # NOTE: the list default for `input_shape` is only passed through to Conv2D below
    name = 'CNN'

    def conv(filters, layer_name, kernel_size=(3, 3), **extra):
        # every convolution here shares 'same' padding and a ReLU activation
        return Conv2D(filters, kernel_size=kernel_size, padding='same',
                      activation='relu', name=layer_name, **extra)

    layers = [
        conv(64, 'conv1', kernel_size=(7, 7), input_shape=input_shape),
        MaxPooling2D(2, name='maxpool1'),

        conv(128, 'conv2'),
        conv(128, 'conv3'),
        MaxPooling2D(2, name='maxpool2'),

        conv(256, 'conv4'),
        conv(256, 'conv5'),
        MaxPooling2D(2, name='maxpool3'),

        Flatten(name='flatten'),

        Dense(128, activation='relu', name='fc1'),
        Dropout(0.5, name='dropout1'),
        Dense(64, activation='relu', name='fc2'),
        Dropout(0.5, name='dropout2'),

        Dense(num_outputs, activation="softmax", name='output'),
    ]
    model = keras.models.Sequential(layers, name=name)

    if verbose:
        model.summary()

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=keras.optimizers.Adam(learning_rate=0.002),
                  metrics=['accuracy'])

    return name, model

In [None]:
# build and compile the CNN; verbose=True also prints the model summary
name, model = create_compile_cnn(verbose=True)

In [None]:
# Train the Fashion MNIST CNN once and cache it on disk; later runs load the saved model.
fashion_mnist_cnn_fp = 'fashion-MNIST-CNN.tf'
load = os.path.exists(fashion_mnist_cnn_fp)

# The CNN takes 4d input (the model's input_shape is [28, 28, 1]), so add the trailing
# channel axis. This is done before branching so that the data matches the model
# whether it is loaded from disk or trained from scratch.
if train_x.ndim < 4:
    train_x = train_x.reshape(-1, 28, 28, 1)
    val_x = val_x.reshape(-1, 28, 28, 1)
    test_x = test_x.reshape(-1, 28, 28, 1)

if load:
    fashion_mnist_cnn_model = tf.keras.models.load_model(fashion_mnist_cnn_fp)
else:
    name, model = create_compile_cnn(verbose=True)

    # Monitor the *validation* loss: validation data is passed to fit() below, and with
    # restore_best_weights=True we want to keep the weights that generalize best rather
    # than the ones that fit the training set best.
    early_stop_cb = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    max_epochs = 15
    batch_size = 64

    history = model.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=max_epochs, batch_size=batch_size,
                        shuffle=True, callbacks=[early_stop_cb])

    # save the model so that the next run can skip training
    model.save(fashion_mnist_cnn_fp)
    fashion_mnist_cnn_model = model

### Can we reuse this model for a different task?

#### Let's try to use the fashion MNIST model for MNIST digit classification

In [None]:
# grab the MNIST data
(train_x, train_y,
 test_x, test_y,
 val_x, val_y,
 all_x, all_y) = utils.load_preprocess_mnist_data(onehot=False, flatten=False, prop_vec=prop_vec, seed=seed)

# min-max normalize each split, then reshape it because tensorflow expects 4d tensors
train_x, test_x, val_x = [
    (split / 255.0).reshape(-1, 28, 28, 1) for split in (train_x, test_x, val_x)
]

In [None]:
# Work on a copy of the pretrained model: clone it and copy the trained weights over.
# Cloning ensures we won't modify the original model's weights accidentally.
base_model = keras.models.clone_model(fashion_mnist_cnn_model)
base_model.set_weights(fashion_mnist_cnn_model.get_weights())

# keep everything except the last three layers; with the CNN defined above these are
# the second dense layer, its dropout, and the output layer
new_model = keras.models.Sequential(base_model.layers[:-3], name='MNIST-CNN-from-pretrained')

# freeze the pre-trained layers so only the new head is trained at first
for frozen_layer in new_model.layers:
    frozen_layer.trainable = False

# attach a fresh head: a new dense layer (with dropout) and a new output layer
for head_layer in (
    Dense(64, activation='relu', name='fc2'),
    Dropout(0.4, name='dropout2'),
    Dense(10, activation="softmax", name='output'),
):
    new_model.add(head_layer)

new_model.summary()

# compile
opt = keras.optimizers.Adam(learning_rate=0.01)
new_model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [None]:
# train for a few epochs to tune the trainable params
# (at this point only the newly added layers are trainable; the pre-trained ones are frozen)
history = new_model.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=6)

In [None]:
# Fine-tuning: unfreeze every layer so the pre-trained layers can be trained further
for layer in new_model.layers:
    layer.trainable = True

# use a smaller learning rate than before: we don't want to completely erase the previous weights
opt = keras.optimizers.Adam(learning_rate=0.001)

new_model.summary()

# re-compile so that the changed trainable flags and the new optimizer take effect
new_model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [None]:
# fine-tune for a couple of epochs, now that all layers (including the pre-trained ones) are trainable
history = new_model.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=2)

In [None]:
# how good is our model? evaluate on the test split, which was not passed to fit();
# the model was compiled with the 'accuracy' metric, so we get the loss and the accuracy back
loss, acc = new_model.evaluate(test_x, test_y)

In [None]:
# we can look at some predictions...

In [None]:
# plot the first test digits, each titled with its label from test_y
num_images = 25
labels = np.arange(10)  # for MNIST the class names are simply the digits 0..9
label_idx = test_y[:num_images].astype(int)
titles = labels[label_idx]
plots.plot_images(
    test_x[:num_images].reshape(-1, 28, 28),
    dim_x=28,
    dim_y=28,
    fig_size=(9,9),
    titles=titles,
)

In [None]:
# softmax outputs of the model for the first two test images, next to their labels
new_model.predict(test_x[0:2]), test_y[0:2]

In [None]:
# predicted class = index of the largest softmax output for each image
label_idx = np.argmax(new_model.predict(test_x[:num_images]), axis=-1)
# plot the same test images again, this time titled with the model's predictions
titles = labels[label_idx]
plots.plot_images(test_x[:num_images].reshape(-1, 28, 28), dim_x=28, dim_y=28, fig_size=(9,9), titles=titles)