Import packages

In [10]:
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")

# import the necessary package
from tensorflow.keras.callbacks import EarlyStopping  # shortstop if not improving
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import backend as K
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import kerastuner as kt
import numpy as np
import argparse
import cv2

Implement Config Class

In [11]:
class Config:
    # define the path to our output directory
    OUTPUT_PATH = "output"

    # initialize the input shape and number of classes
    INPUT_SHAPE = (28, 28, 1)
    NUM_CLASSES = 10

    # define the total number of epochs to train, batch size, and the
    # early stopping patience
    EPOCHS = 50
    BS = 32
    EARLY_STOPPING_PATIENCE = 5

# instantiate an object of the configuration class
config = Config()

Implement Helper Function

In [12]:
def save_plot(H, path):
	# plot the training loss and accuracy
	plt.style.use("ggplot")
	plt.figure()
	plt.plot(H.history["loss"], label="train_loss")
	plt.plot(H.history["val_loss"], label="val_loss")
	plt.plot(H.history["accuracy"], label="train_acc")
	plt.plot(H.history["val_accuracy"], label="val_acc")
	plt.title("Training Loss and Accuracy")
	plt.xlabel("Epoch #")
	plt.ylabel("Loss/Accuracy")
	plt.legend()
	plt.savefig(path)

Create CNN

In [13]:
def build_model(hp):
	# initialize the model along with the input shape and channel
	# dimension
	model = Sequential()
	inputShape = config.INPUT_SHAPE
	chanDim = -1

	# first CONV => RELU => POOL layer set
	model.add(Conv2D(
		hp.Int("conv_1", min_value=32, max_value=96, step=32),
		(3, 3), padding="same", input_shape=inputShape))
	model.add(Activation("relu"))
	model.add(BatchNormalization(axis=chanDim))
	model.add(MaxPooling2D(pool_size=(2, 2)))

	# second CONV => RELU => POOL layer set
	model.add(Conv2D(
		hp.Int("conv_2", min_value=64, max_value=128, step=32),
		(3, 3), padding="same"))
	model.add(Activation("relu"))
	model.add(BatchNormalization(axis=chanDim))
	model.add(MaxPooling2D(pool_size=(2, 2)))

	# first (and only) set of FC => RELU layers
	model.add(Flatten())
	model.add(Dense(hp.Int("dense_units", min_value=256,
		max_value=768, step=256)))
	model.add(Activation("relu"))
	model.add(BatchNormalization())
	model.add(Dropout(0.5))

	# softmax classifier
	model.add(Dense(config.NUM_CLASSES))
	model.add(Activation("softmax"))

	# initialize the learning rate choices and optimizer
	lr = hp.Choice("learning_rate",
		values=[1e-1, 1e-2, 1e-3])
	opt = Adam(learning_rate=lr)

	# compile the model
	model.compile(optimizer=opt, loss="categorical_crossentropy",
		metrics=["accuracy"])

	# return the model
	return model

Implement hyperparam tuning with Keras

In [14]:
# since we are using Jupyter Notebooks we can replace our argument
# parsing code with *hard coded* arguments and values
args = {
	"tuner": "random",
	"plot": "output/random_plot.png"
}

Loading MNIST data

In [15]:
# load the Fashion MNIST dataset
print("[INFO] loading Fashion MNIST...")
((trainX, trainY), (testX, testY)) = fashion_mnist.load_data()

# add a channel dimension to the dataset
trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
testX = testX.reshape((testX.shape[0], 28, 28, 1))

# scale data to the range of [0, 1]
trainX = trainX.astype("float32") / 255.0
testX = testX.astype("float32") / 255.0

# one-hot encode the training and testing labels
trainY = to_categorical(trainY, 10)
testY = to_categorical(testY, 10)

# initialize the label names
labelNames = ["top", "trouser", "pullover", "dress", "coat",
	"sandal", "shirt", "sneaker", "bag", "ankle boot"]

[INFO] loading Fashion MNIST...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [16]:
# initialize an early stopping callback to prevent the model from
# overfitting/spending too much time training with minimal gains
es = EarlyStopping(
	monitor="val_loss",
	patience=config.EARLY_STOPPING_PATIENCE,
	restore_best_weights=True)

In [17]:
# check if we will be using the hyperband tuner
if args["tuner"] == "hyperband":
	# instantiate the hyperband tuner object
	print("[INFO] instantiating a hyperband tuner object...")
	tuner = kt.Hyperband(
		build_model,
		objective="val_accuracy",
		max_epochs=config.EPOCHS,
		factor=3,
		seed=42,
		directory=config.OUTPUT_PATH,
		project_name=args["tuner"])

# check if we will be using the random search tuner
elif args["tuner"] == "random":
	# instantiate the random search tuner object
	print("[INFO] instantiating a random search tuner object...")
	tuner = kt.RandomSearch(
		build_model,
		objective="val_accuracy",
		max_trials=10,
		seed=42,
		directory=config.OUTPUT_PATH,
		project_name=args["tuner"])

# otherwise, we will be using the bayesian optimization tuner
else:
	# instantiate the bayesian optimization tuner object
	print("[INFO] instantiating a bayesian optimization tuner object...")
	tuner = kt.BayesianOptimization(
		build_model,
		objective="val_accuracy",
		max_trials=10,
		seed=42,
		directory=config.OUTPUT_PATH,
		project_name=args["tuner"])

[INFO] instantiating a random search tuner object...


In [None]:
# perform the hyperparameter search
print("[INFO] performing hyperparameter search...")
tuner.search(
	x=trainX, y=trainY,
	validation_data=(testX, testY),
	batch_size=config.BS,
	callbacks=[es],
	epochs=config.EPOCHS
)

# grab the best hyperparameters
bestHP = tuner.get_best_hyperparameters(num_trials=1)[0]
print("[INFO] optimal number of filters in conv_1 layer: {}".format(
	bestHP.get("conv_1")))
print("[INFO] optimal number of filters in conv_2 layer: {}".format(
	bestHP.get("conv_2")))
print("[INFO] optimal number of units in dense layer: {}".format(
	bestHP.get("dense_units")))
print("[INFO] optimal learning rate: {:.4f}".format(
	bestHP.get("learning_rate")))

Trial 2 Complete [00h 17m 58s]
val_accuracy: 0.8859999775886536

Best val_accuracy So Far: 0.9150999784469604
Total elapsed time: 03h 20m 13s

Search: Running Trial #3

Hyperparameter    |Value             |Best Value So Far 
conv_1            |64                |64                
conv_2            |64                |64                
dense_units       |768               |512               
learning_rate     |0.001             |0.01              

Epoch 1/50
Epoch 2/50
Epoch 3/50

In [None]:
# build the best model and train it
print("[INFO] training the best model...")
model = tuner.hypermodel.build(bestHP)
H = model.fit(x=trainX, y=trainY,
	validation_data=(testX, testY), batch_size=config.BS,
	epochs=config.EPOCHS, callbacks=[es], verbose=1)

# evaluate the network
print("[INFO] evaluating network...")
predictions = model.predict(x=testX, batch_size=32)
print(classification_report(testY.argmax(axis=1),
	predictions.argmax(axis=1), target_names=labelNames))

# generate the training loss/accuracy plot
save_plot(H, args["plot"])