# Keras Challenge
- The Fashion MNIST Dataset is taken in and processed for modeling through TensorFlow and the Keras API.

In [18]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# import the necessary packages
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras import backend as K
from keras.models import load_model

# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
 
# import the necessary packages
#from pyimagesearch.minivggnet import MiniVGGNet
from sklearn.metrics import classification_report
from keras.optimizers import SGD
from keras.datasets import fashion_mnist
from keras.utils import np_utils
from keras import backend as K
from imutils import build_montages
import matplotlib.pyplot as plt
import numpy as np
import cv2

In [31]:
class MiniVGGNet:

    def build_a(width, height, depth, classes):
        # initialize the model along with the input shape to be
        # "channels last" and the channels dimension itself
        model = Sequential()
        inputShape = (height, width, depth)
        chanDim = -1
 
        # if we are using "channels first", update the input shape
        # and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1
            
        # first CONV => RELU => CONV => RELU => POOL layer set
        model.add(Conv2D(32, (3, 3), padding="same", input_shape=inputShape))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Conv2D(32, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        # second CONV => RELU => CONV => RELU => POOL layer set
        model.add(Conv2D(64, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Conv2D(64, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        # first (and only) set of FC => RELU layers
        model.add(Flatten())
        model.add(Dense(512))
        model.add(Activation("relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))

        # softmax classifier
        model.add(Dense(classes))
        model.add(Activation("softmax"))

        # return the constructed network architecture
        return model

    def build_b(width, height, depth, classes):
        # initialize the model along with the input shape to be
        # "channels last" and the channels dimension itself
        model = Sequential()
        inputShape = (height, width, depth)
        chanDim = -1
 
        # if we are using "channels first", update the input shape
        # and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1
        
        # first CONV => RELU => CONV => RELU => POOL layer set
        model.add(Conv2D(32, (3, 3), padding="same", input_shape=inputShape))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Conv2D(32, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        
        # first (and only) set of FC => RELU layers
        model.add(Flatten())
        model.add(Dense(512))
        model.add(Activation("relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        
        # softmax classifier
        model.add(Dense(classes))
        model.add(Activation("softmax"))

        # return the constructed network architecture
        return model
    
    def build_1(width, height, depth, classes): 
        
        model = Sequential()
        inputShape = (height, width, depth)
        chanDim = -1
        
        # if we are using "channels first", update the input shape
        # and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1
        
        # Must define the input shape in the first layer of the neural network
        model.add(Dense(64, activation='relu', input_shape=inputShape))
        model.add(Dropout(0.5))
        model.add(Flatten())
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(10, activation='softmax'))
        
        # return the constructed network architecture
        return model
    
    def build_2(width, height, depth, classes): 
        
        model = Sequential()
        inputShape = (height, width, depth)
        chanDim = -1
        
        # if we are using "channels first", update the input shape
        # and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1
        
        # Must define the input shape in the first layer of the neural network
        model.add(Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=inputShape)) 
        model.add(MaxPooling2D(pool_size=2))
        model.add(Dropout(0.3))
        model.add(Flatten())
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(10, activation='softmax'))
        
        # return the constructed network architecture
        return model
    
    def build_3(width, height, depth, classes): 
        
        model = Sequential()
        inputShape = (height, width, depth)
        chanDim = -1
        
        # if we are using "channels first", update the input shape
        # and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1
        
        # Must define the input shape in the first layer of the neural network
        model.add(Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=inputShape)) 
        model.add(MaxPooling2D(pool_size=2))
        model.add(Dropout(0.3))
        model.add(Conv2D(filters=64, kernel_size=2, padding='same', activation='relu')) 
        model.add(MaxPooling2D(pool_size=2))
        model.add(Dropout(0.3))
        model.add(Flatten())
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(10, activation='softmax'))
        
        # return the constructed network architecture
        return model

## Loading in the Fashion Dataset, split testing and training groups

In [4]:
# grab the Fashion MNIST dataset (if this is your first time running
# this the dataset will be automatically downloaded)
print("[INFO] loading Fashion MNIST...")
((trainX, trainY), (testX, testY)) = fashion_mnist.load_data()
 
# if we are using "channels first" ordering, then reshape the design
# matrix such that the matrix is:
# num_samples x depth x rows x columns
if K.image_data_format() == "channels_first":
    trainX = trainX.reshape((trainX.shape[0], 1, 28, 28))
    testX = testX.reshape((testX.shape[0], 1, 28, 28))
 
# otherwise, we are using "channels last" ordering, so the design
# matrix shape should be: num_samples x rows x columns x depth
else:
    trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
    testX = testX.reshape((testX.shape[0], 28, 28, 1))

[INFO] loading Fashion MNIST...


In [5]:
# scale data to the range of [0, 1]
trainX = trainX.astype("float32") / 255.0
testX = testX.astype("float32") / 255.0
 
# one-hot encode the training and testing labels
trainY = np_utils.to_categorical(trainY, 10)
testY = np_utils.to_categorical(testY, 10)
 
# initialize the label names
labelNames = ["top", "trouser", "pullover", "dress", "coat",
    "sandal", "shirt", "sneaker", "bag", "ankle boot"]

## Model 1: Baseline Dense MLP with categorical_crossentropy loss function

In [24]:
# initialize the number of epochs to train for, base learning rate,
# and batch size
NUM_EPOCHS = 10
INIT_LR = 1e-2
BS = 32

# initialize the optimizer and model
print("[INFO] compiling model...")

opt = SGD(lr=INIT_LR, decay=INIT_LR / NUM_EPOCHS, momentum=0.9, nesterov=True)

model = MiniVGGNet.build_1(width=28, height=28, depth=1, classes=10)

model.compile(loss="categorical_crossentropy", optimizer=opt,
    metrics=["accuracy"])
 
# train the network
print("[INFO] training model...")

H = model.fit(trainX, trainY,
    validation_data=(testX, testY),
    batch_size=BS, epochs=NUM_EPOCHS)

score = model.evaluate(testX, testY, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

model.save('f_mnist_1.h5')

[INFO] compiling model...
[INFO] training model...
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.40305262362957
Test accuracy: 0.8547
Test loss: 0.40305262362957
Test accuracy: 0.8547


## Model 2: Change loss function to binary_crossentropy

In [25]:
# initialize the number of epochs to train for, base learning rate,
# and batch size
NUM_EPOCHS = 10
INIT_LR = 1e-2
BS = 32

# initialize the optimizer and model
print("[INFO] compiling model...")

opt = SGD(lr=INIT_LR, decay=INIT_LR / NUM_EPOCHS, momentum=0.9, nesterov=True)

model = MiniVGGNet.build_1(width=28, height=28, depth=1, classes=10)

model.compile(loss="binary_crossentropy", optimizer=opt,
    metrics=["accuracy"])
 
# train the network
print("[INFO] training model...")

H = model.fit(trainX, trainY,
    validation_data=(testX, testY),
    batch_size=BS, epochs=NUM_EPOCHS)

score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

model.save('f_mnist_2.h5')

[INFO] compiling model...
[INFO] training model...
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


NameError: name 'x_test' is not defined

In [26]:
score = model.evaluate(testX, testY, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

model.save('f_mnist_2.h5')

Test loss: 0.07549896616339684
Test accuracy: 0.9699600028038025


## Model 3: Adding a Convolution layer with the categorical_crossentropy loss function

In [27]:
# initialize the number of epochs to train for, base learning rate,
# and batch size
NUM_EPOCHS = 10
INIT_LR = 1e-2
BS = 32

# initialize the optimizer and model
print("[INFO] compiling model...")

opt = SGD(lr=INIT_LR, decay=INIT_LR / NUM_EPOCHS, momentum=0.9, nesterov=True)

model = MiniVGGNet.build_2(width=28, height=28, depth=1, classes=10)

model.compile(loss="categorical_crossentropy", optimizer=opt,
    metrics=["accuracy"])
 
# train the network
print("[INFO] training model...")

H = model.fit(trainX, trainY,
    validation_data=(testX, testY),
    batch_size=BS, epochs=NUM_EPOCHS)

score = model.evaluate(testX, testY, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

model.save('f_mnist_3.h5')

[INFO] compiling model...
[INFO] training model...
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.34248134772777555
Test accuracy: 0.875


## Model 4: Adding Convolution layer with binary_crossentropy

In [30]:
# initialize the number of epochs to train for, base learning rate,
# and batch size
NUM_EPOCHS = 10
INIT_LR = 1e-2
BS = 32

# initialize the optimizer and model
print("[INFO] compiling model...")

opt = SGD(lr=INIT_LR, decay=INIT_LR / NUM_EPOCHS, momentum=0.9, nesterov=True)

model = MiniVGGNet.build_2(width=28, height=28, depth=1, classes=10)

model.compile(loss="binary_crossentropy", optimizer=opt,
    metrics=["accuracy"])
 
# train the network
print("[INFO] training model...")

H = model.fit(trainX, trainY,
    validation_data=(testX, testY),
    batch_size=BS, epochs=NUM_EPOCHS)

score = model.evaluate(testX, testY, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

model.save('f_mnist_4.h5')

[INFO] compiling model...
[INFO] training model...
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.07380805897712707
Test accuracy: 0.9700500040054322


## Model 5: Adding Second Convolution layer with binary_crossentropy

In [32]:
# initialize the number of epochs to train for, base learning rate,
# and batch size
NUM_EPOCHS = 10
INIT_LR = 1e-2
BS = 32

# initialize the optimizer and model
print("[INFO] compiling model...")

opt = SGD(lr=INIT_LR, decay=INIT_LR / NUM_EPOCHS, momentum=0.9, nesterov=True)

model = MiniVGGNet.build_3(width=28, height=28, depth=1, classes=10)

model.compile(loss="binary_crossentropy", optimizer=opt,
    metrics=["accuracy"])
 
# train the network
print("[INFO] training model...")

H = model.fit(trainX, trainY,
    validation_data=(testX, testY),
    batch_size=BS, epochs=NUM_EPOCHS)

score = model.evaluate(testX, testY, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

model.save('f_mnist_5.h5')

[INFO] compiling model...
[INFO] training model...
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.0947646532356739
Test accuracy: 0.9606400032997131


# Summary 

- Our baseline accuracy spread is .8319/.8547, indicating an amount of underfit. Throughout the model development, we see that higher accuracy is obtained with the binary_crossentropy loss function. 

- We reached our highest accuracy levels and highest generalization at .9691/.97005, when a Convolution layer was added as the top layer in the network. However adding a second convolution layer behind the first did not help our model's accuracy. We may look into batchnormalization to remedy this result. 

- Although model 4 was the most accurate, it was also the most computationally expensive at a total of 1294 seonds to process. However further parameters would have to be tested to define the curve of the time complexity. 

## Model Predictions

In [None]:
# make predictions on the test set
preds = model.predict(testX)
 
# show a nicely formatted classification report
print("[INFO] evaluating network...")
print(classification_report(testY.argmax(axis=1), preds.argmax(axis=1),
    target_names=labelNames))
 
# plot the training loss and accuracy
N = NUM_EPOCHS
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, N), H.history["acc"], label="train_acc")
plt.plot(np.arange(0, N), H.history["val_acc"], label="val_acc")
plt.title("Training Loss and Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig("plot_1.png")