In [1]:
import numpy as np
from keras.datasets import fashion_mnist as fmn
from keras.models import Sequential
from keras.models import Model
from keras.layers import Input, Convolution1D, MaxPooling1D, Dense, Dropout, Flatten
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Problem 1
Build a CNN comparable in structure to the one in the reading you did on CNNs, but train and test it on the fashion-MNIST dataset.  Adjust some of the parameters and compare the results.  You should be able to get performance better than any of the classifiers we have used on Fashion-MNIST so far.

In [2]:
# Load the data.
(train_x,train_y),(test_x,test_y) = fmn.load_data()

# Get important nums about data
num_train, height, width = train_x.shape
num_test = test_x.shape[0]
num_classes = np.unique(train_y).shape[0]

# Standardize data
train_x = train_x.astype('float32')
test_x = test_x.astype('float32')
train_x /= np.max(train_x)
test_x /= np.max(test_x)

train_y = to_categorical(train_y,num_classes)
test_y = to_categorical(test_y,num_classes)

In [3]:
# Set up constants
batch_size = 32
num_epochs = 200
kernel_size = 3
pool_size = 2
conv_depth_1 = 32
conv_depth_2 = 64
drop_prob_1 = 0.25
drop_prob_2 = 0.5
hidden_size = 512

In [4]:
# Build your CNN.
inputs = Input(shape=(height, width))
conv_1 = Convolution1D(conv_depth_1,kernel_size,padding='same',activation='relu')(inputs)
conv_2 = Convolution1D(conv_depth_1,kernel_size,padding='same',activation='relu')(conv_1)
pool_1 = MaxPooling1D(pool_size=pool_size)(conv_2)
drop_1 = Dropout(drop_prob_1)(pool_1)
conv_3 = Convolution1D(conv_depth_2,kernel_size,padding='same',activation='relu')(drop_1)
conv_4 = Convolution1D(conv_depth_2,kernel_size,padding='same',activation='relu')(conv_3)
pool_2 = MaxPooling1D(pool_size=pool_size)(conv_4)
drop_2 = Dropout(drop_prob_1)(pool_2)
flat = Flatten()(drop_2)
hidden = Dense(hidden_size, activation='relu')(flat)
drop_3 = Dropout(drop_prob_2)(hidden)
out = Dense(num_classes,activation='softmax')(drop_3)

model = Model(inputs=inputs, outputs=out)
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='acc',patience=2)

In [5]:
# Train and test.
model.fit(train_x,train_y,batch_size=batch_size,epochs=num_epochs,verbose=1,callbacks=[early_stopping])
model.evaluate(test_x,test_y,verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200


[0.28503741631507873, 0.896]

In [6]:
# Adjust parameters.
# Set up constants
batch_size = 32
num_epochs = 200
kernel_size = 5
pool_size = 3
conv_depth_1 = 32
conv_depth_2 = 64
drop_prob_1 = 0.25
drop_prob_2 = 0.35
hidden_size = 1024
# Build your CNN.
inputs = Input(shape=(height, width))
conv_1 = Convolution1D(conv_depth_1,kernel_size,padding='same',activation='relu')(inputs)
conv_2 = Convolution1D(conv_depth_1,kernel_size,padding='same',activation='relu')(conv_1)
pool_1 = MaxPooling1D(pool_size=pool_size)(conv_2)
drop_1 = Dropout(drop_prob_1)(pool_1)
conv_3 = Convolution1D(conv_depth_2,kernel_size,padding='same',activation='relu')(drop_1)
conv_4 = Convolution1D(conv_depth_2,kernel_size,padding='same',activation='relu')(conv_3)
pool_2 = MaxPooling1D(pool_size=pool_size)(conv_4)
drop_2 = Dropout(drop_prob_1)(pool_2)
flat = Flatten()(drop_2)
hidden = Dense(hidden_size, activation='relu')(flat)
drop_3 = Dropout(drop_prob_2)(hidden)
out = Dense(num_classes,activation='softmax')(drop_3)

model = Model(inputs=inputs, outputs=out)
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='acc',patience=2)
# Train and test.
model.fit(train_x,train_y,batch_size=batch_size,epochs=num_epochs,verbose=1,callbacks=[early_stopping])
model.evaluate(test_x,test_y,verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200


[0.3066774897933006, 0.8856]

In [7]:
# Adjust parameters.
# Set up constants
batch_size = 32
num_epochs = 200
kernel_size = 2
pool_size = 1
conv_depth_1 = 16
conv_depth_2 = 64
drop_prob_1 = 0.15
drop_prob_2 = 0.35
hidden_size = 1024
# Build your CNN.
inputs = Input(shape=(height, width))
conv_1 = Convolution1D(conv_depth_1,kernel_size,padding='same',activation='relu')(inputs)
conv_2 = Convolution1D(conv_depth_1,kernel_size,padding='same',activation='relu')(conv_1)
pool_1 = MaxPooling1D(pool_size=pool_size)(conv_2)
drop_1 = Dropout(drop_prob_1)(pool_1)
conv_3 = Convolution1D(conv_depth_2,kernel_size,padding='same',activation='relu')(drop_1)
conv_4 = Convolution1D(conv_depth_2,kernel_size,padding='same',activation='relu')(conv_3)
pool_2 = MaxPooling1D(pool_size=pool_size)(conv_4)
drop_2 = Dropout(drop_prob_1)(pool_2)
flat = Flatten()(drop_2)
hidden = Dense(hidden_size, activation='relu')(flat)
drop_3 = Dropout(drop_prob_2)(hidden)
out = Dense(num_classes,activation='softmax')(drop_3)

model = Model(inputs=inputs, outputs=out)
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='acc',patience=2)
# Train and test.
model.fit(train_x,train_y,batch_size=batch_size,epochs=num_epochs,verbose=1,callbacks=[early_stopping])
model.evaluate(test_x,test_y,verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200


[0.4067879297107458, 0.8965]

In [8]:
# Adjust parameters.
# Set up constants
batch_size = 32
num_epochs = 200
kernel_size = 8
pool_size = 4
conv_depth_1 = 64
conv_depth_2 = 128
drop_prob_1 = 0.25
drop_prob_2 = 0.55
hidden_size = 512
# Build your CNN.
inputs = Input(shape=(height, width))
conv_1 = Convolution1D(conv_depth_1,kernel_size,padding='same',activation='relu')(inputs)
conv_2 = Convolution1D(conv_depth_1,kernel_size,padding='same',activation='relu')(conv_1)
pool_1 = MaxPooling1D(pool_size=pool_size)(conv_2)
drop_1 = Dropout(drop_prob_1)(pool_1)
conv_3 = Convolution1D(conv_depth_2,kernel_size,padding='same',activation='relu')(drop_1)
conv_4 = Convolution1D(conv_depth_2,kernel_size,padding='same',activation='relu')(conv_3)
pool_2 = MaxPooling1D(pool_size=pool_size)(conv_4)
drop_2 = Dropout(drop_prob_1)(pool_2)
flat = Flatten()(drop_2)
hidden = Dense(hidden_size, activation='relu')(flat)
drop_3 = Dropout(drop_prob_2)(hidden)
out = Dense(num_classes,activation='softmax')(drop_3)

model = Model(inputs=inputs, outputs=out)
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='acc',patience=2)
# Train and test.
model.fit(train_x,train_y,batch_size=batch_size,epochs=num_epochs,verbose=1,callbacks=[early_stopping])
model.evaluate(test_x,test_y,verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200


[0.3061091561436653, 0.8971]

Compare Results :

The original results were the best, at 89.6% accuracy taking 22 epochs of about 11 seconds a piece.  When I increased the kernel and pool size as well as the size of the hidden layer, it stopped after only 15 epochs of about 13 seconds a piece and lost one percent accuracy.  Decreasing the kernel and pool size had the unfortunate effect of overfitting a little, since the training data measured up to 96% accuracy, but when tested on the test data, it only managed 89.65%.  While this is greater than the original accuracy, it not only isn't a very big improvement, but it required 59 epochs of about 45 seconds a piece, far longer than the original parameters.  This indicates to me that I shouldn't have used training accuracy as my indicator to stop training, but rather a validation of the test data.  The last set seems to have performed the second best, taking about twice as long as the original set and giving a better accuracy (one of 89.71%).  These took about 23 seconds a piece, and finished in just 24 epochs.