In [None]:
!pip install -q -U tensorflow>=1.8.0
import tensorflow as tf

In [None]:
# Fetch the pre-shuffled Fashion-MNIST train / test splits
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
print("x_train shape:", x_train.shape, "y_train shape:", y_train.shape)

# Convert pixels to float32 and scale them by 1/255
x_train, x_test = (images.astype('float32') / 255 for images in (x_train, x_test))

# Hold out the first 5,000 training samples for validation;
# the remaining 55,000 stay in the training set
x_valid, x_train = x_train[:5000], x_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

# Append a trailing channel axis: (28, 28) -> (28, 28, 1)
w, h = 28, 28
x_train = x_train.reshape(-1, w, h, 1)
x_valid = x_valid.reshape(-1, w, h, 1)
x_test = x_test.reshape(-1, w, h, 1)

# Turn the integer labels into one-hot vectors over the 10 classes
y_train, y_valid, y_test = (
    tf.keras.utils.to_categorical(labels, 10)
    for labels in (y_train, y_valid, y_test)
)

# Shapes of the final training arrays
print("x_train shape:", x_train.shape, "y_train shape:", y_train.shape)

# Number of samples in each of the three splits
print(len(x_train), 'train set')
print(len(x_valid), 'validation set')
print(len(x_test), 'test set')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
x_train shape: (60000, 28, 28) y_train shape: (60000,)
x_train shape: (55000, 28, 28, 1) y_train shape: (55000, 10)
55000 train set
5000 validation set
10000 test set


In [None]:
# TODO: clean this up
# TODO: add in dropout
# TODO: add in representation size and layers
def create_layer(layer, input_shape):
  """
  Returns a TensorFlow layer based on layer information stored in tuple.

  Supported layer tuples:
    ('convolution', nfilters, filter-size, stride)
    ('pooling', pool_size, stride)
    ('dense', nnodes)
    ('softmax', nnodes)

  Args:
    layer: tuple whose first element names the layer type and whose remaining
      elements are that type's settings, in the order listed above.
    input_shape: shape of a single input sample (for example (28, 28, 1)) when
      the layer is the first one of a model, otherwise None.

  Returns:
    A new tf.keras layer: Conv2D with 'same' padding for 'convolution',
    MaxPooling2D for 'pooling', Dense with relu activation for 'dense' and
    Dense with softmax activation for 'softmax'. The layer is built the same
    way whether or not input_shape is given; input_shape is only forwarded to
    the constructor when it is not None.

  Raises:
    ValueError: if the layer type is not one of the four supported names
      (previously None was returned silently in that case).
  """
  layer_type = layer[0]
  if layer_type == 'convolution':
    layer_class = tf.keras.layers.Conv2D
    # No activation is passed, so the convolution keeps Keras' default.
    kwargs = dict(filters=layer[1], kernel_size=layer[2], strides=layer[3], padding='same')
  elif layer_type == 'pooling':
    layer_class = tf.keras.layers.MaxPooling2D
    kwargs = dict(pool_size=layer[1], strides=layer[2])
  elif layer_type == 'dense':
    layer_class = tf.keras.layers.Dense
    # relu is applied regardless of position; a first-layer dense used to be
    # created without any activation, unlike every other dense layer.
    kwargs = dict(units=layer[1], activation='relu')
  elif layer_type == 'softmax':
    layer_class = tf.keras.layers.Dense
    kwargs = dict(units=layer[1], activation='softmax')
  else:
    raise ValueError(
        "Unknown layer type %r; expected 'convolution', 'pooling', 'dense' or 'softmax'"
        % (layer_type,))

  # Only the first layer of a model is told the input shape.
  if input_shape is not None:
    kwargs['input_shape'] = input_shape
  return layer_class(**kwargs)
# TODO: need to add global average pooling?

def create_architecture(layers, input_shape):
  """
  Returns a TensorFlow architecture based on architecture information stored as a list of tuples and input shape.

  The first tuple is turned into a layer that receives input_shape; every
  later tuple is built without one. A Flatten layer is inserted whenever a
  'dense' or 'softmax' entry directly follows a 'convolution' or 'pooling'
  entry. Layers are created with create_layer and added to a
  tf.keras.Sequential model in list order.
  """
  flat_kinds = ('dense', 'softmax')
  spatial_kinds = ('convolution', 'pooling')

  model = tf.keras.Sequential()
  previous_kind = None
  for position, spec in enumerate(layers):
    kind = spec[0]
    if position == 0:
      # only the very first layer is given the input shape
      model.add(create_layer(spec, input_shape))
    else:
      if kind in flat_kinds and previous_kind in spatial_kinds:
        # going from a 2D (conv / pool) layer to a dense one: flatten first
        model.add(tf.keras.layers.Flatten())
      model.add(create_layer(spec, None))
    previous_kind = kind
  return model

# Architecture spec: two convolution + pooling stages, then a dense layer
# and a 10-way softmax output
layer1, layer2 = ('convolution', 64, 2, 1), ('pooling', 2, 2)
layer3, layer4 = ('convolution', 64, 2, 1), ('pooling', 2, 2)
layer5, layer6 = ('dense', 256), ('softmax', 10)

layers = [layer1, layer2, layer3, layer4, layer5, layer6]

# Build the model for 28x28 single-channel inputs and print its layer table
model = create_architecture(layers, input_shape=(28, 28, 1))
model.summary()

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 28, 28, 64)        320       
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 14, 14, 64)        16448     
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 7, 7, 64)          0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 3136)              0         
_________________________________________________________________
dense_13 (Dense)             (None, 256)               803072    
_________________________________________________________________
dense_14 (Dense)             (None, 10)              

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (the labels are one-hot encoded) and accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Take the callback from tf.keras, the same Keras implementation the model was
# built with. The standalone `keras` package is a separate distribution that
# this notebook never installs, and its callbacks are not guaranteed to work
# with a tf.keras model.
ModelCheckpoint = tf.keras.callbacks.ModelCheckpoint

# save_best_only=True: the file is only overwritten when the monitored
# quantity improves (ModelCheckpoint monitors val_loss unless told otherwise).
checkpointer = ModelCheckpoint(filepath='model.weights.best.hdf5', verbose = 1, save_best_only=True)

# Train for one epoch, validating on the held-out split after the epoch
model.fit(x_train,
         y_train,
         batch_size=64,
         epochs=1,
         validation_data=(x_valid, y_valid),
         callbacks=[checkpointer])


Epoch 00001: val_loss improved from inf to 0.30687, saving model to model.weights.best.hdf5


<tensorflow.python.keras.callbacks.History at 0x7f19264bbb90>

In [None]:
# Restore the checkpoint written during training, i.e. the weights with the
# best (lowest) validation loss
model.load_weights('model.weights.best.hdf5')

# Score the restored model on the test set; with the single 'accuracy' metric
# compiled in, the result is [loss, accuracy]
score = model.evaluate(x=x_test, y=y_test, verbose=0)

# Report the accuracy entry
print('\n', 'Test accuracy:', score[1])


 Test accuracy: 0.8794000148773193
