In [1]:
from sklearn.model_selection import train_test_split

import tensorflow as tf

In [2]:
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Handle to the Fashion-MNIST dataset module bundled with Keras.
fashion_mnist = tf.keras.datasets.fashion_mnist

# load_data() returns two (images, labels) pairs: the training split and the test split.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

In [4]:
# Hold out 10,000 of the training samples as a validation set; the fixed
# random_state makes the split repeatable.
# NOTE: this rebinds X_train / y_train, so re-running the cell shrinks them again.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=10_000,
    random_state=42,
)

In [5]:
from tensorflow.keras.utils import to_categorical
# Convert every label array to its one-hot representation in a single pass.
# NOTE: the names are rebound in place, so this cell must run exactly once per kernel.
y_train, y_test, y_val = map(to_categorical, (y_train, y_test, y_val))

In [6]:
def randomize(x, y):
    """Shuffle samples and their labels with one shared random permutation.

    :param x: array of samples indexed along its first axis; any number of
              trailing dimensions (including none) is accepted
    :param y: array of labels with one entry per sample along its first axis
    :return: tuple ``(shuffled_x, shuffled_y)``; row ``i`` of both outputs comes
             from the same original position
    :raises ValueError: if ``x`` and ``y`` do not hold the same number of samples
    """
    if x.shape[0] != y.shape[0]:
        # Without this check a longer x would be silently truncated and a
        # shorter x would fail with an opaque IndexError.
        raise ValueError(
            "x and y must contain the same number of samples, got {} and {}".format(
                x.shape[0], y.shape[0]))
    permutation = np.random.permutation(y.shape[0])
    # Index the leading axis only, so x may have any rank;
    # the previous `x[permutation, :]` required at least two dimensions.
    shuffled_x = x[permutation]
    shuffled_y = y[permutation]
    return shuffled_x, shuffled_y

def get_next_batch(x, y, start, end):
    """Cut one mini-batch out of the data.

    :param x: sequence of samples
    :param y: sequence of labels aligned with ``x``
    :param start: index of the first element of the batch (inclusive)
    :param end: index one past the last element of the batch (exclusive)
    :return: tuple ``(x_batch, y_batch)`` holding the same slice of both inputs
    """
    window = slice(start, end)
    return x[window], y[window]

In [7]:
# Display names for the ten clothing categories.
# Presumably position i names integer label i; not used by the visible cells, so verify before relying on it.
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

In [24]:
# --- Data geometry ---------------------------------------------------------
img_h = 28                     # image height in pixels
img_w = 28                     # image width in pixels
img_size_flat = img_h * img_w  # 784 values per image once flattened
n_classes = 10                 # number of target classes (width of the one-hot labels)

# --- Hyper-parameters ------------------------------------------------------
epochs = 20                    # full passes over the training set
batch_size = 100               # samples per optimisation step
display_freq = 100             # print training metrics every this many iterations
learning_rate = 0.005          # initial learning rate handed to the optimizer

# --- Network size ----------------------------------------------------------
h1 = 400                       # units in the first hidden layer

In [9]:
# Flatten every image into one row of img_size_flat values; -1 lets NumPy
# work out the number of rows (samples) by itself.
X_train = X_train.reshape(-1, img_size_flat)
X_test = X_test.reshape(-1, img_size_flat)
X_val = X_val.reshape(-1, img_size_flat)

In [10]:
# weight and bias 
def weight_variable(name, shape):
    """
    Build a float32 weight variable drawn from a truncated normal distribution.

    :param name: suffix for the variable name; the variable is registered as 'W_' + name
    :param shape: shape of the weight variable
    :return: the variable obtained from tf.compat.v1.get_variable
    """
    return tf.compat.v1.get_variable(
        'W_' + name,
        shape=shape,
        dtype=tf.float32,
        # Small spread (stddev 0.01) keeps the starting weights close to zero.
        initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.01),
    )

def bias_variable(name, shape):
    """
    Build a float32 bias variable whose entries all start at zero.

    :param name: suffix for the variable name; the variable is registered as 'b_' + name
    :param shape: shape of the bias variable
    :return: the variable obtained from tf.compat.v1.get_variable
    """
    # The variable's shape is taken from this constant tensor, so no separate
    # shape argument is passed to get_variable.
    zeros = tf.constant(0., shape=shape, dtype=tf.float32)
    return tf.compat.v1.get_variable('b_' + name, initializer=zeros, dtype=tf.float32)

In [11]:
def fc_layer(x, num_units, name, use_relu=True):
    """
    Build one fully-connected layer: x @ W + b, optionally followed by ReLU.

    :param x: 2-D input tensor coming from the previous layer; its second
              dimension sets the number of rows of the weight matrix
    :param num_units: number of units (output columns) of this layer
    :param name: layer name, used as suffix for the weight and bias variable names
    :param use_relu: when true, apply a ReLU non-linearity to the affine output
    :return: the layer's output tensor
    """
    fan_in = x.get_shape()[1]
    weights = weight_variable(name, shape=[fan_in, num_units])
    biases = bias_variable(name, [num_units])
    pre_activation = tf.matmul(x, weights) + biases
    return tf.nn.relu(pre_activation) if use_relu else pre_activation

In [12]:
# Start building the graph for the network: placeholders for inputs (x) and targets (y).
# Eager execution is switched off first, because placeholders only exist in graph mode.
tf.compat.v1.disable_eager_execution()
# x: batches of flattened images, img_size_flat values per row; None leaves the batch size open.
x = tf.compat.v1.placeholder(tf.float32, shape=[None, img_size_flat], name='X')
# y: the matching label rows, n_classes values each (the notebook feeds one-hot labels here).
y = tf.compat.v1.placeholder(tf.float32, shape=[None, n_classes], name='Y')

In [13]:
# Hidden layer: h1 ReLU units fed directly by the input placeholder.
fc1 = fc_layer(x, num_units=h1, name='FC1', use_relu=True)
# Output layer: one raw score (logit) per class, hence no ReLU.
output_logits = fc_layer(fc1, num_units=n_classes, name='OUT', use_relu=False)

In [14]:
# Define the loss function, optimizer, and accuracy
# Loss: softmax cross-entropy between the label rows and the raw logits, averaged over the batch.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=output_logits), name='loss')
# NOTE: despite its name, `optimizer` holds the result of .minimize(loss), i.e. the
# training step that the session runs, not the AdamOptimizer object itself.
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate, name='Adam-op').minimize(loss)
# Per-sample boolean: does the arg-max of the logits equal the arg-max of the label row?
correct_prediction = tf.equal(tf.argmax(output_logits, 1), tf.argmax(y, 1), name='correct_pred')
# Accuracy: mean of those booleans after casting them to float32.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

# Network predictions
# Predicted class index per sample (arg-max over axis 1 of the logits).
cls_prediction = tf.argmax(output_logits, axis=1, name='predictions')

In [15]:
# Create the op for initializing all variables.
# It is only defined here; the training cell executes it with sess.run(init).
init = tf.compat.v1.global_variables_initializer()

In [16]:
# Sanity check: show the array shapes of the training and validation splits.
# The third label used to read 'x_train' although it reports the validation images.
print('x_train:\t{}'.format(X_train.shape))
print('y_train:\t{}'.format(y_train.shape))
print('x_valid:\t{}'.format(X_val.shape))
print('y_valid:\t{}'.format(y_val.shape))

x_train:	(50000, 784)
y_train:	(50000, 10)
x_train:	(10000, 784)
y_valid:	(10000, 10)


In [25]:
# Open an interactive session (it stays available to the cells below) and
# run the initializer op so every variable gets its starting value.
sess = tf.compat.v1.InteractiveSession()
sess.run(init)

# Whole batches per epoch; a trailing partial batch would never be visited.
num_tr_iter = len(y_train) // batch_size

for epoch in range(epochs):
    print('Training epoch: {}'.format(epoch + 1))
    # Reshuffle samples and labels together before each pass over the data.
    X_train, y_train = randomize(X_train, y_train)

    for iteration in range(num_tr_iter):
        start = iteration * batch_size
        end = start + batch_size
        x_batch, y_batch = get_next_batch(X_train, y_train, start, end)
        feed_dict_batch = {x: x_batch, y: y_batch}

        # One optimisation step on the current batch.
        sess.run(optimizer, feed_dict=feed_dict_batch)

        if iteration % display_freq != 0:
            continue

        # Every display_freq iterations, report loss and accuracy on the
        # batch that was just used for training.
        loss_batch, acc_batch = sess.run([loss, accuracy],
                                         feed_dict=feed_dict_batch)
        print("iter {0:3d}:\t Loss={1:.2f},\tTraining Accuracy={2:.01%}".
              format(iteration, loss_batch, acc_batch))

    # After each epoch, evaluate on the first 1000 validation samples.
    feed_dict_valid = {x: X_val[:1000], y: y_val[:1000]}
    loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)
    print('---------------------------------------------------------',
          "Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
          format(epoch + 1, loss_valid, acc_valid),
          '---------------------------------------------------------',
          sep='\n')
 



Training epoch: 1
iter   0:	 Loss=6.44,	Training Accuracy=39.0%
iter 100:	 Loss=0.53,	Training Accuracy=82.0%
iter 200:	 Loss=0.40,	Training Accuracy=92.0%
iter 300:	 Loss=0.41,	Training Accuracy=81.0%
iter 400:	 Loss=0.40,	Training Accuracy=87.0%
---------------------------------------------------------
Epoch: 1, validation loss: 0.42, validation accuracy: 85.4%
---------------------------------------------------------
Training epoch: 2
iter   0:	 Loss=0.36,	Training Accuracy=83.0%
iter 100:	 Loss=0.37,	Training Accuracy=85.0%
iter 200:	 Loss=0.40,	Training Accuracy=85.0%
iter 300:	 Loss=0.42,	Training Accuracy=86.0%
iter 400:	 Loss=0.45,	Training Accuracy=87.0%
---------------------------------------------------------
Epoch: 2, validation loss: 0.43, validation accuracy: 84.1%
---------------------------------------------------------
Training epoch: 3
iter   0:	 Loss=0.26,	Training Accuracy=90.0%
iter 100:	 Loss=0.37,	Training Accuracy=90.0%
iter 200:	 Loss=0.36,	Training Accuracy=87

In [26]:
# Evaluate the trained network: loss and accuracy on the first 1000 test samples.
feed_dict_test = {x: X_test[:1000], y: y_test[:1000]}
loss_test, acc_test = sess.run(fetches=[loss, accuracy], feed_dict=feed_dict_test)
print('---------------------------------------------------------',
      "Test loss: {0:.2f}, test accuracy: {1:.01%}".format(loss_test, acc_test),
      '---------------------------------------------------------',
      sep='\n')

---------------------------------------------------------
Test loss: 0.45, test accuracy: 86.2%
---------------------------------------------------------


In [27]:
# Close the interactive session opened in the training cell; `sess` cannot be
# used for further sess.run calls after this.
sess.close()