# Import modules

In [1]:
import os

#import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.compat.v1.disable_eager_execution()
print(tf.__version__)


import numpy as np
import scipy.misc

import matplotlib
import matplotlib.pyplot as plt 

import time

2.7.0


# Load data and set global variables

In [2]:
#load data. labels are in one-hot-encoding format
#generate original training and test data
img_size = 28
n_classes = 10

#global_step = 
input_size = 784
output_size = 10

print('\nLoading MNIST')

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = np.reshape(x_train, [-1, img_size*img_size])
x_train = x_train.astype(np.float32)/255

print(x_train.shape)

x_test = np.reshape(x_test, [-1, img_size*img_size])
x_test = x_test.astype(np.float32)/255

print(x_test.shape)

to_categorical = tf.keras.utils.to_categorical 
y_train = to_categorical(y_train)
y_test  = to_categorical(y_test)

print('\nSpliting data')

ind = np.random.permutation(x_train.shape[0])
x_train, y_train = x_train[ind], y_train[ind]

# 10% for validation 
validatationPct = 0.1
n = int(x_train.shape[0] * (1-validatationPct))
x_valid = x_train[n:]
x_train = x_train[:n]
#
y_valid = y_train[n:]
y_train = y_train[:n]






Loading MNIST
(60000, 784)
(10000, 784)

Spliting data


In [3]:
# (Global) Parameters
learning_rate = 1.0
training_epochs = 100
batch_size = 200
display_step = 1

n_sample = x_train.shape[0]
total_batch = int(x_train.shape[0]/batch_size)

n_sample, total_batch

(54000, 270)

# Define inference function

In [4]:
def inference(x):
    #takes a batch of pictures as input and returns a batch of corresponding probabilities of being in each class
    #input shape = (batch_size*image_size)     output shape = (batch_size*number_of_classes)
    
    init = tf.constant_initializer(value=0)

    W = tf.get_variable("W", [784, 10], initializer=init)
    b = tf.get_variable("b", [10], initializer=init)

    #This function performs the equivalent of softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis)
    #which returns a tensor with the same size as logits, the shape is batch_size*10 in this case 
    output = tf.nn.softmax(tf.matmul(x, W) + b)

    return output

# Define loss function

In [5]:
def loss(output, y):
    # output and y have the same shape: batch_size * num_of_classes while the returned loss is a scaler tensor
    # compute the average error per data sample by computing the cross-entropy loss over a minibatch
    
    #mean square error
    #loss = tf.reduce_mean(tf.reduce_sum(tf.square(y-output)))
    
    
    #cross-entropy loss is more commonly used since the confidence of classification is taken into account
    dot_product = y * tf.log(output)
    
    #tf.reduce_sum: Computes the sum of elements across dimensions of a tensor.
    xentropy = -tf.reduce_sum(dot_product, reduction_indices=1)
    
    #tf.reduce_mean: Computes the mean of elements across dimensions of a tensor.
    loss = tf.reduce_mean(xentropy)
    
    return loss

# Define the optimizer and training target

In [6]:
def training(cost, global_step):

    #tf.summary.scalar("cost", cost)
    
    # learning_rate 
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    # Global_step refers to the number of batches seen of far. 
    # When it is passed in the minimize() argument list, the variable is increased by one.
    # You can get the global_step value using tf.train.global_step()
    train_op = optimizer.minimize(cost, global_step=global_step)

    return train_op

# Define evaluation method

In [7]:
def evaluate(output, y):
    #correct_prediction is a vector of boolean elements
    #where true denotes prediction equals to the real value and false means the opposite
    correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))
    #tf.cast transfer boolean tensor into float tensor
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    tf.summary.scalar("validation_error", (1.0 - accuracy))

    return accuracy

# Main function

In [8]:
if __name__ == '__main__':
    
    start_time = time.time()
    #change it with your own path
    if not os.path.isdir('./logs/'):
        os.makedirs('./logs/')
    log_files_path = './logs/'

    with tf.Graph().as_default():
        
        # first build the structure of our neural network
        
        # variables has to be set up as placeholder before importing data
        x = tf.placeholder("float", [None, 784]) # mnist data image of shape 28*28=784
        
        # y is the label in one-hot-encoding format
        y = tf.placeholder("float", [None, 10])  # 0-9 digits recognition
        
        #output is a matrix of probabilities
        output = inference(x)

        cost = loss(output, y)
        
        # set the initial value of global_step as 0, this will increase by 1 every time weights are updated
        global_step = tf.Variable(1, name='global_step', trainable=False)
        train_op = training(cost, global_step)
        
        #train_op = training(cost, global_step=None)

        eval_op = evaluate(output, y)

        summary_op = tf.summary.merge_all()

        #https://www.tensorflow.org/api_docs/python/tf/train/Saver
        saver = tf.train.Saver()
        #define a session
        sess = tf.Session()

        summary_writer = tf.summary.FileWriter(log_files_path, sess.graph)

        #all variables need to be initialized by sess.run(tf.global_variables_initializer())
        init_op = tf.global_variables_initializer()

        sess.run(init_op)
        

        # Training cycle
        for epoch in range(training_epochs):

            avg_cost = 0.0
            
            # Loop over all batches
            for i in range(total_batch):
                
                #print(' batch {0}/{1}'.format(batch + 1, n_batch))
                start = i * batch_size
                end = min(n_sample, start + batch_size)
                minibatch_x, minibatch_y = x_train[start:end], y_train[start:end]
                
                # Fit training using batch data
                # Weights are only updated when we run the optimizer
                sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y})
                
                # Compute average loss
                avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch
                
            # Display logs per epoch step
            if epoch % display_step == 0:
                # Get the accuracy by running the eval_op with validation sets of data
                accuracy = sess.run(eval_op, feed_dict={x: x_valid, y: y_valid})

                print("Epoch:", '%03d' % (epoch+1), "cost function=", "{:.7f}".format(avg_cost), " Validation Error:", (1.0 - accuracy))

                summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y})
                summary_writer.add_summary(summary_str, sess.run(global_step))
                
                #https://www.tensorflow.org/api_docs/python/tf/train/Saver
                saver.save(sess, log_files_path+'model-checkpoint', global_step=global_step)


        print("Optimization Finished!")
        # Check the final accuracy after training
        accuracy = sess.run(eval_op, feed_dict={x: x_test, y: y_test})
        print("Test Accuracy:", accuracy)
        
        elapsed_time = time.time() - start_time
        
        print('Execution time was %.3f' % elapsed_time)

Epoch: 001 cost function= 0.3828606  Validation Error: 0.093666672706604
Epoch: 002 cost function= 0.2603395  Validation Error: 0.08950001001358032
Epoch: 003 cost function= 0.2469993  Validation Error: 0.08749997615814209
Epoch: 004 cost function= 0.2398309  Validation Error: 0.08583331108093262
Epoch: 005 cost function= 0.2350717  Validation Error: 0.08499997854232788
Epoch: 006 cost function= 0.2315630  Validation Error: 0.08516669273376465
Instructions for updating:
Use standard file APIs to delete files with this prefix.
Epoch: 007 cost function= 0.2288075  Validation Error: 0.08499997854232788
Epoch: 008 cost function= 0.2265505  Validation Error: 0.08366668224334717
Epoch: 009 cost function= 0.2246457  Validation Error: 0.08350002765655518
Epoch: 010 cost function= 0.2230024  Validation Error: 0.08350002765655518
Epoch: 011 cost function= 0.2215602  Validation Error: 0.08383333683013916
Epoch: 012 cost function= 0.2202776  Validation Error: 0.08416664600372314
Epoch: 013 cost fu