# Import modules

In [2]:
import tensorflow as tf
import time

from tensorflow.examples.tutorials.mnist import input_data

  from ._conv import register_converters as _register_converters


# Load data and set global variables

In [9]:
#load data. labels are in one-hot-encoding format
mnist = input_data.read_data_sets("data/", one_hot=True)


# (Global) Parameters
learning_rate = 0.0005
training_epochs = 100
batch_size = 100
display_step = 1

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


# Define inference function

In [10]:
def inference(x):
    #takes a batch of pictures as input and returns a batch of corresponding probabilities of being in each class
    #input shape = (batch_size*image_size)     output shape = (batch_size*number_of_classes)
    
    init = tf.constant_initializer(value=0)

    W = tf.get_variable("Weight", [784, 10], initializer=init)
    b = tf.get_variable("bias", [10], initializer=init)

    #This function performs the equivalent of softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis)
    #which returns a tensor with the same size as logits, the shape is batch_size*10 in this case 
    output = tf.nn.softmax(tf.matmul(x, W) + b)

    return output

# Define loss function

In [11]:
def loss(output, y):
    # output and y have the same shape: batch_size * num_of_classes while the returned loss is a scaler tensor
    # compute the average error per data sample by computing the cross-entropy loss over a minibatch
    
    #mean square error
    #loss = tf.reduce_mean(tf.reduce_sum(tf.square(y-output)))
    
    
    #cross-entropy loss is more commonly used 
    #since the confidence of classification is taken into account
    dot_product = y * tf.log(output)
    
    #tf.reduce_sum: Computes the sum of elements across dimensions of a tensor.
    xentropy = -tf.reduce_sum(dot_product, 1)
    
    #tf.reduce_mean: Computes the mean of elements across dimensions of a tensor.
    loss = tf.reduce_mean(xentropy)
    
    return loss

# Define the optimizer and training target

In [12]:
def training(cost, global_step):

    tf.summary.scalar("cost", cost)
    
    # learning_rate 
    optimizer = tf.train.AdamOptimizer(learning_rate)

    
    # Global_step refers to the number of batches seen of far. 
    # When it is passed in the minimize() argument list, the variable is increased by one.
    # You can get the global_step value using tf.train.global_step()
    

    train_op = optimizer.minimize(cost, global_step=global_step)

    return train_op

# Define evaluation method

In [13]:
def evaluate(output, y):
    # correct_prediction is a vector of boolean elements
    # where 
    # true denotes prediction equals to the real value 
    # and 
    # false means the opposite
    correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))
    #tf.cast transfer boolean tensor into float tensor
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    tf.summary.scalar("validation_error", (1.0 - accuracy))

    return accuracy

# Main function

In [14]:
if __name__ == '__main__':
    
    start_time = time.time()
    #change it with your own path
    log_files_path = 'C:/Users/Ali/logs/no_layer_2/'
    #log_files_path = 'C:/Users/Ali/Google Drive/CoursesColumbiaUniveristy/DeepLearning/codes/week3/logs/no_layer/'

    # read 
    # https://www.tensorflow.org/api_docs/python/tf/Graph
    with tf.Graph().as_default():
        
            
        # first build the structure of our neural network

        # variables has to be set up as placeholder before importing data
        x = tf.placeholder("float", [None, 784]) # MNIST data image of shape 28*28=784

        # y is the label in one-hot-encoding format
        y = tf.placeholder("float", [None, 10])  # 0-9 digits recognition

        #output is a matrix of probabilities
        output = inference(x)

        cost = loss(output, y)
        # set the initial value of global_step as 0
        # this will increase by 1 every time weights are updated
        global_step = tf.Variable(0, name='global_step', trainable=False)

        train_op = training(cost, global_step)
        #train_op = training(cost, global_step=None)

        eval_op = evaluate(output, y)

        summary_op = tf.summary.merge_all()

        #https://www.tensorflow.org/api_docs/python/tf/train/Saver
        saver = tf.train.Saver()
        #define a session
        sess = tf.Session()

        # needed for saving the graph
        summary_writer = tf.summary.FileWriter(log_files_path, sess.graph)

        #all variables need to be initialized by sess.run(tf.global_variables_initializer())
        init_op = tf.global_variables_initializer()

        sess.run(init_op)

        print('mnist.train.num_examples ', mnist.train.num_examples)
        print('mnist.test.num_examples ', mnist.test.num_examples)

        # Training cycle
        for epoch in range(training_epochs):

            avg_cost = 0.0
            total_batch = int(mnist.train.num_examples/batch_size)
            #print('total_batch ', total_batch)

            # Loop over all batches
            for i in range(total_batch):

                minibatch_x, minibatch_y = mnist.train.next_batch(batch_size)
                
                # Fit training using batch data
                # Weights are only updated when we run the optimizer
                sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y})
                
                # Compute average loss
                avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch

            # Display logs per epoch step
            if epoch % display_step == 0:
                
                # Get the accuracy by running the eval_op with validation sets of data
                accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels})

                print("Epoch:", '%03d' % (epoch+1), "cost function=", "{:0.7f}".format(avg_cost), " Validation Error:", (1.0 - accuracy))

                summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y})
                summary_writer.add_summary(summary_str, sess.run(global_step))

                #https://www.tensorflow.org/api_docs/python/tf/train/Saver
                saver.save(sess, log_files_path+'model-checkpoint', global_step=global_step)


        print("Optimization Finished!")
        # Check the final accuracy after training
        accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels})
        print("Test Accuracy:", accuracy)

        elapsed_time = time.time() - start_time

        print('Execution time was %0.3f' % elapsed_time)

mnist.train.num_examples  55000
mnist.test.num_examples  10000
Epoch: 001 cost function= 0.8448279  Validation Error: 0.11040002107620239
Epoch: 002 cost function= 0.4283471  Validation Error: 0.09399998188018799
Epoch: 003 cost function= 0.3621549  Validation Error: 0.08740001916885376
Epoch: 004 cost function= 0.3316875  Validation Error: 0.08420002460479736
Epoch: 005 cost function= 0.3139974  Validation Error: 0.08060002326965332
Epoch: 006 cost function= 0.3018381  Validation Error: 0.07940000295639038
Epoch: 007 cost function= 0.2933800  Validation Error: 0.0777999758720398
Epoch: 008 cost function= 0.2870633  Validation Error: 0.07520002126693726
Epoch: 009 cost function= 0.2816436  Validation Error: 0.07620000839233398
Epoch: 010 cost function= 0.2772597  Validation Error: 0.07400000095367432
Epoch: 011 cost function= 0.2739142  Validation Error: 0.0745999813079834
Epoch: 012 cost function= 0.2707763  Validation Error: 0.07340002059936523
Epoch: 013 cost function= 0.2680086  Va