In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline



In [2]:
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# Training schedule: total number of optimisation steps and how often
# (in steps) a loss/accuracy sample is meant to be recorded for plotting.
EPOCHS = 1000
STEPS_SIZE = 10

# Number of points on each curve. Floor division keeps this an int on
# Python 3 as well; true division would yield 100.0, which np.linspace
# (num=) and np.zeros reject as a size.
GRAPH_STEPS = EPOCHS // STEPS_SIZE

# x and y coordinates to plot
x_coordinates = np.linspace(0, GRAPH_STEPS, num=GRAPH_STEPS)
loss_y_coordinates = np.zeros(GRAPH_STEPS)
accuracy_y_coordinates = np.zeros(GRAPH_STEPS)

def plot(x, y, limits, title, x_label_name, y_label_name):
    """Draw one line chart of ``y`` against ``x`` and display it.

    Args:
        x: values for the horizontal axis.
        y: values for the vertical axis.
        limits: ``[xmin, xmax, ymin, ymax]`` axis bounds. Accepted so existing
            callers keep working, but not applied: the axes are autoscaled.
        title: text shown as the figure title.
        x_label_name: label for the horizontal axis.
        y_label_name: label for the vertical axis; also used as the legend
            entry for the plotted line.
    """
    # Discard figures left open by earlier calls so they do not pile up.
    plt.close("all")
    figure, axes = plt.subplots()

    # The line carries a label so that the legend below has an entry to show;
    # calling legend() with no labelled artists only produces a warning.
    axes.plot(x, y, label=y_label_name)

    axes.set_ylabel(y_label_name)
    axes.set_xlabel(x_label_name)

    figure.suptitle(title, fontsize=20)
    axes.legend(loc='upper right')

    plt.show()

def plot_loss():
    """Plot the recorded loss samples held in ``loss_y_coordinates``."""
    axis_limits = [0, GRAPH_STEPS, 0, 5]
    plot(
        x=x_coordinates,
        y=loss_y_coordinates,
        limits=axis_limits,
        title="Loss Overtime",
        x_label_name="Epochs",
        y_label_name="Loss",
    )

def plot_accuracy():
    """Plot the recorded accuracy samples held in ``accuracy_y_coordinates``."""
    axis_limits = [0, GRAPH_STEPS, 0, 1]
    plot(
        x=x_coordinates,
        y=accuracy_y_coordinates,
        limits=axis_limits,
        title="Accuracy Overtime",
        x_label_name="Epochs",
        y_label_name="Accuracy",
    )

In [4]:
def weight_variable(shape,name):
    """Return a ``tf.Variable`` called ``name`` with the given ``shape``.

    Initial values are drawn from a truncated normal with stddev 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name=name)

def bias_variable(shape,name):
    """Return a ``tf.Variable`` called ``name`` to be used as a bias term.

    Like the weights, the initial values come from a truncated normal
    with stddev 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name=name)

def convolution(images_matrix, weight_matrix):
    """Convolve ``images_matrix`` with the filters in ``weight_matrix``.

    The filter moves 2 positions at a time along height and width, and
    'SAME' padding is used.
    """
    return tf.nn.conv2d(
        images_matrix,
        weight_matrix,
        strides=[1, 2, 2, 1],  # [batch, height, width, depth]
        padding='SAME',
    )

def max_pool_2x2(images_matrix):
    """Max-pool ``images_matrix`` over 2x2 windows moved with a stride of 2.

    'SAME' padding is used.
    """
    # Window and stride share one layout: [batch, height, width, depth].
    return tf.nn.max_pool(
        images_matrix,
        ksize=[1, 2, 2, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
    )

In [5]:
with tf.name_scope('inputs') as scope:
    # Values fed at run time: one 784-wide row per image and one 10-wide
    # (one-hot) row per label; the batch dimension is left open.
    features_matrix = tf.placeholder(
        dtype=tf.float32, shape=[None, 784], name='features_matrix')
    targets_matrix = tf.placeholder(
        dtype=tf.float32, shape=[None, 10], name="targets_matrix")

    # The convolution layers need image grids, so each 784-vector is
    # unflattened to 28 x 28 with a single channel.
    images_matrix = tf.reshape(
        tensor=features_matrix, shape=[-1, 28, 28, 1], name="images_matrix")
    

with tf.name_scope('convolutional1') as scope:
    # Layer parameters: 32 filters of size 5x5 over the single input channel,
    # plus one bias per filter.
    weight_matrix_conv1 = weight_variable(
        shape=[5, 5, 1, 32], name="weight_matrix_conv1")
    bias_vector_conv1 = bias_variable(shape=[32], name="bias_vector_conv1")

    # Affine part: convolve the images and shift by the bias.
    linear_convoluted_matrix_conv1 = (
        convolution(images_matrix, weight_matrix_conv1) + bias_vector_conv1
    )

    # ReLU non-linearity.
    nonlinear_convoluted_matrix_conv1 = tf.nn.relu(
        linear_convoluted_matrix_conv1)

    # Downsample with 2x2 max pooling.
    smaller_matrix_conv1 = max_pool_2x2(nonlinear_convoluted_matrix_conv1)
    
    
with tf.name_scope('convolutional2') as scope:
    # Layer parameters: 64 filters of size 5x5 over the 32 channels coming
    # out of the first convolutional layer, plus one bias per filter.
    weight_matrix_conv2 = weight_variable(
        shape=[5, 5, 32, 64], name="weight_matrix_conv2")
    bias_vector_conv2 = bias_variable(shape=[64], name="bias_vector_conv2")

    # Affine part: convolve the pooled layer-1 output and shift by the bias.
    linear_convoluted_matrix_conv2 = (
        convolution(smaller_matrix_conv1, weight_matrix_conv2) + bias_vector_conv2
    )

    # ReLU non-linearity.
    nonlinear_convoluted_matrix_conv2 = tf.nn.relu(
        linear_convoluted_matrix_conv2)

    # Downsample with 2x2 max pooling.
    smaller_matrix_conv2 = max_pool_2x2(nonlinear_convoluted_matrix_conv2)

    # Flatten to one row per image for the fully connected layer. The
    # 2*2*64 width assumes the stride-2 convolutions and 2x2 pools above
    # have reduced each 28x28 image to 2x2 with 64 channels.
    smaller_matrix_conv2_flat = tf.reshape(
        tensor=smaller_matrix_conv2, shape=[-1, 2*2*64])

    
with tf.name_scope('fully_connected_hidden1') as scope:
    # Layer parameters: map the 2*2*64 flattened features to 1024 hidden units.
    weight_matrix_fc1 = weight_variable(
        shape=[2 * 2 * 64, 1024], name="weight_matrix_fc1")
    bias_vector_fc1 = bias_variable(shape=[1024], name="bias_vector_fc1")

    # Affine part.
    linear_hidden_matrix_fc1 = (
        tf.matmul(smaller_matrix_conv2_flat, weight_matrix_fc1) + bias_vector_fc1
    )

    # ReLU non-linearity.
    nonlinear_hidden_matrix_fc1 = tf.nn.relu(linear_hidden_matrix_fc1)

    
with tf.name_scope('fully_connected_output') as scope:
    # Layer parameters: map the 1024 hidden units to the 10 output classes.
    weight_matrix_fc2 = weight_variable(
        shape=[1024, 10], name="weight_matrix_fc2")
    bias_vector_fc2 = bias_variable(shape=[10], name="bias_vector_fc2")

    # Affine part: the unnormalised class scores.
    output_matrix_fc2 = (
        tf.matmul(nonlinear_hidden_matrix_fc1, weight_matrix_fc2) + bias_vector_fc2
    )

    # Softmax turns the scores into per-class probabilities.
    probabilities_matrix = tf.nn.softmax(output_matrix_fc2)
    
    
# Mean cross-entropy over the batch, computed from the raw output scores
# rather than from log(softmax(...)): taking tf.log of a probability that has
# underflowed to 0 yields -inf and turns the loss (and gradients) into NaN.
# The fused op applies the softmax internally in a numerically stable way.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        logits=output_matrix_fc2, labels=targets_matrix))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# A prediction counts as correct when the most probable class matches the
# class marked in the one-hot target row.
correct_prediction = tf.equal(
    tf.argmax(probabilities_matrix, 1), tf.argmax(targets_matrix, 1))

# Fraction of correct predictions in the batch.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

                   


In [6]:
# Use an InteractiveSession instead of `with tf.Session()`: the `with` form
# closes the session as soon as this cell finishes, leaving `sess` unusable in
# every later cell. An InteractiveSession stays open and installs itself as the
# default session, so `sess.run(...)`, `op.run(...)` and `tensor.eval(...)` all
# work in subsequent cells. Call `sess.close()` when done with the notebook.
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())

# NOTE(review): no summary ops are created in the cells shown here, so there
# is presumably nothing for this merge to collect yet — confirm before using
# `merged_summary_op` in a run call.
merged_summary_op = tf.merge_all_summaries()

# Write the graph definition for TensorBoard and flush it to disk right away
# rather than waiting for the writer's background flush.
summary_writer = tf.train.SummaryWriter('./ConvNet_TensorBoard', graph=sess.graph)
summary_writer.flush()

In [7]:
"""
for epoch in range(EPOCHS):
    batch = mnist.train.next_batch(50)
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
    if epoch % STEPS_SIZE == 0:
        #features_test, targets_test = mnist.test.images, mnist.test.labels
        #l_scalar = sess.run(cross_entropy, feed_dict={x:batch[0], y_: batch[1]})
        #a_scalar = sess.run(accuracy, feed_dict={x:batch[0], y_: batch[1]})
        l_scalar = sess.run(cross_entropy, feed_dict={x:batch[0], y_: batch[1]})
        a_scalar = accuracy.eval(feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})
        
        
        loss_y_coordinates[epoch] = l_scalar
        accuracy_y_coordinates[epoch] = a_scalar
    
    

    print("test accuracy %g"%accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
"""

'\nfor epoch in range(EPOCHS):\n    batch = mnist.train.next_batch(50)\n    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})\n    if epoch % STEPS_SIZE == 0:\n        #features_test, targets_test = mnist.test.images, mnist.test.labels\n        #l_scalar = sess.run(cross_entropy, feed_dict={x:batch[0], y_: batch[1]})\n        #a_scalar = sess.run(accuracy, feed_dict={x:batch[0], y_: batch[1]})\n        l_scalar = sess.run(cross_entropy, feed_dict={x:batch[0], y_: batch[1]})\n        a_scalar = accuracy.eval(feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})\n        \n        \n        loss_y_coordinates[epoch] = l_scalar\n        accuracy_y_coordinates[epoch] = a_scalar\n    \n    \n\n    print("test accuracy %g"%accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))\n'