In [1]:
import sys
sys.path.insert(0, './Libs')

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline

from freeze_graph import freeze_graph

In [2]:
# Loading the dataset

# Load MNIST through the TensorFlow tutorial helper, using 'MNIST_data' as the
# data directory and requesting one-hot encoded labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Sanity check: print the shape of a single training image.
first_training_image = mnist.train.images[0]
print(first_training_image.shape)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
(784,)


In [3]:
# Hyper parameters

# Number of training iterations; the training loop runs one mini-batch per iteration.
EPOCHS = 200
# Loss and accuracy are recorded every STEPS_SIZE iterations (when epoch % STEPS_SIZE == 0).
STEPS_SIZE = 10
# Number of recorded points, i.e. how many values in range(EPOCHS) are multiples
# of STEPS_SIZE. Integer ceiling division keeps this an int on both Python 2 and
# Python 3 (plain `/` yields a float on Python 3, which cannot be used as an
# array length) and stays correct when EPOCHS is not a multiple of STEPS_SIZE.
GRAPH_STEPS = (EPOCHS + STEPS_SIZE - 1) // STEPS_SIZE

In [4]:
# Plot functions

# Buffers for the training curves. The training loop writes one loss value and
# one accuracy value per logging step into these arrays.
# GRAPH_STEPS may be a float (true division on Python 3), and numpy rejects
# float array lengths, so it is converted to an int once here.
graph_points = int(GRAPH_STEPS)

# NOTE(review): the plots label this axis "Epochs", but the values are evenly
# spaced over [0, GRAPH_STEPS] (logging steps), not actual epoch numbers.
x_coordinates = np.linspace(0, GRAPH_STEPS, num=graph_points)
loss_y_coordinates = np.zeros(graph_points)
accuracy_y_coordinates = np.zeros(graph_points)

def plot(x, y, limits, title, x_label_name, y_label_name):
    """Draw ``y`` against ``x`` as a single line chart and show it.

    Any previously open figures are closed first, so each call produces
    exactly one fresh figure.

    Args:
        x: x-coordinates of the curve.
        y: y-coordinates of the curve.
        limits: axis limits in the form accepted by ``plt.axis``. Currently
            not applied (the ``plt.axis`` call below is disabled); the
            parameter is kept so existing callers keep working.
        title: figure title.
        x_label_name: label of the x axis.
        y_label_name: label of the y axis; also used as the legend entry.
    """
    plt.close("all")
    figure = plt.figure()

    # Label the line so that plt.legend() has an entry to show; calling
    # legend() without any labelled artist only produces a warning and an
    # empty legend.
    plt.plot(x, y, label=y_label_name)

    #plt.axis(limits)
    plt.ylabel(y_label_name)
    plt.xlabel(x_label_name)

    figure.suptitle(title, fontsize=20)
    plt.legend(loc='upper right')

    plt.show()

def plot_loss():
    """Plot the recorded loss values (``loss_y_coordinates``) against ``x_coordinates``."""
    plot(
        x=x_coordinates,
        y=loss_y_coordinates,
        limits=[0, GRAPH_STEPS, 0, 5],
        title="Loss Overtime",
        x_label_name="Epochs",
        y_label_name="Loss",
    )

def plot_accuracy():
    """Plot the recorded accuracy values (``accuracy_y_coordinates``) against ``x_coordinates``."""
    plot(
        x=x_coordinates,
        y=accuracy_y_coordinates,
        limits=[0, GRAPH_STEPS, 0, 1],
        title="Accuracy Overtime",
        x_label_name="Epochs",
        y_label_name="Accuracy",
    )

In [5]:
# Helper functions that keep the graph-definition code below concise

def weight_variable(shape, name):
    """Create a named ``tf.Variable`` of the given shape for use as a weight tensor.

    The initial value is drawn from a truncated normal distribution with
    standard deviation 0.1.
    """
    initial_value = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial_value, name=name)

def bias_variable(shape, name):
    """Create a named ``tf.Variable`` of the given shape for use as a bias vector.

    The initial value is drawn from a truncated normal distribution with
    standard deviation 0.1.
    """
    initial_value = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial_value, name=name)

def convolution(images_matrix, weight_matrix):
    """Apply ``tf.nn.conv2d`` with stride 1 in every dimension and 'SAME' padding."""
    return tf.nn.conv2d(
        images_matrix,
        weight_matrix,
        strides=[1, 1, 1, 1],  # [batch, height, width, depth]
        padding='SAME',
    )

def max_pool_2x2(images_matrix):
    """Apply ``tf.nn.max_pool`` with a 2x2 window, stride 2 and 'SAME' padding."""
    # Layout is [batch, height, width, depth]; the window size and the stride
    # are the same here, so only height and width are reduced.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(
        images_matrix,
        ksize=list(window),
        strides=list(window),
        padding='SAME',
    )

In [6]:
# Defining the graph (in this case, a convolutional neural network)

# Inputs: both placeholders leave the batch dimension as None so that any
# batch size can be fed. Targets have 10 values per example, features 784.
targets_matrix = tf.placeholder(tf.float32, shape=[None, 10], name="targets_matrix")
features_matrix = tf.placeholder(tf.float32, shape=[None, 784], name='features_matrix')

# Reshape the flat 784-value vectors into 28x28 grids with a single channel
# for convolution. The resulting op is named "input_node".
images_matrix = tf.reshape(features_matrix, [-1,28,28,1], name="input_node")

# The value of these variables can be changed before they are converted to consts by freeze_graph
# (the training cell assigns keep_prob = 1.0 and is_training = False before saving).
# NOTE(review): neither variable feeds any op in this cell; the dropout code
# that would use them is commented out in the 'fully_connected_hidden1' scope.
# NOTE(review): the node name 'is_trining' looks like a typo for 'is_training'.
# It is left unchanged here because the name ends up in the saved graph and checkpoint.
keep_prob = tf.Variable(0.6, name='keep_prob')
is_training = tf.Variable(True, name='is_trining')



# First convolutional layer: convolution + bias -> ReLU -> 2x2 max pooling.
with tf.name_scope('convolutional1') as scope:
    # hidden inputs (trainable parameters): filter tensor of shape [5, 5, 1, 32]
    # and one bias value per output channel (32)
    weight_matrix_conv1 = weight_variable([5, 5, 1, 32],name="weight_matrix_conv1")
    bias_vector_conv1 = bias_variable([32], name="bias_vector_conv1")

    # linear operation: stride-1 'SAME' convolution (see convolution()) plus bias
    linear_convoluted_matrix_conv1 = convolution(images_matrix, weight_matrix_conv1) + bias_vector_conv1

    # nonlinear operation
    nonlinear_convoluted_matrix_conv1 = tf.nn.relu(linear_convoluted_matrix_conv1)

    # making output smaller: 2x2 max pooling with stride 2 (see max_pool_2x2())
    smaller_matrix_conv1 = max_pool_2x2(nonlinear_convoluted_matrix_conv1)



# Second convolutional layer: same structure as the first one, applied to the
# pooled output of 'convolutional1', followed by flattening.
with tf.name_scope('convolutional2') as scope:
    # hidden inputs (trainable parameters): filter tensor of shape [5, 5, 32, 64]
    # and one bias value per output channel (64)
    weight_matrix_conv2 = weight_variable([5, 5, 32, 64], name="weight_matrix_conv2")
    bias_vector_conv2 = bias_variable([64], name="bias_vector_conv2")

    # linear operation: stride-1 'SAME' convolution (see convolution()) plus bias
    linear_convoluted_matrix_conv2 = convolution(smaller_matrix_conv1, weight_matrix_conv2) + bias_vector_conv2

    # nonlinear operation
    nonlinear_convoluted_matrix_conv2 = tf.nn.relu(linear_convoluted_matrix_conv2)

    # making output smaller: 2x2 max pooling with stride 2 (see max_pool_2x2())
    smaller_matrix_conv2 = max_pool_2x2(nonlinear_convoluted_matrix_conv2)

    # making output flat for fully connected layer: one row of 7*7*64 values per example
    smaller_matrix_conv2_flat = tf.reshape(smaller_matrix_conv2, [-1, 7*7*64])



# Fully connected hidden layer: maps the 7*7*64 flattened features to 1024 units.
with tf.name_scope('fully_connected_hidden1') as scope:
    # hidden inputs (trainable parameters)
    weight_matrix_fc1 = weight_variable([7 * 7 * 64, 1024], name="weight_matrix_fc1")
    bias_vector_fc1 = bias_variable([1024], name="bias_vector_fc1")

    # linear operation
    linear_hidden_matrix_fc1 = tf.matmul(smaller_matrix_conv2_flat, weight_matrix_fc1) + bias_vector_fc1

    # nonlinear operation (no dropout is applied; see the disabled code below)
    nonlinear_hidden_matrix_fc1 = tf.nn.relu(linear_hidden_matrix_fc1)
    
    # This did not work: an attempt to switch dropout on and off through the
    # is_training / keep_prob variables. It is kept for reference only.
    # NOTE(review): the reason it failed is not recorded here.
    #no_dropout_hidden_matrix_fc1 = tf.nn.relu(linear_hidden_matrix_fc1)
    #dropout_hidden_matrix_fc1 = tf.nn.dropout(no_dropout_hidden_matrix_fc1, keep_prob)
    #nonlinear_hidden_matrix_fc1 = tf.cond(is_training, lambda: dropout_hidden_matrix_fc1, lambda: no_dropout_hidden_matrix_fc1)



# Output layer: maps the 1024 hidden units to 10 values and applies softmax.
with tf.name_scope('fully_connected_output') as scope:
    # hidden inputs (trainable parameters)
    weight_matrix_fc2 = weight_variable([1024, 10], name="weight_matrix_fc2")
    bias_vector_fc2 = bias_variable([10], name="bias_vector_fc2")

    # linear operation (the pre-softmax values)
    output_matrix_fc2 = tf.matmul(nonlinear_hidden_matrix_fc1, weight_matrix_fc2) + bias_vector_fc2

    # making output probabilities; inside this name scope the op is called
    # "fully_connected_output/output_node", which is the output node name
    # passed to freeze_graph in the training cell
    probabilities_matrix = tf.nn.softmax(output_matrix_fc2, name = "output_node")


# Loss: mean softmax cross-entropy over the batch.
# It is computed from the pre-softmax values (output_matrix_fc2) with the fused
# TensorFlow op instead of tf.log(probabilities_matrix): taking the log of a
# softmax output that has underflowed to 0 yields -inf, and 0 * -inf then turns
# the loss (and the gradients) into NaN. The fused op computes the same
# per-example quantity, -sum(targets * log(softmax(logits))), in a numerically
# stable way. Keyword arguments are used because the positional order of
# labels and logits differs between TensorFlow releases.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=targets_matrix, logits=output_matrix_fc2)
)

# One optimisation step with Adam (learning rate 1e-4) on the loss above.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# An example counts as correct when the index of the largest predicted
# probability equals the index of the largest target value.
correct_prediction = tf.equal(tf.argmax(probabilities_matrix,1), tf.argmax(targets_matrix,1))

# Accuracy: fraction of correct examples in the fed batch.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [7]:
# Scalar summaries for the accuracy and loss tensors. The training cell gathers
# them with tf.merge_all_summaries() and writes them with a tf.train.SummaryWriter.
accuracy_summary = tf.scalar_summary( 'accuracy', accuracy )
loss_summary = tf.scalar_summary( 'loss function', cross_entropy )

In [8]:
# Training the convolutional neural network
#
# This cell (1) trains the network for EPOCHS mini-batch iterations while
# recording loss/accuracy every STEPS_SIZE iterations, (2) saves the learned
# weights and the graph definition, and (3) merges both into a single frozen
# graph file with freeze_graph.

# Index of the next free slot in loss_y_coordinates / accuracy_y_coordinates;
# also used as the step value of the written summaries.
step = 0

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())

    summaries = tf.merge_all_summaries()
    summary_writer = tf.train.SummaryWriter('./TensorBoard_ConvNet/Before_Serialization',graph=sess.graph)

    # The writer is closed in `finally` so the event file is flushed and
    # released even if training is interrupted or raises.
    try:
        for epoch in range(EPOCHS):
            batch = mnist.train.next_batch(50)
            feed = {features_matrix: batch[0], targets_matrix: batch[1]}
            sess.run(train_step, feed_dict=feed)

            if epoch % STEPS_SIZE == 0:
                # Evaluate loss, accuracy and the merged summaries in a single
                # run on the batch that was just trained on. No variable changes
                # between these evaluations, so one forward pass yields the same
                # values as three separate runs.
                l_scalar, a_scalar, summary = sess.run(
                    [cross_entropy, accuracy, summaries], feed_dict=feed
                )
                summary_writer.add_summary(summary, step)

                loss_y_coordinates[step] = l_scalar
                accuracy_y_coordinates[step] = a_scalar
                step = step + 1
    finally:
        summary_writer.close()

    # Changing the values of keep_prob and is_training before the variables are saved
    sess.run(keep_prob.assign(1.0))
    sess.run(is_training.assign(False))

    # Saving the trained network

    path = "./TrainedModel_ConvNet/"
    checkpoint_weights_filename = "weights"
    checkpoint_graph_filename = "graph.pb"
    trained_graph_filename = "trained_graph.pb"

    checkpoint_weights_path = path + checkpoint_weights_filename
    checkpoint_graph_path = path + checkpoint_graph_filename
    trained_graph_path = path + trained_graph_filename
    saver_path = ""

    # The graph definition is written as text, so freeze_graph must be told
    # that its input is not binary.
    as_text = True
    as_binary = not as_text

    # The checkpoint is written before the graph definition, so make sure the
    # output directory exists before the first write.
    if not tf.gfile.Exists(path):
        tf.gfile.MakeDirs(path)

    # Saving learned weights of the model.
    # The Saver must be created before the graph definition is written: it adds
    # the "save/..." ops that are referred to by name in the freeze_graph call below.
    tf.train.Saver().save(sess, checkpoint_weights_path) #, global_step=0, latest_filename="checkpoint_name")

    # Saving graph definition
    tf.train.write_graph(sess.graph.as_graph_def(), path, checkpoint_graph_filename, as_text)

    # Merging graph definition and learned weights into a trained graph
    #
    # freeze_graph arguments:
    #   input_graph          - TensorFlow 'GraphDef' file to load
    #   input_saver          - TensorFlow saver file to load
    #   input_checkpoint     - TensorFlow variables file to load
    #   output_graph         - Output 'GraphDef' file name
    #   input_binary         - Whether the input files are in binary format
    #   output_node_names    - The name of the output nodes, comma separated
    #   restore_op_name      - The name of the master restore operator
    #   filename_tensor_name - The name of the tensor holding the save path
    #   clear_devices        - Whether to remove device specifications
    #   initializer_nodes    - comma separated list of initializer nodes to run before freezing
    #
    # These settings are defined once and passed to the call, so that the
    # variables and the values actually used cannot drift apart.
    output_node_names = "fully_connected_output/output_node"
    restore_op_name = "save/restore_all"
    filename_tensor_name = "save/Const:0"
    clear_devices = True

    freeze_graph(
        input_graph = checkpoint_graph_path,
        input_saver = saver_path,
        input_checkpoint = checkpoint_weights_path,
        output_graph = trained_graph_path,

        initializer_nodes = None, #"input_node",
        output_node_names = output_node_names,

        restore_op_name = restore_op_name,
        filename_tensor_name = filename_tensor_name,

        input_binary = as_binary,
        clear_devices = clear_devices
    )

Converted 8 variables to const ops.
35 ops in the final graph.


In [9]:




#save_trained_model(sess, "./ConvNet_TrainedModel_as_a_GraphDefinition", "conv_net_trained_graph.pb")