# MNIST CNN Board Example
Adapted from [TensorFlow: Deep MNIST for Experts](https://www.tensorflow.org/tutorials/mnist/pros/index.html).

Demonstrates the usage of the *tensortools/board* module.

In [8]:
import os
import sys

# add path to libraries for ipython
sys.path.append(os.path.expanduser("~/libs"))

import time
from datetime import datetime
import numpy as npvariable_scope
import tensorflow as tf
import tensortools as tt

In [9]:
# Number of training examples requested from the data set per step.
BATCH_SIZE = 50
# Number of iterations of the training loop.
MAX_STEPS = 2000
# Value fed to the dropout keep-probability placeholder while training
# (evaluation feeds 1.0 instead).
DROPOUT = 0.5
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 1e-4
# Scale handed to the L2 regularizer of every conv / fully connected layer.
REG = 5e-4
# Directory the TensorBoard summary writer is created with.
TRAIN_DIR = 'train'

In [10]:
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data sets from the 'MNIST_data' directory; labels are
# requested in one-hot form (the model below compares them via argmax).
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [23]:
def inference(x, dropout_keep_prob):
    """Build the CNN and return its softmax output.

    Layer sequence: Conv1 (ReLU) -> max-pool -> Conv2 (leaky ReLU) ->
    max-pool -> Conv3 (ReLU) -> FC (ReLU) -> dropout -> Output (softmax).
    An activation summary is registered for each of the four hidden layers.

    Args:
        x: Input batch; it is reshaped to [-1, 28, 28, 1] before use.
        dropout_keep_prob: Keep probability passed to ``tf.nn.dropout``.

    Returns:
        The softmax of the 10-unit "Output" layer.
    """
    images = tf.reshape(x, [-1, 28, 28, 1])

    # --- Conv1: activation is applied inside the layer helper ---
    # NOTE(review): positional args (32, 5, 5, 1, 1) are presumably
    # filter count, kernel height/width and strides - confirm in tensortools.
    act1 = tt.network.conv2d(
        "Conv1", images, 32, 5, 5, 1, 1,
        weight_init=0.01,
        bias=0.1,
        regularizer=tf.contrib.layers.l2_regularizer(REG),
        activation=tf.nn.relu)
    tt.board.activation_summary(act1, True, scope="Conv1")
    pooled1 = tt.network.max_pool2d(act1, 2, 2, 2, 2)

    # --- Conv2: linear conv followed by a leaky ReLU (0.2) ---
    pre_act2 = tt.network.conv2d(
        "Conv2", pooled1, 64, 5, 5, 1, 1,
        weight_init=0.01,
        bias=0.1,
        regularizer=tf.contrib.layers.l2_regularizer(REG))
    act2 = tt.network.lrelu(pre_act2, 0.2)
    tt.board.activation_summary(act2, scope="Conv2")
    pooled2 = tt.network.max_pool2d(act2, 2, 2, 2, 2)

    # --- Conv3: linear conv followed by a plain ReLU, no pooling ---
    pre_act3 = tt.network.conv2d(
        "Conv3", pooled2, 64, 5, 5, 1, 1,
        weight_init=0.01,
        bias=0.1,
        regularizer=tf.contrib.layers.l2_regularizer(REG))
    act3 = tf.nn.relu(pre_act3, name="ReLu")
    tt.board.activation_summary(act3, scope="Conv3")

    # --- Fully connected layer on the flattened Conv3 activations ---
    flattened = tf.reshape(act3, [-1, 7*7*64])
    pre_act_fc = tt.network.fc(
        "FC", flattened, 1024,
        weight_init=tf.contrib.layers.xavier_initializer(),
        bias=0.1,
        regularizer=tf.contrib.layers.l2_regularizer(REG))
    act_fc = tf.nn.relu(pre_act_fc, name="ReLu")
    tt.board.activation_summary(act_fc, scope="FC")

    dropped = tf.nn.dropout(act_fc, dropout_keep_prob)

    # --- Output layer: 10 units turned into probabilities via softmax ---
    out_units = tt.network.fc(
        "Output", dropped, 10,
        weight_init=tf.contrib.layers.xavier_initializer(),
        regularizer=tf.contrib.layers.l2_regularizer(REG),
        bias=0.1)
    return tf.nn.softmax(out_units)

In [24]:
def train(output, label, global_step):
    """Build the loss and the Adam training op.

    The total loss is the mean cross-entropy between ``label`` and ``output``
    plus every loss registered in the REGULARIZATION_LOSSES collection.
    Loss, variable and gradient summaries are registered along the way.

    Args:
        output: Softmax probabilities produced by the network.
        label: Target distribution with the same layout as ``output``
            (reduced over axis 1).
        global_step: Variable incremented by the optimizer on every update.

    Returns:
        A no-op that depends on the gradient application; running it
        performs one training step.
    """
    with tf.name_scope("Train"):
        # A softmax probability can underflow to exactly 0; log(0) is -inf and
        # 0 * -inf is NaN, which would poison the loss and all gradients.
        # Clipping keeps the logarithm finite.
        safe_output = tf.clip_by_value(output, 1e-10, 1.0)
        cross_entropy = tf.reduce_mean(-tf.reduce_sum(label * tf.log(safe_output), reduction_indices=[1]), name="cross_entropy")
        
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        print("{} regularization losses in total.".format(len(reg_losses)))
        if reg_losses:
            total_loss = tf.add(cross_entropy, tf.add_n(reg_losses), name="total_loss")
        else:
            # tf.add_n rejects an empty list; without regularizers the total
            # loss is just the cross-entropy.
            total_loss = tf.identity(cross_entropy, name="total_loss")
        
        # Generate moving averages of all losses and associated summaries
        cost_averages_op = tt.board.loss_summary([total_loss, cross_entropy] + reg_losses)
        
        # Compute gradients (only after the loss averages have been updated)
        with tf.control_dependencies([cost_averages_op]):
            opt = tf.train.AdamOptimizer(LEARNING_RATE)
            grads = opt.compute_gradients(total_loss)
        
        # Apply gradients
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        
        tt.board.variables_histogram_summary()
        
        # Add histograms for gradients
        tt.board.gradients_histogram_summary(grads)
        
        # Expose a single named op that triggers the whole update
        with tf.control_dependencies([apply_gradient_op]):
            train_op = tf.no_op(name='train')
        
    return train_op

In [25]:
def accuracy(output, label):
    """Return the mean rate at which output and label share their arg-max.

    Args:
        output: Network output; the arg-max over axis 1 is the prediction.
        label: Targets; the arg-max over axis 1 is the expected class.

    Returns:
        Scalar float32 tensor: mean of the per-row match indicator.
    """
    with tf.name_scope("Accuracy"):
        predicted_class = tf.argmax(output, 1)
        expected_class = tf.argmax(label, 1)
        is_hit = tf.equal(predicted_class, expected_class)
        # Cast booleans to 0.0 / 1.0 so their mean is the hit rate
        hit_rate = tf.reduce_mean(tf.cast(is_hit, tf.float32))
    return hit_rate

In [26]:
# Build the graph, train for MAX_STEPS steps while logging accuracy and
# TensorBoard summaries, then report accuracy on a slice of the test set.
with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)
    
    x = tf.placeholder(tf.float32, [None, 784], "X")
    y_ = tf.placeholder(tf.float32, [None, 10], "Y_")
    keep_prob = tf.placeholder(tf.float32, name="KeepProb")
    
    output = inference(x, keep_prob)
    train_op = train(output, y_, global_step)
    accuracy_op = accuracy(output, y_)
    
    summary_op = tf.merge_all_summaries()
    
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=0.5,
        allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(tf.initialize_all_variables())

        # Visualize graph
        tt.visualization.show_graph(sess.graph_def)
        
        # Create a TensorBoard Writer of all summaries
        summary_writer = tf.train.SummaryWriter(TRAIN_DIR, sess.graph)

        # The writer owns an open event file; make sure it is closed even
        # when a training step raises.
        try:
            for step in range(MAX_STEPS):
                
                start_time = time.time()
                
                batch = mnist.train.next_batch(BATCH_SIZE)

                # One optimization step with dropout enabled
                sess.run(train_op, feed_dict={x: batch[0],
                                              y_: batch[1],
                                              keep_prob: DROPOUT})
                
                duration = time.time() - start_time
                
                if step % 10 == 0:
                    # Accuracy on the batch just trained on, dropout disabled
                    train_accuracy = sess.run(accuracy_op,
                                              feed_dict={x:batch[0],
                                                         y_: batch[1],
                                                         keep_prob: 1.0})
                    
                    num_examples_per_step = BATCH_SIZE
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)

                    format_str = ('%s: step %d, acc = %.2f (%.1f examples/sec; %.3f sec/batch)')
                    print (format_str % (datetime.now().time(), step, train_accuracy,
                                         examples_per_sec, sec_per_batch))
                    
                if step % 100 == 0:
                    summary_str = sess.run(summary_op,
                                          feed_dict={x:batch[0],
                                                     y_: batch[1],
                                                     keep_prob: 1.0})
                    summary_writer.add_summary(summary_str, step)
                    # flush data because there can be problems writing the data
                    summary_writer.flush() 
        finally:
            summary_writer.close()
                
        # Evaluate on the first 1000 test examples only
        print("Test accuracy: %g" % sess.run(accuracy_op, 
                                             feed_dict={x: mnist.test.images[:1000],
                                                        y_: mnist.test.labels[:1000],
                                                        keep_prob: 1.0}))
        
print("DONE! All outputs have been written to: {0}".format(TRAIN_DIR))

5 regularization losses in total.


15:04:19.382840: step 0, acc = 0.22 (1730.0 examples/sec; 0.029 sec/batch)
15:04:19.762172: step 10, acc = 0.08 (4794.4 examples/sec; 0.010 sec/batch)
15:04:19.850322: step 20, acc = 0.06 (6012.5 examples/sec; 0.008 sec/batch)
15:04:19.937369: step 30, acc = 0.08 (5739.2 examples/sec; 0.009 sec/batch)
15:04:20.025603: step 40, acc = 0.16 (5735.9 examples/sec; 0.009 sec/batch)
15:04:20.112606: step 50, acc = 0.04 (5378.1 examples/sec; 0.009 sec/batch)
15:04:20.198531: step 60, acc = 0.12 (6275.1 examples/sec; 0.008 sec/batch)
15:04:20.285779: step 70, acc = 0.10 (5775.1 examples/sec; 0.009 sec/batch)
15:04:20.372542: step 80, acc = 0.14 (5962.2 examples/sec; 0.008 sec/batch)
15:04:20.461278: step 90, acc = 0.08 (6000.3 examples/sec; 0.008 sec/batch)
15:04:20.548877: step 100, acc = 0.10 (5823.5 examples/sec; 0.009 sec/batch)
15:04:20.800980: step 110, acc = 0.18 (5696.6 examples/sec; 0.009 sec/batch)
15:04:20.891005: step 120, acc = 0.12 (6049.6 examples/sec; 0.008 sec/batch)
15:04:20.9