Adapted from 

https://github.com/tensorflow/tensorflow/examples/tutorials/mnist/mnist_deep.py
    
see also 

https://www.tensorflow.org/get_started/mnist/pros

Specifically, we're taking the deep MNIST tutorial, turning it into a notebook, and making it TensorBoard-friendly.  Turning it into a notebook means unpacking it -- no non-TF function calls -- which makes it very easy to insert print statements anywhere and see what is going on.  The TensorBoard calls attempt to mirror as closely as possible what is done in this excellent tutorial video

https://www.youtube.com/watch?v=eBbEDRsCmv4

and associated code.  For questions email Matt, mattphillipsphd@gmail.com

Basically there are four aspects of using Tensorboard illustrated in this simple example.

1) Use name scopes ("with tf.name_scope(...)").  Tensorboard graphs are impossible to read without them.  Name scopes tell Tensorboard about the hierarchical and graphical structure of your model.

2) Summary scalars.  E.g., the progression of accuracy and loss across time.

3) Distributions of parameter and layer output values across time.

4) Embeddings, visualized with PCA and t-SNE.

In [59]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import sys

from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf

# Holder for the parsed command-line flags; it is overwritten with the
# argparse namespace once the argument parser has run.
FLAGS = None

In [60]:
# Directory that receives the TensorBoard event files and the checkpoints.
# After running the notebook, launch TensorBoard with the command
# tensorboard --logdir=<PATH TO LOGDIR>

LOGDIR = "tf_graphs/mnist_example/"

In [61]:
# Command-line options.  parse_known_args() is used so that arguments the
# parser does not recognise are returned in `unparsed` rather than raising
# an error.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--data_dir',
    type=str,
    default='/tmp/tensorflow/mnist/input_data',
    help='Directory for storing input data',
)
FLAGS, unparsed = parser.parse_known_args()

In [62]:
# Import data.  The data set is read from FLAGS.data_dir and the labels are
# requested in one-hot form (one_hot=True).
mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

Extracting /tmp/tensorflow/mnist/input_data/train-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/train-labels-idx1-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-labels-idx1-ubyte.gz


In [63]:
# Hyperparameters for the network.  Every value below is the default; with
# these defaults the run recorded in this notebook (1001 training steps)
# reached roughly 96% test accuracy.
conf = {
    # Side length of the square convolution window of each conv layer.
    "conv_window1": 5,
    "conv_window2": 5,
    # Number of feature maps produced by each conv layer.
    "num_feat_maps1": 32,
    "num_feat_maps2": 64,
    # Pooling window size and stride (downsampling factor) of each pool layer.
    "downsampling1": 2,
    "downsampling2": 2,
    # Width of the fully connected layer and number of output classes.
    "num_dense_feats": 1024,
    "num_labels": 10,
    # Learning rate handed to the optimizer.
    "lr": 1e-4,
}

In [64]:
# Reset the default graph before the model is built -- presumably so that
# re-running the cells below does not pile duplicate ops onto a stale graph.
tf.reset_default_graph()

In [65]:
# Create the model
# x: an input tensor with the dimensions (N_examples, 784), where 784 is the
# number of pixels in a standard MNIST image.  The leading None leaves the
# batch size unspecified.
x = tf.placeholder(tf.float32, [None, 784], name="x")

In [66]:
# Reshape the flat 784-pixel rows into 28x28 images for use within a
# convolutional neural net; -1 lets the batch dimension be inferred.
# Last dimension is for "features" - there is only one here, since images are
# grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
x_image = tf.reshape(x, [-1, 28, 28, 1])

In [67]:
# First convolutional layer: turns the single-channel input image into
# conf["num_feat_maps1"] feature maps (32 by default), then max-pools.
with tf.name_scope("conv1"):
    with tf.name_scope("convolve"):
        shape1 = [
            conf["conv_window1"],    # filter height
            conf["conv_window1"],    # filter width
            1,                       # input channels (grayscale)
            conf["num_feat_maps1"],  # output feature maps
        ]
        # Weights start as truncated-normal noise, biases at a constant 0.1.
        W_conv1 = tf.Variable(tf.truncated_normal(shape1, stddev=0.1))
        b_conv1 = tf.Variable(
            tf.constant(0.1, shape=[conf["num_feat_maps1"]]))

        # 2-D convolution with stride 1 in every dimension and 'SAME' padding.
        conv2d1 = tf.nn.conv2d(
            x_image, W_conv1, strides=[1, 1, 1, 1], padding='SAME')
        h_conv1 = tf.nn.relu(conv2d1 + b_conv1)

        # Histogram summaries: the distributions of the parameters and of
        # the layer output.
        tf.summary.histogram("weights", W_conv1)
        tf.summary.histogram("biases", b_conv1)
        tf.summary.histogram("activations", h_conv1)

    # Max pooling; window and stride are the same, so with the default
    # conf["downsampling1"] of 2 this downsamples by 2X.
    with tf.name_scope("pool"):
        ds_shape1 = [1, conf["downsampling1"], conf["downsampling1"], 1]
        h_pool1 = tf.nn.max_pool(
            h_conv1, ksize=ds_shape1, strides=ds_shape1, padding='SAME')

In [68]:
# Second convolutional layer: maps the conf["num_feat_maps1"] feature maps
# to conf["num_feat_maps2"] (32 -> 64 by default), then max-pools.
with tf.name_scope("conv2"):
    with tf.name_scope("convolve"):
        shape2 = [
            conf["conv_window2"],    # filter height
            conf["conv_window2"],    # filter width
            conf["num_feat_maps1"],  # input feature maps
            conf["num_feat_maps2"],  # output feature maps
        ]
        # Weights start as truncated-normal noise, biases at a constant 0.1.
        W_conv2 = tf.Variable(tf.truncated_normal(shape2, stddev=0.1))
        b_conv2 = tf.Variable(
            tf.constant(0.1, shape=[conf["num_feat_maps2"]]))

        # 2-D convolution with stride 1 in every dimension and 'SAME' padding.
        conv2d2 = tf.nn.conv2d(
            h_pool1, W_conv2, strides=[1, 1, 1, 1], padding='SAME')
        h_conv2 = tf.nn.relu(conv2d2 + b_conv2)

        # Histogram summaries: the distributions of the parameters and of
        # the layer output.
        tf.summary.histogram("weights", W_conv2)
        tf.summary.histogram("biases", b_conv2)
        tf.summary.histogram("activations", h_conv2)

    # Second max-pooling layer; window and stride are the same.
    with tf.name_scope("pool"):
        ds_shape2 = [1, conf["downsampling2"], conf["downsampling2"], 1]
        h_pool2 = tf.nn.max_pool(
            h_conv2, ksize=ds_shape2, strides=ds_shape2, padding='SAME')

# Number of values per example coming out of the second pooling layer
# (height * width * feature maps); used below to flatten h_pool2.
num_pool2_out = int(h_pool2.shape[1] * h_pool2.shape[2] * h_pool2.shape[3])

In [69]:
# Fully connected layer.  After two rounds of downsampling, the 28x28 image
# is down to 7x7x64 feature maps in the default configuration; this layer
# maps the flattened maps to conf["num_dense_feats"] features (1024 by
# default).
with tf.name_scope("fc"):
    fc_shape_in = [num_pool2_out, conf["num_dense_feats"]]
    W_fc1 = tf.Variable(tf.truncated_normal(fc_shape_in, stddev=0.1))
    b_fc1 = tf.Variable(tf.constant(0.1, shape=[conf["num_dense_feats"]]))

    # Flatten each example's pooled feature maps into a single row.
    h_pool2_flat = tf.reshape(h_pool2, [-1, num_pool2_out])

    # Affine transform followed by the ReLU non-linearity.
    z_fc1 = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
    h_fc1 = tf.nn.relu(z_fc1)

In [70]:
# Dropout - controls the complexity of the model, prevents co-adaptation of
# features.
# keep_prob is a scalar placeholder for the probability that each unit is
# *kept* (not dropped): the training step feeds 0.5, evaluation feeds 1.0.
keep_prob = tf.placeholder(tf.float32)

h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

In [71]:
# Map the 1024 features to 10 classes, one for each digit.  Both sizes come
# from conf ("num_dense_feats" and "num_labels"); 1024 and 10 are the defaults.
shape_out = [conf["num_dense_feats"], conf["num_labels"]]
W_fc2 = tf.Variable( tf.truncated_normal(shape_out, stddev=0.1) )
b_fc2 = tf.Variable( tf.constant(0.1, shape=[ conf["num_labels"] ]) )

In [72]:
# y_conv is a tensor of shape (N_examples, 10), with values
# equal to the logits of classifying the digit into one of 10 classes (the
# digits 0-9).  It is computed from the dropout output h_fc1_drop.
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

Train the network, and add performance variables to track over time.

For TensorBoard, this means:

    1) Adding tf.summary.scalar statements.
    2) Creating a merged summary op ('summ') at the end.
    3) Below, including 'summ' in the sess.run call and writing its result with the FileWriter.

In [73]:
# Placeholder for the ground-truth labels, one row of conf["num_labels"]
# values per example.
y_ = tf.placeholder(tf.float32, [None, conf["num_labels"]], name="labels")

# Loss: softmax cross-entropy between labels and logits, averaged over the
# batch and logged as a scalar summary.
with tf.name_scope("xent"):
    per_example_xent = tf.nn.softmax_cross_entropy_with_logits(
        labels=y_, logits=y_conv)
    cross_entropy = tf.reduce_mean(per_example_xent)
    tf.summary.scalar("xent", cross_entropy)

# Optimizer: Adam with the configured learning rate, minimizing the loss.
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(conf["lr"])
    train_step = optimizer.minimize(cross_entropy)

# Accuracy: fraction of examples whose highest logit matches the label,
# logged as a scalar summary.
with tf.name_scope("accuracy"):
    predicted_digit = tf.argmax(y_conv, 1)
    true_digit = tf.argmax(y_, 1)
    correct_prediction = tf.equal(predicted_digit, true_digit)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar("accuracy", accuracy)

# Single op that evaluates every summary defined so far.
summ = tf.summary.merge_all()

In [74]:
# Start the session and initialize every variable defined so far.
sess = tf.Session()
init_op = tf.global_variables_initializer()
sess.run(init_op)

# Each hyperparameter combination gets its own sub-directory of LOGDIR.
hparam = "lr_%.0E,%d" % (conf["lr"], conf["num_dense_feats"])
run_logdir = LOGDIR + hparam

# The writer records the session graph (handing the graph to the constructor
# adds it to the event file) and, later, the summaries and the embedding
# configuration.  Prior proper use of name scopes is critical for a readable
# graph.
writer = tf.summary.FileWriter(run_logdir, sess.graph)

Create embeddings to use in PCA and t-SNE visualization.  These use the outputs of the last convolutional layer, not the fully connected layers.  This also critically requires that model *checkpoints* be saved during training.

In [75]:
# The embedding is the flattened output of the second pooling layer.
embedding_input = tf.reshape(h_pool2, [-1, num_pool2_out])
embedding_size = num_pool2_out
# Holds one embedding row per visualized example.  The 1024 here has to match
# the number of test images fed when `assignment` is run (the training loop
# feeds mnist.test.images[:1024]).
# NOTE: this variable is created after the global initializer has already
# been run; it only receives a value when `assignment` is executed.
embedding = tf.Variable(tf.zeros([1024, embedding_size]), name="test_embedding")
assignment = embedding.assign(embedding_input)
# Saver for the model checkpoints; it is created after `embedding`.
saver = tf.train.Saver()

In [76]:
# This cell is 'cheating': it uses images/data created by Dandelion Mane.
# The sprite image and the label file referenced below are not generated
# anywhere in this notebook.

projector = tf.contrib.tensorboard.plugins.projector

config = projector.ProjectorConfig()
embedding_config = config.embeddings.add()

# Tie the projector entry to the embedding variable and to its sprite and
# metadata files.
embedding_config.tensor_name = embedding.name
embedding_config.sprite.image_path = LOGDIR + 'sprite_1024.png'
embedding_config.metadata_path = LOGDIR + 'labels_1024.tsv'

# Width and height of a single thumbnail inside the sprite image.
embedding_config.sprite.single_image_dim.extend([28, 28])

# Write the projector configuration next to the writer's event files.
projector.visualize_embeddings(writer, config)

In [77]:
# Execute the training: 1001 steps on mini-batches of 50 images.

for i in range(1001):
    batch = mnist.train.next_batch(50)

    # Every 5th step: measure accuracy on the current training batch with
    # dropout switched off (keep_prob 1.0) and log the merged summaries.
    if i % 5 == 0:
        [train_accuracy, s] = sess.run(
            [accuracy, summ],
            feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        writer.add_summary(s, i)

    # 100 is a multiple of 5, so train_accuracy was refreshed just above.
    if i % 100 == 0:
        print('step %d, training accuracy %g' % (i, train_accuracy))

    # Here is where we save the checkpoint for the embedding visualization:
    # first fill the embedding variable from the first 1024 test images,
    # then write the checkpoint.
    if i % 500 == 0:
        sess.run(assignment,
                 feed_dict={x: mnist.test.images[:1024],
                            y_: mnist.test.labels[:1024]})
        saver.save(sess, os.path.join(LOGDIR, "model.ckpt"), i)

    # One optimization step on the batch, with dropout active.
    train_step.run(session=sess,
                   feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# The writer buffers events; flush so that every summary logged above is on
# disk even though the writer is left open for further use.
writer.flush()

print('test accuracy %g' % accuracy.eval(session=sess, feed_dict={
    x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

step 0, training accuracy 0
step 100, training accuracy 0.8
step 200, training accuracy 0.86
step 300, training accuracy 0.88
step 400, training accuracy 0.92
step 500, training accuracy 0.94
step 600, training accuracy 0.94
step 700, training accuracy 1
step 800, training accuracy 0.96
step 900, training accuracy 0.98
step 1000, training accuracy 0.94
test accuracy 0.9612


# Exercises: 

1) Augment this notebook to use and track test-set as well as training-set accuracy.

2) Add more and different layers, and look at the resulting graph in TensorBoard.