In [1]:
import tensorflow as tf

import os
import numpy as np
import matplotlib.pyplot as plt

from subprocess import call

# 1. magic for inline plot
# 2. magic to print version
# 3. magic so that the notebook will reload external python modules
%matplotlib inline
%load_ext watermark
%load_ext autoreload 
%autoreload 2

%watermark -a 'Ethen' -d -t -v -p numpy,tensorflow

Ethen 2017-07-24 15:00:48 

CPython 3.5.2
IPython 5.4.1

numpy 1.13.1
tensorflow 1.1.0


# Tensorboard

In [Google’s own words](https://www.tensorflow.org/get_started/summaries_and_tensorboard): "The computations you'll use TensorFlow for - like training a massive deep neural network - can be complex and confusing. To make it easier to understand,
debug, and optimize TensorFlow programs, we've included a suite of visualization tools called TensorBoard."

The following code chunk contains a couple of important concepts. When specifying our computational graph, there are two tricks that can make our lives easier later on.

- The first one is giving names to our operations (a.k.a ops), e.g. `tf.add(a, b, name = 'y')`
- The other one is to group similar nodes together with `tf.name_scope()`. In the simple example below, we group all ops related to input together and all ops related to output together. It might seem like an overkill for this trivial example, but when we start to build models that are more and more complex, the debugging process can grow to be extremely daunting.

Then it's about collecting summary data along the way. To elaborate, in machine learning models, we often wish to track how the objective/loss function or accuracy is changing over iterations to evaluate whether the model has converged or not. With Tensorflow, we can collect these by attaching `tf.summary.scalar` ops to the nodes that output the scalar/value that we're interested in.

In [2]:
LOG_PATH = './tmp/example-1/'

# group the two input placeholders under a single 'input' name scope
# so that they show up as one collapsible block in the graph view
with tf.name_scope('input'):
    a = tf.placeholder(tf.float32, name = 'a')
    b = tf.placeholder(tf.float32, name = 'b')

with tf.name_scope('output'):
    y = tf.add(a, b, name = 'y')

# create a summary to monitor y,
# we can create other summary by just
# adding more tf.summary.scalar
tf.summary.scalar('y_value', y)

# merge all summaries into a single op
# that generates all the summary data
# so we don't have to handle them individually
merged = tf.summary.merge_all()

with tf.Session() as sess:
    # create log writer object and pass in the log path
    # the logdir is quite important, it's the directory where all of the 
    # events will be written out. The FileWriter can optionally take a Graph 
    # in its constructor, which in our case will be the computational graph 
    # for the current session. If it receives a Graph object, then TensorBoard
    # will visualize our graph along with tensor shape information
    writer = tf.summary.FileWriter(logdir = LOG_PATH, graph = sess.graph)
    try:
        # perform calculation, where we just keep adding numbers
        a_value = 1
        b_value = 2
        for i in range(10):
            summary, y_value = sess.run([merged, y], feed_dict = {a: a_value, b: b_value})

            # we write logs at every iteration, note that we could also consider writing
            # the summary log for every n iterations instead
            writer.add_summary(summary, i)

            # shift the pair forward: the old b becomes the new a and
            # the sum we just computed becomes the new b
            a_value = b_value
            b_value = y_value
            print(y_value)
    finally:
        # the FileWriter writes events to disk asynchronously, closing it
        # flushes everything that is still pending and releases the event file,
        # so the logs are complete by the time we launch tensorboard; doing it
        # in a finally clause guarantees this even if an iteration fails
        writer.close()

3.0
5.0
8.0
13.0
21.0
34.0
55.0
89.0
144.0
233.0


In [None]:
# mimic the command line call that launches tensorboard:
# tensorboard --logdir=[Our log path]
# the command is passed as an argument list instead of a single string run
# through the shell, so a log path that contains spaces or shell metacharacters
# is handed to tensorboard verbatim; note that this call blocks until the
# tensorboard process exits
call(['tensorboard', '--logdir={}'.format(LOG_PATH)])

Now we can navigate to http://0.0.0.0:6006 to look at our tensorboard. Or we can add an additional argument to the command line call to specify the port for the tensorboard, e.g. if we add `--port=8001` then the tensorboard will be located at `localhost:8001` in our browser.

The tensorboard should look very similar to the screenshot below:

Under the scalars tab: We've only logged the value for y, so there will only be one scalar summary.

<img src="images/tensorboard_summary.png" width="60%" height="60%">

Under the graph tab, since we have grouped two ops under the input name scope, if we click on the plus sign on top of its name scope block, we can see all the ops inside that block.

<img src="images/tensorboard_graph.png" width="60%" height="60%">

Note that we might want to delete the original logdir directory if we were to re-run the code several times. Otherwise, Tensorboard might include duplicate things in it or display a warning, since there will be multiple event files.

To learn more, the following link has a more comprehensive guide for configuring Tensorboard. [Tensorflow Documentation: TensorBoard: Visualizing Learning](https://www.tensorflow.org/get_started/summaries_and_tensorboard)

# Queues and Coordinators



In [4]:
N_SAMPLES = 1000
N_THREADS = 4

# Toy feature matrix: N_SAMPLES rows with 4 columns each, where every entry
# is a standard normal draw that gets scaled by 10 and then shifted by 1
data = 1 + 10 * np.random.standard_normal(size = (N_SAMPLES, 4))
data

array([[  1.80146751,  -9.16441146,  -8.95531492,   9.18517489],
       [ -0.85832641,  -9.27301211,   1.4138157 ,   3.31116201],
       [-18.77402339,   7.97979842,  22.10523166, -12.26695909],
       ..., 
       [ -7.70182672,  -5.23895938,  -5.03024986,   8.77285877],
       [  6.80702113,  -2.51661907, -13.29981891,   0.79241951],
       [  3.99524144,   3.32551964, -11.65769596,   5.71774662]])

In [5]:
# one random binary label (0 or 1) per sample; the upper bound
# passed to randint is exclusive, hence high = 2
target = np.random.randint(low = 0, high = 2, size = N_SAMPLES)
target[:5]

array([1, 1, 1, 1, 1])

In [6]:
# every queue element is one example: a float32 feature vector paired with an
# int32 scalar label. The per-example shapes are read off the arrays themselves
# (everything after the leading sample axis) instead of being hard-coded, so the
# queue stays in sync with however many columns `data` was generated with
queue = tf.FIFOQueue(
    capacity = 50,
    dtypes = [tf.float32, tf.int32],
    shapes = [data.shape[1:], target.shape[1:]])

# enqueue_many slices its inputs along the first axis, so each row of data
# (and the matching entry of target) becomes a separate queue element
enqueue_op = queue.enqueue_many([data, target])
data_sample, label_sample = queue.dequeue()

# create N_THREADS threads to do the enqueue, the queue runner
# launches one thread per enqueue op that it is given
queue_runner = tf.train.QueueRunner(queue, [enqueue_op] * N_THREADS)

In [7]:
# create ops that do something with data_sample and label_sample
with tf.Session() as sess:
    # create a coordinator, launch the queue runner threads.
    coord = tf.train.Coordinator()
    enqueue_threads = queue_runner.create_threads(sess, coord = coord, start = True)
    try:
        # just do 10 iterations
        for step in range(10):
            if coord.should_stop():
                break
            
            data_batch, label_batch = sess.run([data_sample, label_sample])
            print(data_batch)
            print(label_batch)
    except Exception as e:
        coord.request_stop(e)
    finally:
        coord.request_stop()
        coord.join(enqueue_threads)

[ 1.80146754 -9.16441154 -8.95531464  9.18517494]
1
[-0.85832644 -9.27301216  1.41381574  3.31116199]
1
[-18.77402306   7.97979832  22.10523224 -12.26695919]
1
[ -1.58828139  -4.32889605  11.34604073 -10.0936327 ]
1
[ -5.84605885  -4.87499094   5.8274188  -16.76111603]
1
[  5.37085438  -5.12846851  31.64710999   2.97502017]
0
[-15.67945766   1.69698191   8.96185017 -12.79179668]
1
[ -1.01402497   3.75115848   3.21570444  10.64387035]
1
[ 15.26858902 -12.67862034  -4.838974   -15.67602539]
1
[  1.97815228 -11.94149208  -4.73292971  -1.97893286]
0


# Reference

- [Blog: Just another Tensorflow beginner guide (Part1)](http://liufuyang.github.io/2017/03/12/just-another-tensorflow-beginner-guide-1.html)
- [Tensorflow Documentation: TensorBoard: Visualizing Learning](https://www.tensorflow.org/get_started/summaries_and_tensorboard)
- [Note: CS 20SI Lecture note 9:  Input Pipeline](http://web.stanford.edu/class/cs20si/lectures/notes_09.pdf)