In [1]:
import os.path
import time

import numpy as np
import tensorflow as tf

In [2]:
def export_tensorboard_graph(path):
    """Write the current default graph to ``tbout/<path>`` for `tensorboard`.

    Args:
        path: Sub-directory (joined onto ``tbout``) that receives the export.
    """
    tb_dir = os.path.join('tbout', path)
    print('Exporting TensorBoard graph to {}'.format(tb_dir))
    writer = tf.summary.FileWriter(tb_dir, graph=tf.get_default_graph())
    # Only the graph is exported, so the writer is closed immediately.
    writer.close()

## Control dependencies

### Race conditions: a contrived (but demonstrative) example

In the below code, notice that both `a` and `b` try to set the value of the `Variable`, `var`. Because there is no implicit dependency between `a` and `b`, they can run in either order each time a `Session` runs them. This causes unstable results, as illustrated with output printed to console:

In [3]:
tf.reset_default_graph()
# Shared mutable state that both ops below write to
var = tf.Variable(0)
a = tf.assign(var, 2 * var)   # overwrite var with twice its value
b = tf.assign_add(var, 2)     # add 2 to var in place
c = a + b

init = tf.global_variables_initializer()

# Five fresh sessions, ten evaluations of c each: the printed rows differ
# because nothing orders a relative to b.
for _ in range(5):
    with tf.Session() as sess:
        sess.run(init)
        vals = [sess.run(c) for _ in range(10)]
        print(vals)

[4, 16, 40, 80, 164, 332, 672, 1352, 2712, 5432]
[0, 8, 24, 56, 112, 232, 472, 952, 1912, 3832]
[4, 16, 36, 80, 168, 344, 696, 1400, 2808, 5624]
[4, 16, 40, 88, 184, 376, 760, 1528, 3064, 6136]
[8, 24, 56, 120, 248, 504, 1016, 2040, 4088, 8184]


Although there is no implicit dependency, we can force an _explicit_ dependency by using [`tf.control_dependencies()`](https://www.tensorflow.org/api_docs/python/tf/control_dependencies). Below, we use it to add `a` as a dependency to `b`, forcing `b` to wait until `a` has finished before running. Because of this, our execution order, and thus our output, is now completely deterministic:

In [4]:
tf.reset_default_graph()
# Shared mutable state that both ops below write to
var = tf.Variable(0)
a = tf.assign(var, 2 * var)
# b is created under a control dependency on a, so a always runs first
with tf.control_dependencies([a]):
    b = tf.assign_add(var, 2)
c = a + b

init = tf.global_variables_initializer()

# Five fresh sessions, ten evaluations of c each: every printed row matches
for _ in range(5):
    with tf.Session() as sess:
        sess.run(init)
        vals = [sess.run(c) for _ in range(10)]
        print(vals)

[4, 12, 28, 60, 124, 252, 508, 1020, 2044, 4092]
[4, 12, 28, 60, 124, 252, 508, 1020, 2044, 4092]
[4, 12, 28, 60, 124, 252, 508, 1020, 2044, 4092]
[4, 12, 28, 60, 124, 252, 508, 1020, 2044, 4092]
[4, 12, 28, 60, 124, 252, 508, 1020, 2044, 4092]


### Transferring parameters from one set of variables to another

Example: Deep Q-Network, need to send "online" parameters to the "target" parameters periodically.

In [5]:
# Helper to create a dummy graph with many values to be transferred from one Variable to another.
def create_var_updates_and_init(num_vars=100, shape=(100, 100)):
    """Reset the default graph and build paired "master"/"replica" variables.

    Args:
        num_vars: Number of master/replica pairs to create (default 100).
        shape: Shape of every variable (default ``(100, 100)``).

    Returns:
        A tuple ``(update_ops, init)``: ``update_ops`` is a list with one
        assign op per pair that copies the master value into its replica,
        and ``init`` is the global variable initializer for the new graph.
    """
    tf.reset_default_graph()
    print('Creating variables...')
    shape = list(shape)
    # Create dummy lists of variables for example
    master_vars = [tf.Variable(tf.random_normal(shape)) for _ in range(num_vars)]
    replica_vars = [tf.Variable(tf.random_normal(shape)) for _ in range(num_vars)]
    # Create one assign op per (master, replica) pair
    update_ops = [
        replica_var.assign(master_var)
        for master_var, replica_var in zip(master_vars, replica_vars)
    ]
    # Initialization operation
    init = tf.global_variables_initializer()
    print('Done.')
    return update_ops, init

How should we run all of the `assign` ops? I've seen things like this:

In [6]:
update_ops, init = create_var_updates_and_init()
# Run each update in a separate Session.run() call
with tf.Session() as sess:
    sess.run(init)
    print('Updating Variables...')
    # Start the clock after the progress message so that only the
    # Session.run() calls are measured, not the console output.
    start_t = time.time()
    for update in update_ops:
        sess.run(update)
    end_t = time.time()
    print('Done.')
    print('Time elapsed: {} seconds'.format(end_t - start_t))

Creating variables...
Done.
Updating Variables...
Done.
Time elapsed: 0.7024047374725342 seconds


We can do better.

In [7]:
update_ops, init = create_var_updates_and_init()
# Hang every update op off a single no-op, so running that one op
# forces all of the assigns to execute.
with tf.control_dependencies(update_ops):
    assign_all = tf.no_op()
with tf.Session() as sess:
    sess.run(init)
    print('Updating Variables...')
    start_t = time.time()
    sess.run(assign_all)
    end_t = time.time()
    elapsed = end_t - start_t
    print('Done.')
    print('Time elapsed: {} seconds'.format(elapsed))

Creating variables...
Done.
Updating Variables...
Done.
Time elapsed: 0.0600435733795166 seconds


### `tf.group`

The syntax can be made cleaner by using the `tf.group` operation:

In [8]:
update_ops, init = create_var_updates_and_init()
# Bundle all update ops into one grouped op; running it runs every assign
assign_all = tf.group(*update_ops)
with tf.Session() as sess:
    sess.run(init)
    start_t = time.time()
    sess.run(assign_all)
    end_t = time.time()
    elapsed = end_t - start_t
    print('Time elapsed: {} seconds'.format(elapsed))

Creating variables...
Done.
Time elapsed: 0.05835080146789551 seconds


Using a grouping operation is cleaner, faster, more idiomatic. Compared to the `tf.control_dependencies()` version above, `tf.group()` provides some additional functionality under-the-hood, making sure that operations are grouped according to their device. [Check out the implementation here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/control_flow_ops.py#L2784-L2846).

### `tf.tuple`

If you want to synchronize multiple parallel operations, [`tf.tuple()`](https://www.tensorflow.org/versions/master/api_docs/python/tf/tuple) is an easy solution. You simply pass in a list of tensors, and `tf.tuple()` prevents future operations from using those tensors until they have all been computed.

_Note: TensorFlow operations automatically wait for all dependencies to finish before executing. Use `tf.tuple` for situations where you need to explicitly provide synchronization._

In [9]:
def make_matmul(dims):
    """Return the matrix product of two random-normal ``dims x dims`` tensors."""
    shape = [dims, dims]
    lhs = tf.random_normal(shape)
    rhs = tf.random_normal(shape)
    return tf.matmul(lhs, rhs)

In [10]:
# Three independent matmuls of increasing size
tensor_a, tensor_b, tensor_c = (make_matmul(dims) for dims in (100, 1000, 10000))
# tf.tuple returns gated copies of the tensors for downstream ops to consume
synced = tf.tuple([tensor_a, tensor_b, tensor_c])
sync_a, sync_b, sync_c = synced

## Conditional Statements

## `tf.cond`

`tf.cond` is basically an `if/else` statement. You provide a boolean predicate and two functions which return tensors. One will run if the predicate is `True`, the other if it is `False`. Here's a simple example:

In [11]:
tf.reset_default_graph()

def run_if_true():
    """Branch built for a True predicate: 3 + 3."""
    return tf.add(3, 3)

def run_if_false():
    """Branch built for a False predicate: 3 squared."""
    return tf.square(3)

# Boolean fed at run time selects which branch is evaluated
pred = tf.placeholder(tf.bool)
out = tf.cond(pred, run_if_true, run_if_false)

In [12]:
# We can run this multiple times and see the result from randomly selecting a true/false input
with tf.Session() as sess:
    choice = np.random.choice([True, False])
    feed_dict = {pred: choice}
    res = sess.run(out, feed_dict)
    print('Choice: {}\tResult: {}'.format(choice, res))

Choice: True	Result: 6


For simple functions, we can use lambdas instead:

In [13]:
tf.reset_default_graph()
pred = tf.placeholder(tf.bool)
# The two branches are given inline as lambdas
out = tf.cond(
    pred,
    lambda: tf.add(3, 3),
    lambda: tf.square(3),
)

## Basic `tf.case` example

Try adjusting the `feed_dict` value for `prev` to see how the graph execution changes depending on the input.

In [14]:
tf.reset_default_graph()
prev = tf.placeholder(tf.float32)
# Each entry is a (predicate, branch function) pair
a = (tf.less(prev, 0), lambda: prev + 3)
b = (tf.less(prev, 10), lambda: prev * 3)
c = (tf.less(prev, 20), lambda: prev - 3)
# Branch used when none of the predicates hold
default = lambda: prev / 3
pairs = [a, b, c]
out = tf.case(pairs, default)

with tf.Session() as sess:
    res = sess.run(out, feed_dict={prev: 21})
    print(res)

7.0


## Stochastic Depth

https://arxiv.org/abs/1603.09382

In [15]:
def stochastic_depth_conv2d(inputs, filters, kernel_size, keep_prob, padding='same', activation=tf.nn.relu, name=None):
    """Convolution layer that is randomly bypassed each time it is evaluated.

    A scalar is sampled with `tf.random_uniform([])`; when it is below
    `keep_prob` the full convolution runs, otherwise the skip branch runs.

    Args:
        inputs: Input tensor; its last dimension is compared against `filters`.
        filters: Number of output filters for the convolution.
        kernel_size: Kernel size passed to `tf.layers.conv2d` for the full layer.
        keep_prob: Scalar threshold compared against the uniform sample.
        padding: Padding mode passed to `tf.layers.conv2d`.
        activation: Activation passed to `tf.layers.conv2d`.
        name: Optional variable-scope name; when None a unique scope based on
            'stochastic_depth_conv' is used.

    Returns:
        A tuple `(output, pred)`: the result of the selected branch and the
        boolean tensor that selected it.
    """
    # variable_scope uniquifies default_name itself, so no separate
    # Graph.unique_name() call is needed (that call would also reserve the
    # base name in the graph).
    with tf.variable_scope(name, default_name='stochastic_depth_conv'):
        def full_layer():
            return tf.layers.conv2d(inputs, filters, kernel_size, padding=padding, activation=activation, name='conv')
        def skip_layer():
            # The channel count must match `filters` for the output to be
            # usable in place of the full layer; use a 1x1 conv when it differs.
            if inputs.get_shape().as_list()[-1] != filters:
                return tf.layers.conv2d(inputs, filters, [1, 1], padding=padding, activation=activation, name='skip')
            return inputs
        pred = tf.random_uniform([]) < keep_prob
        return tf.cond(pred, full_layer, skip_layer), pred

In [16]:
tf.reset_default_graph()
inputs = tf.placeholder(tf.float32, [None, 228, 228, 3], name='inputs')
keep_prob = tf.placeholder(tf.float32, [], name='keep_prob')
# Stack three 32-filter layers followed by three 64-filter layers
conv = inputs
for num_filters in (32, 32, 32, 64, 64, 64):
    conv, _ = stochastic_depth_conv2d(conv, num_filters, [3, 3], keep_prob)
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    batch = np.random.normal(size=[32, 228, 228, 3])
    feed_dict = {inputs: batch, keep_prob: 0.5}
    # Time a single forward pass
    start_t = time.time()
    sess.run(conv, feed_dict)
    end_t = time.time()
    print(end_t - start_t)

export_tensorboard_graph('stochastic_depth')

1.090465784072876
Exporting TensorBoard graph to tbout/stochastic_depth


### Visualizing with TensorBoard

```bash
tensorboard --logdir=tbout/stochastic_depth
```

### Checking out gradients

In [17]:
tf.reset_default_graph()
inputs = tf.placeholder(tf.float32, shape=[None, 3, 3, 1], name='inputs')
keep_prob = tf.placeholder(tf.float32, shape=[], name='keep_prob')
# Three named stochastic-depth layers; keep each predicate for inspection
kernel = [3, 3]
conv1, pred1 = stochastic_depth_conv2d(inputs, 3, kernel, keep_prob, name='conv1')
conv2, pred2 = stochastic_depth_conv2d(conv1, 3, kernel, keep_prob, name='conv2')
conv3, pred3 = stochastic_depth_conv2d(conv2, 3, kernel, keep_prob, name='conv3')
init = tf.global_variables_initializer()

# Gradients of the final layer's output w.r.t. every trainable variable
var_list = tf.trainable_variables()
opt = tf.train.GradientDescentOptimizer(0.05)
grads = opt.compute_gradients(conv3, var_list=var_list)

In [18]:
with tf.Session() as sess:
    sess.run(init)
    sample = np.random.normal(size=[1, 3, 3, 1])
    feed_dict = {inputs: sample, keep_prob: 0.5}
    # Fetch gradients and branch predicates in the same run so they agree
    g, p1, p2, p3 = sess.run([grads, pred1, pred2, pred3], feed_dict)
    print(p1, p2, p3)
    # Each entry of g is a (gradient, variable value) pair
    for var, (grad, value) in zip(var_list, g):
        print('', var.op.name, grad.squeeze(), sep='\n')

False True False

conv1/conv/kernel
[[[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]]

conv1/conv/bias
[0. 0. 0.]

conv1/skip/kernel
[ 3.0321338 -4.5531406  2.5450625]

conv1/skip/bias
[3.5176897 4.589042  2.2859795]

conv2/conv/kernel
[[[[8.7887740e-01 0.0000000e+00 2.5197589e-03]
   [1.9824355e+00 9.7104800e-01 0.0000000e+00]
   [6.2200660e-01 0.0000000e+00 1.7833052e-03]]

  [[8.8139719e-01 0.0000000e+00 1.2665364e+00]
   [3.0903292e+00 2.1192811e+00 0.0000000e+00]
   [6.2378991e-01 0.0000000e+00 8.9636391e-01]]

  [[1.2690561e+00 0.0000000e+00 0.0000000e+00]
   [2.0789416e+00 9.7104800e-01 0.0000000e+00]
   [8.9814723e-01 0.0000000e+00 0.0000000e+00]]]


 [[[1.3398508e+00 2.5197589e-03 1.0821075e+00]
   [1.9824355e+00 1.9824355e+00 9.7104800e-01]
   [9.4825065e-01 1.7833052e-03 7.6583838e-01]]

  [[3.6884947e+00 2.1454139e+00 0.0000000e+00]
   [3.0903292e+00 3.0903292e+00 3.1972237e+00]
   [2.6104531e+00 1.518

## Efficiency Comparison

Below, we can see that we only end up running a fraction of the convolutional layers, depending on whether each layer was dropped or not.

Adjust the `keep_prob` parameter inside `feed_dict` in order to play around with different results. The expected runtime of the stochastic depth version of the network is approximately `keep_prob` times the runtime of the regular network.

In [19]:
# Stochastic version
tf.reset_default_graph()
print('Building graph...')
inputs = tf.placeholder(tf.float32, [None, 228, 228, 3], name='inputs')
keep_prob = tf.placeholder(tf.float32, [], name='keep_prob')
conv, _ = stochastic_depth_conv2d(inputs, 32, [3, 3], keep_prob)
for i in range(10):
    conv, _ = stochastic_depth_conv2d(conv, 32, [3, 3], keep_prob)
init = tf.global_variables_initializer()
print('Done.')
with tf.Session() as sess:
    sess.run(init)
    feed_dict = {
        inputs: np.random.normal(size=[32, 228, 228, 3]),
        # Adjust this parameter to see run speed adjust
        keep_prob: 0.5
    }
    print('Timing stochastic depth run...')
    start_t = time.time()
    sess.run(conv, feed_dict)
    end_t = time.time()
    print('Done.')
    print(end_t - start_t)

Building graph...
Done.
Timing stochastic depth run...
Done.
1.365527629852295


In [20]:
# Standard CNN version
tf.reset_default_graph()
print('Building graph...')
inputs = tf.placeholder(tf.float32, [None, 228, 228, 3], name='inputs')
conv = tf.layers.conv2d(inputs, 32, [3, 3], padding='same', activation=tf.nn.relu)
for i in range(10):
    conv = tf.layers.conv2d(conv, 32, [3, 3], padding='same', activation=tf.nn.relu)
init = tf.global_variables_initializer()
print('Done.')

with tf.Session() as sess:
    sess.run(init)
    feed_dict = {inputs: np.random.normal(size=[32, 228, 228, 3])}
    print('Timing standard run...')
    start_t = time.time()
    sess.run(conv, feed_dict)
    end_t = time.time()
    print('Done.')
    print(end_t - start_t)

Building graph...
Done.
Timing standard run...
Done.
2.229334592819214


## TensorFlow `while_loop`

### Python `for` loop version

In [21]:
tf.reset_default_graph()
# Running sum that every step adds to
total = tf.Variable(0.0)
# Two random matrices to multiply
a = tf.random_normal([200, 200])
b = tf.random_normal([200, 200])
# Add the mean of their product to the running sum
acc = total.assign_add(tf.reduce_mean(tf.matmul(a, b)))
# Initialization op
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    start_t = time.time()
    # One Session.run() call per step, driven from Python
    for i in range(2000):
        final_total = sess.run(acc)
    end_t = time.time()
    elapsed = end_t - start_t
    print('Time elapsed: {} seconds'.format(elapsed))

Time elapsed: 1.0568227767944336 seconds


### `while_loop` version

In [22]:
tf.reset_default_graph()
# Running sum that every iteration adds to
total = tf.Variable(0.0)
i = tf.constant(0)

def body(i, _):
    """One iteration: add the mean of a random matmul to `total`."""
    a = tf.random_normal([200, 200])
    b = tf.random_normal([200, 200])
    acc = total.assign_add(tf.reduce_mean(tf.matmul(a, b)))
    return i+1, acc

def condition(i, _):
    """Continue while the counter is below 2000."""
    return tf.less(i, 2000)

# The loop variables are the counter and the accumulator
out = tf.while_loop(condition, body, [i, total])
# Initialization op
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    start_t = time.time()
    # A single Session.run() call executes the whole loop
    final_i, final_total = sess.run(out)
    end_t = time.time()
    elapsed = end_t - start_t
    print('Time elapsed: {} seconds'.format(elapsed))

Time elapsed: 0.3016061782836914 seconds


### Condensed version of the above

This is the same graph from above, but condensed by using a `lambda` and some TensorFlow sugar. We'll also export it so that we can explore it in `tensorboard`.

In [23]:
tf.reset_default_graph()
# Accumulator Variable
total = tf.Variable(0.0)
# Define the loop body
def body(i, _):
    """One iteration: add the mean of a random matmul to `total`."""
    a = tf.random_normal([200, 200])
    b = tf.random_normal([200, 200])
    mul = tf.matmul(a, b)
    mean = tf.reduce_mean(mul)
    acc = total.assign_add(mean)
    return i+1, acc
# Create the while_loop; the bound is 2000 so this really is the same
# graph as the non-condensed while_loop version.
out = tf.while_loop(lambda i, _: i < 2000, body, [0, total])
# Initialization op
init = tf.global_variables_initializer()

export_tensorboard_graph('while_loop')

Exporting TensorBoard graph to tbout/while_loop


### Visualizing with TensorBoard

```bash
tensorboard --logdir=tbout/while_loop
```

## What about "unrolling" the loop?

In [24]:
# Unrolled version
tf.reset_default_graph()
graph_start_time = time.time()
# Random inputs to multiply
a = tf.random_normal([200, 200])
for i in range(2000):
    a = tf.matmul(a, tf.random_normal([200, 200]))
graph_creation_time = time.time() - graph_start_time
with tf.Session() as sess:
    start_t = time.time()
    final_total = sess.run(a)
    end_t = time.time()
    print('Graph creation time: {} seconds'.format(graph_creation_time))
    print('Time elapsed: {} seconds'.format(end_t - start_t))

Graph creation time: 8.232242107391357 seconds
Time elapsed: 3.455287218093872 seconds


In [25]:
# tf.while_loop version
tf.reset_default_graph()
graph_start_time = time.time()
# Define the loop body
def body(i, a):
    with tf.name_scope('body'):
        return i+1, tf.matmul(a, tf.random_normal([200, 200]))
# Create the while_loop
out = tf.while_loop(lambda i, _: i < 2000, body, [0, tf.random_normal([200, 200])])
graph_creation_time = time.time() - graph_start_time
with tf.Session() as sess:
    start_t = time.time()
    final_i, final_total = sess.run(out)
    end_t = time.time()
    print('Graph creation time: {} seconds'.format(graph_creation_time))
    print('Time elapsed: {} seconds'.format(end_t - start_t))

Graph creation time: 0.020896196365356445 seconds
Time elapsed: 0.43869447708129883 seconds


### Simple RNN

`tf.nn.dynamic_rnn` is implemented with a `tf.while_loop`. The actual implementation is much more robust (uses `RNNCell`, saves state at each step, accepts per-input lengths, etc.), but this illustrates a basic example.

If you want to save states from each time step, you'll want to use a `TensorArray`.

In [26]:
# tf.while_loop version
tf.reset_default_graph()
x_input = tf.placeholder(tf.float32, [None, 20, 200])
length_input = tf.placeholder(tf.int32, [])
# Define the condition
def cond(x, state, i, length):
    return i < length
# Define the loop body
def body(x, state, i, length):
    with tf.variable_scope('body', initializer=tf.random_normal_initializer()):
        x_reshape = tf.reshape(x, [20, -1, 200])
        # Get the input at timestep i
        x_slice = tf.gather(x_reshape, i)
        w = tf.get_variable('weight', [200, 200])
        z = tf.matmul(x_slice + state, w)
        a = tf.nn.tanh(z)
        return x, a, i+1, length 
# Create the while_loop
# Create zeros with dynamic shape based on inputs
state_shape = tf.stack([tf.shape(x_input)[0], 200])
start_state = tf.zeros(state_shape)
# Create while_loop
out = tf.while_loop(cond, body, (x_input, start_state, 0, length_input))
init = tf.global_variables_initializer()

In [27]:
with tf.Session() as sess:
    sess.run(init)
    # Random batch of 10 sequences; run all 20 steps
    batch = np.random.normal(size=[10, 20, 200])
    feed_dict = {x_input: batch, length_input: 20}
    start_t = time.time()
    final_x, final_state, final_i, final_length = sess.run(out, feed_dict)
    end_t = time.time()
    print(end_t - start_t)
    print(final_state)

0.012059688568115234
[[ 0.9999997  -1.          1.         ...  1.         -0.994401
   1.        ]
 [-0.9945827  -0.9999559   0.9984477  ... -1.         -0.99999994
  -0.9357829 ]
 [-1.         -1.          1.         ...  1.         -1.
   1.        ]
 ...
 [-1.          1.          1.         ... -0.99112177  1.
   0.99999833]
 [-0.99987483  1.          1.         ...  1.          1.
  -1.        ]
 [ 0.99998266 -1.          1.         ...  1.          1.
   0.9474561 ]]
