In [53]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

### Tensor Values
- Rank: Number of dimensions
- Shape: Tuple specifying the array's length along each dimension
- Example:
  + 3. --> Rank:0 (scalar) | Shape:[]
  + [1., 2., 3.]  --> Rank:1 (vector) | Shape: [3]
  + [[1., 2., 3.], [4., 5., 6]]  --> Rank:2 (matrix) | Shape: [2, 3]
  + [[[1., 2., 3.]], [[4., 5., 6.]]]  --> Rank: 3  | Shape: [2, 1, 3]

### Steps:
1. Build the computational graph: `tf.Graph`
2. Run the computational graph: `tf.Session`

## Graph
- It is a series of TF operations arranged into a graph.
- It is composed of 2 types of objects:
  - Operations (ops): The nodes of the graph. Describe calculations that consume and produce tensors.
  - Tensors: The edges of the graph. Values that will flow through the graph. Most TF fcns return `tf.Tensors`.

In [60]:
# Start from an empty default graph before building the example.
tf.reset_default_graph()
a = tf.constant(3.0, dtype=tf.float32)
b = tf.constant(4.0)  # no dtype given; reported as float32 in the output
total = a + b
# Printing a tensor shows its graph metadata (name, shape, dtype), not a
# value: nothing has been evaluated at this point.
print(a, b, total, sep='\n')

Tensor("Const:0", shape=(), dtype=float32)
Tensor("Const_1:0", shape=(), dtype=float32)
Tensor("add:0", shape=(), dtype=float32)


## Tensorboard
- The event file from a graph has the format: `events.out.tfevents.{timestamp}.{hostname}`
- Launch TensorBoard:
``tensorboard --logdir {file_path}``

In [61]:
# Save the computation graph to a TensorBoard summary file.
# FileWriter updates the event file asynchronously, so flush explicitly to
# make sure the graph is on disk before TensorBoard is pointed at the logdir.
writer = tf.summary.FileWriter('.')
writer.add_graph(tf.get_default_graph())
writer.flush()

## Session
- It encapsulates the state of the TF runtime, and runs TF ops. If a `tf.Graph` is like a `.py` file, a `tf.Session` is like the `python` executable.
- Backtracks through the graph and runs all the nodes that provide input to the requested output(s) node(s).
- During a call to `tf.Session.run` any `tf.Tensor` only has a single value.
- Calling `run` on an Operation returns `None`; it is done for its side effect, e.g. initialization and training ops.

In [62]:
# Create the session; later cells keep using `sess`.
sess = tf.Session()
# Fetch a single tensor: the result is its concrete value.
print(sess.run(total))
# Fetches can be nested in dicts/tuples; the result mirrors that structure.
print(sess.run({'ab': (a, b), 'total': total}))  # Pass multiple tensors

7.0
{'ab': (3.0, 4.0), 'total': 7.0}


In [63]:
# Show that during a call to `tf.Session.run` any `tf.Tensor` only has a single value.
vec = tf.random_uniform(shape=(3,))
out1 = vec + 1
out2 = vec + 2
# Two separate run calls: `vec` yields a different random sample each time.
print(sess.run(vec))
print(sess.run(vec))
# One run call fetching both outputs: they are computed from the same sample
# of `vec`, so the two printed arrays differ by exactly 1 element-wise.
print(sess.run((out1, out2)))

[0.61810064 0.84265256 0.37434506]
[0.99167466 0.59722066 0.09075904]
(array([1.7061679, 1.347009 , 1.6094939], dtype=float32), array([2.706168 , 2.347009 , 2.6094937], dtype=float32))


## Feeding
- A graph is parameterized to accept external inputs by placeholders.
- A **placeholder** is a promise to provide a value later, like a fcn argument.
- We use `feed_dict`  argument of the run method to feed concrete values to the placeholders.
- The `feed_dict` argument can be used to overwrite any tensor in the graph. The only difference between placeholders and other `tf.Tensor`s is that placeholders throw an error if no value is fed to them.

In [64]:
# Two float32 placeholders declared without a shape; the runs below feed
# them first with scalars and then with vectors.
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)
z = x + y
# feed_dict maps each placeholder to the concrete value used for this run.
print(sess.run(z, feed_dict={x: 3, y: 4.5}))
print(sess.run(z, feed_dict={x: [1, 3], y: [2, 4]}))

7.5
[3. 7.]


## Datasets
- Datasets are the preferred method of streaming data into a model
- To get a runnable `tf.Tensor` from a Dataset, first convert it to a `tf.data.Iterator`, and then call the iterator's `get_next` method
- Reaching the end of the data stream causes Dataset to throw an `OutOfRangeError`

In [69]:
# Four rows of two ints each; from_tensor_slices yields one row per element.
my_data = [[0, 1], [2, 3], [4, 5], [6, 7]]
slices = tf.data.Dataset.from_tensor_slices(my_data)
# get_next() returns the tensor that produces the next row each time it is run.
next_item = (
    slices
    .make_one_shot_iterator()
    .get_next()
)

In [70]:
# Drain the iterator: keep fetching rows until the dataset signals the end
# of the data stream by raising OutOfRangeError.
try:
    while True:
        print(sess.run(next_item))
except tf.errors.OutOfRangeError:
    pass  # end of the stream is the expected way out of the loop

[0 1]
[2 3]
[4 5]
[6 7]


In [71]:
# A Dataset that depends on stateful operations (here, a random op) may need
# an iterator that is explicitly initialized before it is used.
r = tf.random_normal([10, 3])
dataset = tf.data.Dataset.from_tensor_slices(r)
iterator = dataset.make_initializable_iterator()
next_row = iterator.get_next()

# Run the iterator's initializer once, then pull rows until exhaustion.
sess.run(iterator.initializer)
try:
    while True:
        print(sess.run(next_row))
except tf.errors.OutOfRangeError:
    pass  # every row has been consumed

[0.13192289 1.602257   0.7300845 ]
[-1.53792     0.22738095  0.8293629 ]
[0.99698305 1.0610259  1.0443759 ]
[-0.04267362  0.7620447  -1.7718678 ]
[-0.8684591  -0.40656278 -1.0453751 ]
[-1.0305398e+00 -1.5117089e+00  1.5716092e-04]
[-0.3321954  1.0547798  0.9447274]
[-1.5558867  -0.39574862  1.8517487 ]
[ 1.0624346  -2.6477983  -0.07072894]
[ 0.49216872  0.6381449  -1.0255786 ]


## Layers
- A trainable model must modify the values in the graph to get new outputs with the same input. Layers are the preferred way to add trainable parameters to a graph.
- Layers package together both the variables and the operations that act on them. For example a densely-connected layer performs a weighted sum across all inputs for each output and applies an optional activation function. The connection weights and biases are managed by the layer object.

### Creating Layers
To apply a layer to an input, call the layer as if it were a function

In [72]:
# Batch of 3-feature rows; the batch dimension is left unspecified (None).
x = tf.placeholder(tf.float32, shape=[None, 3])
# Dense layer with a single output unit; calling the layer object on `x`
# applies it and returns the output tensor.
linear_model = tf.layers.Dense(units=1)
y = linear_model(x)

### Initializing Layers
- The layer contains variables that must be initialized before they can be used. 
- While it is possible to initialize variables individually, you can easily initialize all the variables in a TensorFlow graph as follows:

In [73]:
# Build a single op that initializes the global variables of the graph
# (including the Dense layer's weights and bias), then run it.
init = tf.global_variables_initializer()
sess.run(init)

### Executing Layers

In [74]:
# Feed two rows of three features each; the output has one value per row.
print(sess.run(y, feed_dict={x: [[1, 2, 3], [4, 5, 6]]}))

[[1.5302558]
 [1.0715563]]


### Layer function shortcuts
- For each layer class (like `tf.layers.Dense`) TensorFlow also supplies a shortcut function (like `tf.layers.dense`). The only difference is that the shortcut function versions create and run the layer in a single call.
- The problem of the above is that it makes introspection and debugging more difficult, and layer reuse impossible.

In [75]:
x = tf.placeholder(tf.float32, shape=[None, 3])
# Shortcut form: creates the dense layer and applies it to `x` in one call.
y = tf.layers.dense(x, units=1)

# Build a new initializer op now that the layer's variables exist, and run it.
init = tf.global_variables_initializer()
sess.run(init)

# The feed dict is passed positionally here instead of via feed_dict=.
print(sess.run(y, {x: [[1, 2, 3], [4, 5, 6]]}))

[[-0.63285863]
 [ 0.15861702]]


# VARIABLES

- A `tf.Variable` represents a tensor whose value can be changed by running ops on it. Unlike `tf.Tensor` objects, it **exists outside** the context of a single `session.run` call.
- A `tf.Variable` stores a persistent tensor. Specific ops allow you to read and modify the values of this tensor. These modifications are visible across multiple `tf.Session`s, so multiple workers can see the same values for a `tf.Variable`.

## Create a Variable
- `tf.get_variable` is the best way to create a variable.
- It requires you to specify the variable's name, which other replicas will use to access the same variable.
- It allows you to reuse a previously created variable of the same name, making it easy to define models which reuse layers.

In [2]:
# Only a name and a shape are given, so this variable has dtype tf.float32
# and is initialized by tf.glorot_uniform_initializer.
# The shape [1, 2, 3] makes it a rank-3 tensor.
my_variable = tf.get_variable("my_variable", [1, 2, 3])

In [3]:
# Specify the dtype and initializer explicitly: an int32 variable of shape
# [1, 2, 3] whose initializer is tf.zeros_initializer.
my_int_variable = tf.get_variable("my_int_variable",
                                  [1, 2, 3],
                                  dtype=tf.int32,
                                  initializer=tf.zeros_initializer)

In [4]:
# Initialize a variable to have the value of a tf.Tensor.
# No shape argument is passed: the initializer tensor ([23, 42]) supplies it.
other_variable = tf.get_variable("other_variable",
                                 dtype=tf.int32,
                                 initializer=tf.constant([23, 42]))

### Variable Collections
- A **collection** is a named list of tensors or other variables, such as `tf.Variable` instances. It is useful to access easily the variables.
- By default, every `tf.Variable` gets placed in the following 2 collections:
  - `tf.GraphKeys.GLOBAL_VARIABLES`: Variables that can be shared across multiple devices.
  - `tf.GraphKeys.TRAINABLE_VARIABLES`: Variables for which TF will calculate gradients.
- If you don't want a variable to be trainable, add it to the `tf.GraphKeys.LOCAL_VARIABLES` collection instead, or pass `trainable=False` to `tf.get_variable`.
- It is possible to create your own collections by: `tf.add_to_collection` and to retrieve a list of all the variables (or other objects) with `tf.get_collection`.

In [5]:
# These variables are not trainable, so gradients are not computed for them.
# Option 1: place the variable only in the LOCAL_VARIABLES collection.
my_local = tf.get_variable("my_local",
                           shape=(),
                           collections=[tf.GraphKeys.LOCAL_VARIABLES])
# Option 2: pass trainable=False.
my_non_trainable = tf.get_variable("my_non_trainable",
                                   shape=(),
                                   trainable=False)

In [6]:
# Create a collection by adding a first item under a custom key
tf.add_to_collection('my_collection_name', my_local)

# Retrieve all the variables (or other objects) in the collection.
# As the last expression of the cell, the returned list is shown as its output.
tf.get_collection('my_collection_name')

[<tf.Variable 'my_local:0' shape=() dtype=float32_ref>]

### Device Placement
- Like any other TF op, variables can be placed on particular devices
- It is important to put variables on parameter servers and NOT on workers; doing the opposite can severely slow down training or, in the worst case, let each worker blithely forge ahead with its own independent copy of each variable.
- To avoid the aforementioned problem, `tf.train.replica_device_setter` can automatically place variables in parameter servers.

In [46]:
# Put variable in GPU device
# NOTE(review): "/device:GPU:1" names the GPU with index 1; presumably this
# assumes a machine with at least two GPUs -- confirm before running.
with tf.device("/device:GPU:1"):
    v = tf.get_variable('v', [1])

In [11]:
# Use replica_device_setter to place variables in parameter servers.
# The cluster is described as a dict mapping each job name to its task
# addresses: two parameter servers and three workers.
cluster_spec = {
    'ps': ['ps0:2222', 'ps1:2222'],
    'worker': ['worker0:2222', 'worker1:2222', 'worker2:2222']
}
# This variable is placed on a parameter server by the replica_device_setter.
# It uses its own graph name ('v_ps'): a variable named 'v' already exists in
# the default graph from the GPU placement cell, and tf.get_variable raises
# ValueError on a duplicate name unless reuse is enabled.
with tf.device(tf.train.replica_device_setter(cluster=cluster_spec)):
    v = tf.get_variable('v_ps', shape=[20, 20])

## Initializing variables
- Before you can use a variable, it must be initialized.
- Explicit initialization (that is, without the high-level TF API) lets you avoid rerunning expensive initializers when reloading a model from a checkpoint, and gives determinism when randomly-initialized variables are shared in a distributed setting.
- To initialize all variables in the `tf.GraphKeys.GLOBAL_VARIABLES` collection: `tf.global_variables_initializer()`
- To initialize a variable: `session.run(my_variable.initializer)`
- Print the names of all variables which have not yet been initialized: `tf.report_uninitialized_variables()`
- NOTE: `tf.global_variables_initializer` does not specify the order in which variables are initialized. Then if the initial value of one variable depends on another variable's value, you can get an error. For this reason is better to use `variable.initialized_value()` instead of `variable` in the `initializer` parameter of `tf.get_variable`.

In [7]:
# Session used by the variable-initialization examples that follow.
session = tf.Session()
# Initialize all variables in tf.GraphKeys.GLOBAL_VARIABLES collection
session.run(tf.global_variables_initializer())

In [9]:
# Initialize a variable yourself by running its own initializer op
session.run(my_variable.initializer)

In [10]:
# Ask which variables have still not been initialized.
# Running the op returns the names of the uninitialized variables.
print(session.run(tf.report_uninitialized_variables()))

[b'my_local']


In [16]:
# Use variable.initialized_value() when an initializer requires the value of
# another variable.
# The base variable uses its own graph name ('v_base'): a variable named 'v'
# already exists in the default graph from the device placement section, and
# tf.get_variable raises ValueError on a duplicate name unless reuse is enabled.
v = tf.get_variable('v_base', shape=(), initializer=tf.zeros_initializer())
# No shape is passed for w; it comes from the initializer tensor.
w = tf.get_variable('w', initializer=v.initialized_value() + 1)

## Using variables
- To use the value of a `tf.Variable` in a TF graph, treat it like a normal `tf.Tensor`
- To assign a value to a variable, use methods: `assign`, `assign_add` in the `tf.Variable` class.
- TF optimizers (see `tf.train.Optimizer`) efficiently update the values of variables according to some gradient descent-like algorithm.
- To force a re-read of the value of a variable after something has happened, you can use `tf.Variable.read_value`.

In [19]:
# Start from an empty graph so the variable name 'v' is free again.
tf.reset_default_graph()
v = tf.get_variable('v', shape=(), initializer=tf.zeros_initializer())
# w is a tf.Tensor which is computed based on the value of v.
# Any time a variable is used in an expression it gets automatically
# converted to a tf.Tensor representing its value.
w = v + 1

In [24]:
tf.reset_default_graph()
# A new session is created after the graph reset.
sess = tf.Session()
# Assign a value to a variable
v = tf.get_variable('v', shape=(), initializer=tf.zeros_initializer())
# assign_add(1) builds the op that adds 1 to v; nothing changes until it is run.
assignment = v.assign_add(1)
sess.run(tf.global_variables_initializer())  # or tf.global_variables_initializer().run()
sess.run(assignment)  # or assignment.op.run(), or assignment.eval()

1.0

In [30]:
# Force to re-read the value of a variable after something has happened.
tf.reset_default_graph()
v = tf.get_variable('v', shape=(), initializer=tf.zeros_initializer())
assignment = v.assign_add(1)
# Ops created inside this block only run after `assignment` has run.
with tf.control_dependencies([assignment]):
    w = v.read_value()  # w is guaranteed to reflect v's value after the assign_add op

## Sharing variables
- TF supports 2 ways of sharing variables:
  - Explicitly passing `tf.Variable` objects around.
  - Implicitly wrapping `tf.Variable` objects within `tf.variable_scope` objects
- While code which explicitly passes variables around is very clear, it is sometimes convenient to write TF functions that implicitly use variables in their implementations, e.g. the functional layers from `tf.layers` and `tf.metrics`.
- Variables scopes allow you to control variable reuse when calling functions which implicitly create and use variables. They also allow you to name your variables in a hierarchical and understandable way.
- If you want variables to be **shared**: 1) enter the variable scope with `reuse=True`, or 2) call `scope.reuse_variables()` to trigger reuse.

In [43]:
def conv_relu(input, kernel_shape, bias_shape):
    """Apply a 2-D convolution plus bias, followed by ReLU.

    The filter and the bias are obtained through ``tf.get_variable`` under
    the names 'weights' and 'biases'.

    Args:
        input: Tensor to convolve; passed straight to ``tf.nn.conv2d``.
        kernel_shape: Shape of the 'weights' variable (the conv filter).
        bias_shape: Shape of the 'biases' variable.

    Returns:
        The output of ``tf.nn.relu`` applied to the convolution plus bias.
    """
    # Filter variable, drawn from a random normal initializer.
    kernel = tf.get_variable(
        'weights',
        kernel_shape,
        initializer=tf.random_normal_initializer()
    )
    # Bias variable, starting at constant 0.0.
    offset = tf.get_variable(
        'biases',
        bias_shape,
        initializer=tf.constant_initializer(0.0)
    )
    # Stride of 1 along every dimension, 'SAME' padding.
    convolved = tf.nn.conv2d(input, kernel, strides=[1, 1, 1, 1], padding='SAME')
    pre_activation = convolved + offset
    return tf.nn.relu(pre_activation)

In [48]:
tf.reset_default_graph()
# conv_relu always requests variables named 'weights' and 'biases', so
# calling it more than once in the same scope clashes on those names.
input1 = tf.random_normal([1, 10, 10, 32])
input2 = tf.random_normal([1, 20, 20, 32])
x = conv_relu(input1, kernel_shape=[5, 5, 32, 32], bias_shape=[32])
# THIS WILL FAIL:
# x = conv_relu(x, kernel_shape=[5, 5, 32, 32], bias_shape=[32])

By calling `conv_relu` in different scopes, we can clarify that we want to create new variables:

In [41]:
def my_image_filter(input_images):
    """Run two stacked conv_relu layers, each in its own variable scope.

    Args:
        input_images: Tensor passed to the first conv_relu call.

    Returns:
        The output tensor of the second conv_relu call.
    """
    kernel_shape = [5, 5, 32, 32]
    bias_shape = [32]
    with tf.variable_scope('conv1'):
        # Variables created here will be named 'conv1/weights', 'conv1/biases'
        hidden = conv_relu(input_images, kernel_shape, bias_shape)
    with tf.variable_scope('conv2'):
        # Variables created here will be named 'conv2/weights', 'conv2/biases'
        filtered = conv_relu(hidden, kernel_shape, bias_shape)
    return filtered

In [46]:
# Reuse a variable with reuse=True in scope
# First pass: the filter's variables are created under the 'model' scope.
with tf.variable_scope('model'):
    output1 = my_image_filter(input1)
# Second pass: the same scope is re-entered with reuse=True, so the filter
# looks up the existing variables instead of creating new ones.
with tf.variable_scope('model', reuse=True):
    output2 = my_image_filter(input2)

In [49]:
# Reuse a variable with scope.reuse_variables()
# This example uses its own scope name: the 'model' scope already holds the
# variables created by the reuse=True example, and opening it again without
# reuse would make tf.get_variable raise ValueError.
with tf.variable_scope('model_reuse_variables') as scope:
    # First call creates the variables under this scope.
    output1 = my_image_filter(input1)
    # From here on, tf.get_variable in this scope returns existing variables.
    scope.reuse_variables()
    # Second call, on the other input, shares the variables created above.
    output2 = my_image_filter(input2)

In [None]:
# Since depending on exact string names of scopes can feel dangerous,
# it's also possible to initialize a variable scope based on another one.
# This example uses its own scope name: the 'model' scope already holds
# variables from the earlier reuse examples, and opening it again without
# reuse would make tf.get_variable raise ValueError.
with tf.variable_scope('model_scope_object') as scope:
    output1 = my_image_filter(input1)
# Re-enter the captured scope object (not its string name) with reuse=True.
with tf.variable_scope(scope, reuse=True):
    output2 = my_image_filter(input2)