In [1]:
from __future__ import print_function

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Adjust verbosity to suppress information logs

import numpy as np
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector # For visualizing embeddings

**Gradients**

- Automatic differentiation
- `tf.gradienets(y, [xs])` - Take derivative of `y` with respect to each tensor in the list `[xs]`

---

Example:

$y = 2x^3$ and $z = 3 + y^2$

In [6]:
x = tf.Variable(initial_value=2.0)
y = 2.0 * (x**3)
z = 3.0 + y**2

grad_z = tf.gradients(z, [x, y])

with tf.Session() as sess:
    sess.run(x.initializer)
    print(sess.run(grad_z))

[768.0, 32.0]


**Structure Model**
- Need models to be reusable - Object Oriented Programming

**Managing Experiments**

- **`tf.train.Saver()`**
    - Saves the graph's variables in binary files
    - Step at which graph variables are saved is called a checkpoint
    - Periodically save the model's parameters after a certain number of steps and allows to restore/retrain model from last saved checkpoint
    - `tf.train.Saver().save()` stores all variables of the graph by default
    - `tf.train.Saver().save()` only save variables not the entire graph so need to create the graph and then load in variables

`tf.train.Saver.save(sess, save_path, global_step, latest_filename=None, meta_graph_suffix='meta', write_meta_graph=True, write_state=True)`

```python
# Define model

# Create a saver object
saver = tf.train.Saver()

# Launch a session to compute ethe graph
with tf.Session() as sess:
    # Training loop
    for step in range(training_steps):
        sess.run([optimizer])
        
        if (step + 1) % 1000 == 0:
            saver.save(sess, 'checkpoint_directory/model_name',
                       global_step=model.global_step)

```

- **`Global Step`**
    - During training many checkpoints will be created, so it is helpful to append number of training steps the model has gone through in a **variable** called `global_step`
    -  After creating **variable** `global_step`, it is initialized to 0 and **set as to be not trainable**
    - Pass `global_step` as a parameter to the optimizer so that optimizer increments `global_step` by 1 after each training step

```python
# Create global_step variable
self.global_step  =  tf.Variable(0, dtype=tf.int32 ,trainable=False, name='global_step')

# Pass global_step as a parameter to the optimizer
self.optimizer = tf.train.GradientDescentOptimizer(self.lr).minimize(self.loss, global_step=self.global_step)

# Save the session's variables in the directory `checkpoints` with name `model-name-global-step` (e.g. *skip-gram-1000*)
saver.save(sess, 'checkpoint_directory/model_name', global_step=model.global_step)

# Restore the session at 1000th step
saver.restore(sess, 'checkpoints/skip-gram-1000')

# # Passing what variables to store by passing them in a list or a dict
v1 = tf.Variable(..., name='v1')
v2 = tf.Variable(..., name='v2')

# Pass the variables as a dict
saver = tf.train.Saver({'v1': v1, 'v2': v2})

# Pass the variables as a list
saver = tf.train.Saver([v1, v2])

# Passing a list is equivalent to passing a dict with the variable op names as keys
saver = tf.train.Saver({v.op.name: v for v in [v1, v2]})
```
     
- **`tf.train.get_checkpoint_state('checkpoint_directory_path')`**
    - Allows to get checkpoint from a directory
    
```python
# The file checkpoint automatically updates the path to the latest checkpoint
checkpoint = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))
if checkpoint and checkpoint.model_checkpoint_path:
    saver.restore(sess, checkpoint.model_checkpoint_path)
```

- **`tf.summary`**
    - Visualize summary statistics during training (via TensorBoard)
    - Create a new `tf.name_scope(...)` to hold all the summary operations
    - Because summary is an operation - Need to execute it with `sess.run()` to obtain it
    - After obtaining summary - Need to write summary to file
    
```python
def _create_summaries(self):
    with tf.name_scope('summaries'):
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('accuracy', self.accuracy)
        tf.summary.histogram('histogram_loss', self.loss)
        
        # Merge several summaries into one operation (op) to make it easier to manage
        self.summary_op = tf.summary.merge_all()
        

# To write `Summary` protocol buffers to event files
writer = tf.summary.FileWriter('skip-gram-graph/lr' + str(self.learning_rate), sess.graph)
            
with tf.Session() as sess:
    batch_loss, _, summary = sess.run([model.loss, model.optimizer, model.summary_op], feed_dict=feed_dict)
    
    # Write summary to file
    writer.add_summary(summary, global_step=global_step)
    
```

**Control Randomization**
- Set random seed at operation level. Each operation keeps its own seed
- Set random seed at graph level with `tf.set_random_seed(seed)` (to be able to replicate result on another graph

In [18]:
# Set random seed at operation level
c = tf.random_uniform([], minval=-10, maxval=10, seed=2)
d = tf.random_uniform([], minval=-10, maxval=10, seed=4)

with tf.Session() as sess:
    print(sess.run(c))
    print(sess.run(c))
    print(sess.run(c))
    print()
    
# Each new session restarts the random state 
with tf.Session() as sess:
    print(sess.run(c))
    
with tf.Session() as sess:
    print(sess.run(c))
    
print()
with tf.Session() as sess:
    print(sess.run(c))
    print(sess.run(d))

3.57493
-5.97319
-0.02878

3.57493
3.57493

3.57493
9.13163


In [23]:
# Set random seed at graph level
tf.set_random_seed(3)

c = tf.random_uniform([], minval=-10, maxval=10)
d = tf.random_uniform([], minval=-10, maxval=10)

with tf.Session() as sess:
    print(sess.run(c))
    print(sess.run(d))

7.7782
2.03784


**Reading Data in TensorFlow**
- There are 2 main ways to load data into a TensorFlow graph
    - Using `feed_dict` - It sends data from storage system to the client and then from clienet to the worker process. This will cause the data to slow down if the client is on a different machine from the worker process
    - Using TensorFlow readers - Allows to load data directley into the worker process
    
- `tf.TextLineReader` - Outputs the lines of a file delimieted by new lines, e.g. text files, CSV files
- `tf.FixedLenFeature` - Outputs the entire file when all files have same fixed lengts, e.g. each MNIST file has 28 x 28 pixels
- `tf.WholeFileReader` - Outputs the entire file content
- `tf.TFRecordReader` - Read samples from TensorFlow's own binary format (`TFRecord`)
- `tf.ReaderBase` - Allows you to create your own readers

### New

### Autodiff

- TensorFlow uses *reverse mode automatic differentiation*: It allows one to take derivative of a function at roughly the same cost as computing the original function.
- `tf.gradients(y, [x1, x2, ...])` - Take derivative of `y` with respect to each tensor in the list

In [2]:
x = tf.Variable(2.0)
y = 2.0 * x**3
z = 3.0 + y**2

grad_z = tf.gradients(z, [x, y])

with tf.Session() as sess:
    sess.run(x.initializer)
    print(sess.run(grad_z))

[768.0, 32.0]


### Name Scope
- If names are given tensors and ops: In TensorBoard the nodes are scattered all over the place and this makes graph difficult to read. TensorBoard does not know which nodes are similar to which nodes and should be grouped together.

- Name scope - Creates namespace and **groups all ops related to a name together**

### Variable Sharing

- Variable scope - Creates namespace and **facilitate variable sharing**. It consists of 2 main functions:
    - `tf.get_variable(<name>, <shape>, <initializer>)` - Creates or returns a variable with a given name
    - `tf.variable_scope(<scope_name>)` - Manages namespaces for names passed to `tf.get_variable(...)`

In [4]:
# Name scope
with tf.name_scope('constant'):
    pi = tf.constant(3.14, name='pi')
    golden_ratio = tf.constant(1.618, name='golden_ratio')
    
with tf.Session() as sess:
    print(sess.run(pi))
    print(sess.run(golden_ratio))

print(pi.op.name)
print(golden_ratio.op.name)

3.14
1.618
constant/pi
constant/golden_ratio


In [5]:
# Need for Variable scope

# We want to create a NN with 2 hidden layers. 
def two_hidden_layer_net(x):
    assert x.shape.as_list() == [200, 100]
    w1 = tf.Variable(tf.random_normal([100, 50]), name='h1_weights')
    b1 = tf.Variable(tf.zeros([50]), name='h1_biases')
    h1 = tf.matmul(x, w1) + b1
    assert h1.shape.as_list() == [200, 50]
    w2 = tf.Variable(tf.random_normal([50, 10]), name='h2_weights')
    b2 = tf.Variable(tf.zeros([10]), name='h2_biases')
    logits = tf.matmul(h1, w2) + b2
    return logits

# 2 different input x1 and x2
x1 = tf.truncated_normal([200, 100], name='x1')
x2 = tf.truncated_normal([200, 100], name='x2')

# Call network on different inputs
logits1 = two_hidden_layer_net(x1)
logits2 = two_hidden_layer_net(x2)

print(logits1)  # Variables: h1_weights, h1_biases, etc.
print(logits2)  # Variables: h1_weights_1, h1_biases_1, etc.

Tensor("add_2:0", shape=(200, 10), dtype=float32)
Tensor("add_4:0", shape=(200, 10), dtype=float32)


Each time the network is called, TensorFlow creates a different set of variables. However, the network should share the same variables for all inputs. To share variables use `tf.get_variable()`

- `tf.get_variable()` - First checks whether that variable exists. If it exists then reuse it, if not then create a new one

In [6]:
# We want to create a NN with 2 hidden layers. 
def two_hidden_layer_net(x):
    assert x.shape.as_list() == [200, 100]
    w1 = tf.get_variable(name='h1_weights', shape=[100, 50], 
                         initializer=tf.random_normal_initializer())
    b1 = tf.get_variable(name='h1_biases', shape=[50], 
                         initializer=tf.constant_initializer(0.0))
    h1 = tf.matmul(x, w1) + b1
    assert h1.shape.as_list() == [200, 50]
    w2 = tf.get_variable(name='h2_weights', shape=[50, 10], 
                         initializer=tf.random_normal_initializer())
    b2 = tf.get_variable(name='h2_biases', shape=[10], 
                         initializer=tf.constant_initializer(0.0))
    logits = tf.matmul(h1, w2) + b2
    return logits

# 2 different input x1 and x2
x1 = tf.truncated_normal([200, 100], name='x1')
x2 = tf.truncated_normal([200, 100], name='x2')

# Call network on different inputs
#logits1 = two_hidden_layer_net(x1)
#print(logits1)  

#logits2 = two_hidden_layer_net(x2)
#print(logits2)  

# ValueError: Variable already exists
# To avoid the above error, put all variables in a Variable Scope

In [7]:
# Variable Scope

with tf.variable_scope('two_layers', reuse=tf.AUTO_REUSE) as scope:
    logits1 = two_hidden_layer_net(x1)
    print(logits1)
    # scope.reuse_variables() # Redundant if using reuse?
    logits2 = two_hidden_layer_net(x2)
    print(logits2)

Tensor("two_layers/add_1:0", shape=(200, 10), dtype=float32)
Tensor("two_layers/add_3:0", shape=(200, 10), dtype=float32)


In [8]:
def fully_connected(x, output_dim, scope):
    with tf.variable_scope(scope) as scope:
        w = tf.get_variable('weights', shape=[x.shape[1], output_dim], 
                            initializer=tf.random_normal_initializer())
        b = tf.get_variable('biases', shape=[output_dim], 
                            initializer=tf.constant_initializer(0.0))
        return tf.matmul(x, w) + b
    
def two_hidden_layers_net(x):
    h1 = fully_connected(x, output_dim=50, scope='h1')
    h2 = fully_connected(h1, output_dim=10, scope='h2')
    return h2

# 2 different input x1 and x2
x1 = tf.truncated_normal([200, 100], name='x1')
x2 = tf.truncated_normal([200, 100], name='x2')

## ValueError
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     logits1 = sess.run(two_hidden_layer_net(x1))
#     logits2 = sess.run(two_hidden_layer_net(x2))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    with tf.variable_scope('two_layers', reuse=tf.AUTO_REUSE) as scope:
        logits1 = sess.run(two_hidden_layer_net(x1))
        print('logits1: ', logits1.shape)
        logits2 = sess.run(two_hidden_layer_net(x2))
        print('logits2: ', logits2.shape)

logits1:  (200, 10)
logits2:  (200, 10)


In [9]:
def foo():
    with tf.variable_scope("foo", reuse=tf.AUTO_REUSE):
        v = tf.get_variable("v", [1])
    return v

v1 = foo()  # Creates v.
v2 = foo()  # Gets the same, existing v.
assert v1 == v2
print(v1)
print(v2)

<tf.Variable 'foo/v:0' shape=(1,) dtype=float32_ref>
<tf.Variable 'foo/v:0' shape=(1,) dtype=float32_ref>


### Graph Collections

- `tf.get_collection` - Lets one access a certain collection of variables, with `key` being the name of the collection and `scope` is the scope of the variables. By default all variables are placed in `tf.GraphKeys.GLOBAL_VARIABLES`. *It can be used to freeze weights during transfer learning*
- `tf.add_to_collection(name, value)` - Create a collection

In [10]:
with tf.variable_scope('graph', reuse=tf.AUTO_REUSE) as scope:
    w = tf.get_variable('weights', shape=[10, 3], 
                        initializer=tf.random_normal_initializer())
    b = tf.get_variable('biases', shape=[3], 
                        initializer=tf.constant_initializer(0.0))
    
c = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='graph')
print(c)
print()

c = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='graph')
print(c)

[<tf.Variable 'graph/weights:0' shape=(10, 3) dtype=float32_ref>, <tf.Variable 'graph/biases:0' shape=(3,) dtype=float32_ref>]

[<tf.Variable 'graph/weights:0' shape=(10, 3) dtype=float32_ref>, <tf.Variable 'graph/biases:0' shape=(3,) dtype=float32_ref>]


## Managing Experiments

**Tools**

- **`tf.train.Saver()`** - Saves the graph's variables in binary files. It Allows to periodically save the model's parameters after certain numbers of steps or epochs, and to restore/retrain models from some step. The step at which graph variables is called a `checkpoint`
    - By default `Saver` stores all variables of the graph (recommended), however chosen variables can be stored by passing them in as a list or a dict when creating saver object, e.g. `saver = tf.train.Saver([w, b])`

- **`global_step`** - During training many checkpoints are created, so it is helpful to append the number of training steps the model has gone through, it is done by creating a variable called `global_step`, initializing it to 0, and setting it to be not trainable. 
    - NOTE: Need to pass `global_step` as a parameter to the optimizer so it knows to increment `global_step` by 1 with each training step.
    
- **`tf.summary.FileWriter(directory, graph)`** - Provides a mechanism to create an event file in a given directory and add summaries and events to it.
    
- **`tf.summary`** - Collects summary statistics during training. Because it is an op, it needs to be executed with `sess.run()`. After obtaining summary, write the summary to file using `FileWriter`

In [11]:
# Create a saver object
saver = tf.train.Saver()

# Global step
global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

with tf.variable_scope('weights', reuse=tf.AUTO_REUSE) as scope:
    w = tf.get_variable('weights', shape=[2, 2], 
                        initializer=tf.random_normal_initializer())

# Launch a session to execute computation and save variables
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(w)
    print(out)
    
    # Save the variable
    saver.save(sess, 'checkpoints/saver', global_step=global_step)

[[-1.7003365   0.63909185]
 [-1.662971    0.07174451]]


#### Control Randomization

- Op level random seed
    - Each new session restarts the random state
    - Each Op keeps it own seed

In [14]:
c = tf.random_uniform([], minval=-10, maxval=10, seed=3)

with tf.Session() as sess:
    print(sess.run(c))
    print(sess.run(c))
    
print()
with tf.Session() as sess:
    print(sess.run(c))

-2.2614121
3.2668896

-2.2614121


In [15]:
c = tf.random_uniform([], -10, 10, seed=3)
d = tf.random_uniform([], -10, 10, seed=3)

with tf.Session() as sess:
    # c and d will be same
    print('c: ', sess.run(c)) 
    print('d: ', sess.run(d))

c:  -2.2614121
d:  -2.2614121


- Graph level random seed
    - Result is different from op-level seed

In [16]:
tf.set_random_seed(3)

c = tf.random_uniform([], -10, 10)
d = tf.random_uniform([], -10, 10)

with tf.Session() as sess:
    # c and d will be different
    print('c: ', sess.run(c))
    print('d: ', sess.run(d))

c:  -5.4093623
d:  -9.190879
