In [27]:
import numpy as np
import os

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import time

In [28]:
def reset_graph(seed=42):
    """Utility: make this notebook's output stable across runs.

    Clears the default TensorFlow graph and seeds both the NumPy and the
    TensorFlow random number generators with the same value.

    Args:
        seed: integer handed to np.random.seed and tf.set_random_seed.
    """
    np.random.seed(seed)
    # The graph is reset first so that the TF seed is set on the fresh graph.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

# Creating and running a graph

In [29]:
import tensorflow as tf

In [30]:
# Start from a clean default graph, then add nodes to it
reset_graph()

# Two variables and one constant
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
a = tf.constant(2, name="a")

In [31]:
# Is each of these nodes attached to the default graph?
for node in (x, y, a):
    print(node.graph is tf.get_default_graph())

True
True
True


In [32]:
# Note: we can add nodes to a specific graph instead of the default one:
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
# x2 belongs to `graph`, not to the default graph
print(x2.graph is graph)
print(x2.graph is tf.get_default_graph())
# if we want to remove duplicates or unwanted nodes:
#tf.reset_default_graph()

True
False


In [33]:
# The variables and the constant are only graph nodes at this point;
# the variables have not been initialized yet
x,y,a

(<tf.Variable 'x:0' shape=() dtype=int32_ref>,
 <tf.Variable 'y:0' shape=() dtype=int32_ref>,
 <tf.Tensor 'a:0' shape=() dtype=int32>)

In [34]:
# Only builds the expression on the graph; nothing is computed here
f = x * x * y + y + a

In [35]:
# Example of lazy evaluation: f is a tensor produced by an add op,
# and it holds no value yet
f

<tf.Tensor 'add_1:0' shape=() dtype=int32>

## To evaluate the graph, we open a TensorFlow session
Within a TF session we initialize all variables and evaluate the graph. The session places graph operations on a CPU or GPU (or a cluster), and holds all the variable values.

In [36]:
# Open a session explicitly, initialize each variable, then evaluate the graph.
sess = tf.Session()
try:
    sess.run(x.initializer)
    sess.run(y.initializer)
    result = sess.run(f)
    a_val = a.eval(session=sess)  # a constant does not need to be initialized
finally:
    # A session created without `with` is not closed automatically,
    # so release it explicitly.
    sess.close()
print('a =', a_val)
print('result =', result)

a = 2
result = 42


In [37]:
# Run a session that is closed automatically at the end of the `with` block.
# Inside the block, .run() and .eval() are called without passing the session.
with tf.Session() as sess:
    for variable in (x, y):
        variable.initializer.run()
    a_val = a.eval()
    result = f.eval()
print('a =', a_val)
print('result =', result)

a = 2
result = 42


## Initialization of all variables at once
The code can be made even shorter if we introduce a new node on the graph that takes care of initializing all variables at once.

In [38]:
init = tf.global_variables_initializer()  # prepare an init node on the graph

# Bind the session to `sess` (was misspelled `sees`), matching the other cells.
with tf.Session() as sess:
    init.run()  # initializes all variables at once
    result = f.eval()
print(result)

42


In [39]:
init  # which node did TensorFlow create for the initializer?

<tf.Operation 'init' type=NoOp>

## Lifecycle of a node value
When you create a node, it only gets a value during the execution phase, when you run a TensorFlow session.

In [40]:
w = tf.constant(3) #node for constant w
# we create 3 tensors
# (note: x and y are rebound here and no longer refer to the tf.Variables
#  created earlier in the notebook)
x = w + 2 
y = x + 5
z = x * 3

# What if we want to find the values of y and z?
# The code below evaluates w and x twice
# --> the TF graph is traversed twice to compute the values of y and z
# independently of each other --> even though both y and z use the same
# value of x
with tf.Session() as sess:
    print(y.eval()) #10
    print(z.eval()) #15

10
15


## All node values are dropped between runs except variable values
A variable starts its life when its initializer is run, and ends it when the session is closed

In [41]:
# x is the tensor w + 2 defined above; it is displayed as a graph node, not a value
x

<tf.Tensor 'add_2:0' shape=() dtype=int32>

### We can do this more efficiently by telling TF to do all the calculations in one pass over the graph.

In [42]:
# Fetch y and z together with a single sess.run call
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
print(y_val)
print(z_val)

10
15


## Reverse-mode autodiff in TF
Define a composite function:
$$f(w) = e^{w_{20}+w_{21}\cdot e^{w_{10}+w_{11}\cdot e^{w_{00}+w_{01}\cdot x}}}$$
It's an exponential of an exponential of an exponential.


In [43]:
def my_func(w, x):
    """Build three nested exponentials of x, parameterized by w.

    Args:
        w: parameters indexed as w[level, 0] (intercept term) and
           w[level, 1] (slope term) for level 0, 1 and 2.
        x: input to the innermost function.

    Returns:
        Tuple (f_2, f_1, f_0): the output, next-level and innermost functions.
    """
    inner = tf.exp(w[0, 0] + w[0, 1] * x)        # the innermost function
    middle = tf.exp(w[1, 0] + w[1, 1] * inner)   # the next-level function
    outer = tf.exp(w[2, 0] + w[2, 1] * middle)   # the output function
    return outer, middle, inner

In [44]:
def my_func(w, x):
    """Build three nested exponentials of x, one name scope per level.

    A fancier implementation of the same computation as the plain version:
    each level is defined within its own name scope, which is useful for
    later visualization of the TensorFlow graph and where each node belongs.

    Args:
        w: parameters indexed as w[level, 0] and w[level, 1] for
           level 0, 1 and 2.
        x: input to the innermost function.

    Returns:
        Tuple (f_2, f_1, f_0): the output, next-level and innermost functions.
    """
    # The scope names returned by tf.name_scope were never used, so they are
    # no longer bound to local variables.
    with tf.name_scope("f_0_level"):
        f_0 = tf.exp(w[0, 0] + w[0, 1] * x)
    with tf.name_scope("f_1_level"):
        f_1 = tf.exp(w[1, 0] + w[1, 1] * f_0)
    with tf.name_scope("f_2_level"):
        f_2 = tf.exp(w[2, 0] + w[2, 1] * f_1)
    return f_2, f_1, f_0

### Let's specify a point at which we want to compute the derivative. In this example we want a point where all intercepts equal zero and all slopes equal one.

In [46]:
# w_0 is the point at which we evaluate the function and its derivatives:
# column 0 is all zeros, column 1 is all ones
# (a random point could be used instead: w_0 = np.random.rand(3,2))
w_0 = np.column_stack((np.zeros(3), np.ones(3)))

In [47]:
# display the evaluation point
w_0

array([[0., 1.],
       [0., 1.],
       [0., 1.]])

A manual check of derivatives at  $$w = w_{0}$$

$$ \frac{\partial f}{\partial w_{20}}=\frac{\partial f}{\partial f_{2}}=f_{2}(w_{0})$$

$$ \frac{\partial f}{\partial w_{21}}=\frac{\partial f}{\partial f_{2}}\cdot f_{1}(w_{0})=f_{2}(w_{0})\cdot f_{1}(w_{0})$$

$$ \frac{\partial f_{2}}{\partial w_{10}}=\frac{\partial f}{\partial f_{2}}\cdot w_{21} \cdot \frac{\partial f_{1}}{\partial w_{10}} = w_{21}\cdot f_{2}(w_{0})\cdot f_{1}(w_{0})=f_{2}(w_{0})\cdot f_{1}(w_{0})$$

$$ \frac{\partial f_{2}}{\partial w_{11}}=\frac{\partial f}{\partial f_{2}}\cdot w_{21} \cdot \frac{\partial f_{1}}{\partial w_{11}} = w_{21}\cdot f_{2}(w_{0})\cdot f_{1}(w_{0})\cdot f_{0}(w_{0})=f_{2}(w_{0})\cdot f_{1}(w_{0})\cdot f_{0}(w_{0})$$

$$ \frac{\partial f_{2}}{\partial w_{00}}=\frac{\partial f}{\partial f_{2}}\cdot w_{21}\cdot \frac{\partial f_{1}}{\partial z}\cdot w_{11}\cdot \frac{\partial f_{0}}{\partial w_{00}}=f_{2}(w_{0})\cdot f_{1}(w_{0})\cdot f_{0}(w_{0})$$

$$ \frac{\partial f_{2}}{\partial w_{01}}=\frac{\partial f}{\partial f_{2}}\cdot w_{21}\cdot \frac{\partial f_{1}}{\partial z}\cdot w_{11}\cdot \frac{\partial f_{0}}{\partial w_{01}}=f_{2}(w_{0})\cdot f_{1}(w_{0})\cdot f_{0}(w_{0})\cdot 1$$

In [49]:
reset_graph()

# Variables and their initial values
w = tf.Variable(w_0, name='w', dtype=tf.float32)
x = tf.Variable(1.0, name='x', dtype=tf.float32, trainable=False)

# Call our function to get the nodes for the three nested functions
f_2, f_1, f_0 = my_func(w, x)

# Define nodes for the gradients of the outer function
# (f_2, with respect to all parameters w of the function)
# using TensorFlow's autodiff.
# This is done by simply calling tf.gradients with f_2 (the tensor to
# differentiate) and the variables with respect to which we want to
# compute the gradients.
grads = tf.gradients(f_2, w)

grads

[<tf.Tensor 'gradients/AddN:0' shape=(3, 2) dtype=float32>]

In [50]:
# Here is where we run our TF graph.
# A node for the initializer:
init = tf.global_variables_initializer()

# Run the session and time it (the timing includes creating the session)
t_0 = time.time()

with tf.Session() as sess:
    sess.run(init)

    # We could run the graph twice:
    #   first compute the function values
    #     function_vals = sess.run([f_2, f_1, f_0])
    #   then the values of the gradients
    #     gradients = sess.run(grads)
    # or fetch everything with this one-line syntax:
    gradients, function_vals = sess.run([grads, [f_2, f_1, f_0]])

# "%3.2f" formats the elapsed time with two decimals; the previous "%f3.2"
# printed the full float followed by the literal text "3.2".
print("Computed Derivatives in %3.2f sec" % (time.time() - t_0))
print(" Function values = ", function_vals)
print("Gradients =", gradients)


Computed Derivatives in 0.0377103.2 sec
 Function values =  [3814273.0, 15.154261, 2.7182817]
Gradients = [array([[1.5712344e+08, 1.5712344e+08],
       [5.7802488e+07, 1.5712344e+08],
       [3.8142730e+06, 5.7802488e+07]], dtype=float32)]


In [51]:
# Check that calling the function after the session is over returns
# tensors (graph nodes), not computed values.
# NOTE(review): this passes the NumPy array w_0 rather than the tf.Variable w — confirm this is intended.
my_func(w_0, x)

(<tf.Tensor 'f_2_level_1/Exp:0' shape=() dtype=float32>,
 <tf.Tensor 'f_1_level_1/Exp:0' shape=() dtype=float32>,
 <tf.Tensor 'f_0_level_1/Exp:0' shape=() dtype=float32>)

### Visualize the tensorflow graph