In [1]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [2]:
from __future__ import absolute_import, division, \
    print_function, unicode_literals

In [3]:
import tensorflow as tf

In [4]:
# Eager execution must be switched on explicitly in TF 1.x. TF 2.x runs
# eagerly by default and no longer exposes tf.enable_eager_execution at the
# top level, so only call it when it exists.
if hasattr(tf, "enable_eager_execution"):
    tf.enable_eager_execution()

In [5]:
# Gradient Tapes
# The tf.GradientTape API enables automatic differentiation: it computes
# the gradient of a computation with respect to its input variables.
# TF records all ops executed inside the context of a tf.GradientTape
# onto a "tape".
# TF then uses that tape and the gradients associated with each recorded
# op to compute the gradients of the "recorded" computation using
# reverse-mode differentiation.
# Example: start from a 2x2 tensor of ones.
x = tf.ones((2,2))

In [6]:
# Record the computation z = (sum of x)^2 on the tape `t`.
with tf.GradientTape() as t:
    # Explicitly ask the tape to track the tensor x.
    t.watch(x)
    # y is the sum of all entries of x.
    y = tf.reduce_sum(x)
    # z = y * y
    z = tf.multiply(y,y)

In [7]:
# Derivative of z with respect to the original input tensor x.
# With z = y * y and y = sum(x), every entry of dz/dx equals 2 * y.
dz_dx = t.gradient(z,x)

In [8]:
# Show the gradient as a NumPy array; for the 2x2 tensor of ones, y = 4,
# so every entry is 2 * 4 = 8.
dz_dx.numpy()

array([[8., 8.],
       [8., 8.]], dtype=float32)

In [9]:
# Gradients of the output can also be requested with respect to
# intermediate values computed inside a recorded tf.GradientTape context.
x = tf.ones((2,2))

In [10]:
# Record the same computation again on a fresh tape, this time to
# differentiate with respect to the intermediate value y.
with tf.GradientTape() as t:
    # Explicitly ask the tape to track the tensor x.
    t.watch(x)
    # y is the sum of all entries of x.
    y = tf.reduce_sum(x)
    # z = y * y
    z = tf.multiply(y,y)

In [11]:
# Gradient of z with respect to the intermediate value y: dz/dy = 2 * y.
dz_dy = t.gradient(z,y)

In [12]:
# Show the scalar gradient; with y = 4 this is 2 * 4 = 8.
dz_dy.numpy()

8.0

In [13]:
# By default, the resources held by a GradientTape are released as
# soon as the GradientTape.gradient() method is called.
# To compute multiple gradients over the same computation, create
# a persistent gradient tape.
# This allows multiple calls to the gradient() method, because the
# resources are only released when the tape object is garbage collected.

In [14]:
# Scalar input used for the persistent-tape example.
x = tf.constant(3.0)

In [15]:
# Inspect the value currently held by x.
x.numpy()

3.0

In [16]:
# persistent=True lets gradient() be called more than once on this tape.
with tf.GradientTape(persistent=True) as t:
    # Explicitly ask the tape to track the tensor x.
    t.watch(x)
    # y = x^2
    y = x * x
    # z = y^2 = x^4
    z = y * y
    

In [17]:
# First gradient() call on the persistent tape: z = x^4, so
# dz/dx = 4 * x^3 (108.0 at x = 3.0).
dz_dx = t.gradient(z,x)

In [18]:
# Second gradient() call on the same persistent tape: y = x^2, so
# dy/dx = 2 * x (6.0 at x = 3.0).
dy_dx = t.gradient(y,x)

In [19]:
# Display dz/dx as a tensor.
dz_dx

<tf.Tensor: id=41, shape=(), dtype=float32, numpy=108.0>

In [20]:
# Display dy/dx as a tensor.
dy_dx

<tf.Tensor: id=45, shape=(), dtype=float32, numpy=6.0>

In [22]:
# Drop the reference to the persistent tape so that its resources can be
# released. The guard keeps this cell safe to re-run: a bare `del t` raises
# NameError once `t` is already gone.
if 't' in globals():
    del t

In [24]:
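# Recording control flow
# The tape records ops as they are executed, so the Python `for` and `if`
# statements in the function below determine which ops end up on the tape.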

In [25]:
def f(x, y):
    """Multiply 1.0 by ``x`` once for every index in ``range(y)`` that is 2, 3 or 4.

    Args:
        x: Value passed as the second operand to ``tf.multiply``.
        y: Integer loop bound handed to ``range``.

    Returns:
        The Python float ``1.0`` when no index qualifies (``y <= 2``);
        otherwise the result of the chained ``tf.multiply`` calls.
    """
    result = 1.0
    for step in range(y):
        # Skip indices outside the open interval (1, 5).
        if step <= 1 or step >= 5:
            continue
        result = tf.multiply(result, x)
    return result

In [26]:
def grad(x, y):
    """Return the gradient of ``f(x, y)`` with respect to ``x``.

    Args:
        x: Tensor the tape is asked to watch and differentiate against.
        y: Integer loop bound forwarded to ``f``.

    Returns:
        The value of ``tape.gradient`` for the output of ``f`` with
        respect to ``x``.
    """
    with tf.GradientTape() as tape:
        # Explicitly ask the tape to track x before running f.
        tape.watch(x)
        result = f(x, y)
    df_dx = tape.gradient(result, x)
    return df_dx

In [27]:
# Scalar point at which the gradient of f is evaluated below.
x = tf.convert_to_tensor(2.0)

In [28]:
# y = 6: indices 2, 3 and 4 qualify, so f = x^3 and df/dx = 3 * x^2 = 12 at x = 2.
assert grad(x,6).numpy() == 12.0

In [29]:
# y = 5: indices 2, 3 and 4 still qualify, so the gradient is again 3 * x^2 = 12.
assert grad(x,5).numpy() == 12.0

In [30]:
# y = 4: only indices 2 and 3 qualify, so f = x^2 and df/dx = 2 * x = 4 at x = 2.
assert grad(x,4).numpy() == 4.0

In [31]:
# Higher-order Gradients
# Ops executed inside a GradientTape context are recorded, and this includes
# a gradient computation performed there; the nested tapes below rely on it.

# Create a TF variable initialized to 1.0
x = tf.Variable(1.0)

In [32]:
# Nested tapes: the inner tape (t2) differentiates y, while the outer tape (t)
# records that gradient computation so it can be differentiated again.
# x is a tf.Variable and no watch() call is made here, unlike the tensor
# examples earlier in the notebook.
with tf.GradientTape() as t:
    with tf.GradientTape() as t2:
        # y = x^3
        y = x*x*x
    # Compute the gradient inside the 't' context manager, which means
    # the gradient computation itself is differentiable as well.
    dy_dx = t2.gradient(y,x)
# Second derivative: differentiating dy/dx = 3 * x^2 gives 6 * x.
d2y_dx2 = t.gradient(dy_dx,x)

In [33]:
# First derivative at x = 1.0: 3 * x^2 = 3.
dy_dx.numpy()

3.0

In [34]:
# Second derivative at x = 1.0: 6 * x = 6.
d2y_dx2.numpy()

6.0