In [1]:
import tensorflow as tf
# Print the installed TensorFlow version so the recorded outputs below can be
# matched to the release that produced them.
print(tf.__version__)

# Optional diagnostics, intentionally left disabled.
# NOTE(review): presumably meant to quiet TensorFlow's native (C++) logging and
# to check whether a GPU is visible -- confirm before re-enabling. The log-level
# environment variable is normally set before `import tensorflow` to take effect.
# import os
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# tf.test.is_gpu_available( cuda_only=False, min_cuda_compute_capability=None )

2.0.0


## 2.3.1 a simple example

In [2]:
# Column vector [0, 1, 2, 3]^T as a plain float32 Tensor of shape (4, 1).
# It is built directly with tf.range instead of wrapping the values in a
# throwaway tf.Variable: tf.reshape returns an ordinary Tensor either way, so
# x is NOT automatically tracked and gradient tapes must `watch` it explicitly.
x = tf.reshape(tf.range(4, dtype=tf.float32), (4, 1))
x

<tf.Tensor: id=10, shape=(4, 1), dtype=float32, numpy=
array([[0.],
       [1.],
       [2.],
       [3.]], dtype=float32)>

In [3]:
# Differentiate y = 2 * x^T x with respect to x; the analytic gradient is 4 * x.
with tf.GradientTape() as tape:
    # x is a plain Tensor (not a tf.Variable), so it has to be watched by hand.
    tape.watch(x)
    y = 2 * tf.matmul(x, x, transpose_a=True)

dy_dx = tape.gradient(y, x)
dy_dx

<tf.Tensor: id=30, shape=(4, 1), dtype=float32, numpy=
array([[ 0.],
       [ 4.],
       [ 8.],
       [12.]], dtype=float32)>

## 2.3.2 training mode and predicting mode

In [4]:
# A persistent tape may be queried for more than one gradient.
with tf.GradientTape(persistent=True) as g:
    g.watch(x)
    y = x * x
    z = y * y

# Query the gradients AFTER leaving the context: calling gradient() inside a
# persistent tape's `with` block records the gradient ops themselves on the
# tape, which is only wanted when computing higher-order derivatives.
dz_dx = g.gradient(z, x)  # elementwise 4 * x**3 -> [0, 4, 32, 108] for x = [0, 1, 2, 3]
dy_dx = g.gradient(y, x)  # elementwise 2 * x    -> [0, 2, 4, 6]
# The persistent tape keeps its recorded state until it is released; run
# `del g` once no further gradients are needed from it.
dz_dx,dy_dx



(<tf.Tensor: id=41, shape=(4, 1), dtype=float32, numpy=
 array([[  0.],
        [  4.],
        [ 32.],
        [108.]], dtype=float32)>,
 <tf.Tensor: id=47, shape=(4, 1), dtype=float32, numpy=
 array([[0.],
        [2.],
        [4.],
        [6.]], dtype=float32)>)

In [5]:
# Print the built-in class documentation for tf.GradientTape (constructor
# arguments and usage examples); the recorded output below is truncated.
help(tf.GradientTape)

Help on class GradientTape in module tensorflow.python.eager.backprop:

class GradientTape(builtins.object)
 |  GradientTape(persistent=False, watch_accessed_variables=True)
 |  
 |  Record operations for automatic differentiation.
 |  
 |  Operations are recorded if they are executed within this context manager and
 |  at least one of their inputs is being "watched".
 |  
 |  Trainable variables (created by `tf.Variable` or `tf.compat.v1.get_variable`,
 |  where `trainable=True` is default in both cases) are automatically watched.
 |  Tensors can be manually watched by invoking the `watch` method on this context
 |  manager.
 |  
 |  For example, consider the function `y = x * x`. The gradient at `x = 3.0` can
 |  be computed as:
 |  
 |  ```python
 |  x = tf.constant(3.0)
 |  with tf.GradientTape() as g:
 |    g.watch(x)
 |    y = x * x
 |  dy_dx = g.gradient(y, x) # Will compute to 6.0
 |  ```
 |  
 |  GradientTapes can be nested to compute higher-order derivatives. For example,
 | 

## 2.3.3 Gradients through Python control flow

In [6]:
def f(a):
    """Scale `a` by a value-dependent constant using plain Python control flow.

    `a` is doubled once, then doubled repeatedly for as long as the norm of
    the running result is below 1000. The result is returned unchanged when
    the sum of its elements is positive, and multiplied by 100 otherwise.

    The Python `while` and `if` evaluate tensor conditions, so both the number
    of doublings and the branch taken depend on the value of `a`. For a fixed
    path the output is a constant multiple of `a`.

    Note: an all-zero `a` never reaches the norm threshold, so the loop would
    not terminate for that input.

    Args:
        a: Tensor to scale.

    Returns:
        The scaled tensor, with the same shape as `a`.
    """
    scaled = a * 2
    while tf.norm(scaled) < 1000:
        scaled = scaled * 2
    # Keep the sign-dependent branch: non-positive sums get an extra factor of 100.
    return scaled if tf.reduce_sum(scaled) > 0 else 100 * scaled

In [7]:
a = tf.random.normal((1,1),dtype=tf.float32)
with tf.GradientTape() as tape:
    tape.watch(a)  # `a` is a plain Tensor, so it must be watched explicitly
    c = f(a)

# f(a) is a constant multiple of `a` along the path taken, so dc/da should equal
# c / a. Compare with a relative tolerance instead of exact `==`: when f takes
# its `100 * b` branch, float32 rounding in the multiply-then-divide can leave
# c / a one ulp away from the exact gradient and make `==` report False.
grad = tape.gradient(c, a)
ratio = c / a
tf.abs(grad - ratio) <= 1e-5 * tf.abs(ratio)

<tf.Tensor: id=201, shape=(1, 1), dtype=bool, numpy=array([[ True]])>