In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

2023-01-27 15:21:25.241660: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Scalar example: record y = x**2 on a tape so dy/dx can be requested afterwards.
x = tf.Variable(4.0)  # trainable variables are watched by the tape automatically
with tf.GradientTape() as tape:
    y = x**2  # forward pass: operations executed inside the context are recorded

2023-01-27 15:22:19.737535: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Result of the forward pass: 4.0**2 = 16.0
y

<tf.Tensor: shape=(), dtype=float32, numpy=16.0>

In [4]:
# Gradient of y with respect to x (the derivative): d(x**2)/dx = 2*x = 8.0 at x = 4.0.
# The tape was created without persistent=True, so gradient() can be called on it only once.
dy_dx = tape.gradient(y, x)
dy_dx


<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

In [5]:
# Gradients can be calculated for scalars but also for higher-rank tensors.
# NOTE: no random seed is set, so the values of w (and every gradient derived
# from them) differ on each fresh run of the notebook.
w = tf.Variable(tf.random.normal((4, 2)))  # random 4x2 weight matrix
w

<tf.Variable 'Variable:0' shape=(4, 2) dtype=float32, numpy=
array([[ 0.62292063, -0.07569999],
       [-0.6804897 , -0.28200847],
       [-0.5111931 , -1.1992348 ],
       [ 1.1343832 , -0.5215308 ]], dtype=float32)>

In [6]:
# Bias term: one entry per column of w.
b = tf.Variable(tf.ones(2, dtype=tf.float32))
b  # one-dimensional tensor of ones, shape (2,)

<tf.Variable 'Variable:0' shape=(2,) dtype=float32, numpy=array([1., 1.], dtype=float32)>

In [7]:
# Input row vector of shape (1, 4), so that matmul(x, w) has shape (1, 2).
# This rebinds x, which earlier in the notebook held the scalar variable 4.0.
x = tf.Variable([[10., 20., 30., 40.]], dtype=tf.float32)
x

<tf.Variable 'Variable:0' shape=(1, 4) dtype=float32, numpy=array([[10., 20., 30., 40.]], dtype=float32)>

In [8]:
# A GradientTape releases its resources as soon as tape.gradient() is called,
# so by default gradient() can be invoked only once per recorded computation.
# To invoke it multiple times on the same tape, create it with persistent=True.
with tf.GradientTape(persistent=True) as tape:
    y = tf.matmul(x, w) + b      # linear layer: (1, 4) @ (4, 2) + (2,) -> (1, 2)
    loss = tf.reduce_mean(y**2)  # scalar loss: mean of the squared outputs

In [9]:
# One call returns a gradient per source, in the same order as the list [w, b].
dl_dw, dl_db = tape.gradient(loss, [w, b])
dl_dw  # has the same shape as the weight matrix w: (4, 2)

<tf.Tensor: shape=(4, 2), dtype=float32, numpy=
array([[  236.58948,  -622.3545 ],
       [  473.17896, -1244.709  ],
       [  709.76843, -1867.0634 ],
       [  946.3579 , -2489.418  ]], dtype=float32)>

In [10]:
dl_db  # shape is the same as the shape of the bias vector b: (2,)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 23.658947, -62.235447], dtype=float32)>

In [11]:
# Keras layers are the building blocks of neural network models.
# The gradient tape records all operations made in the layer's forward pass.
layer = tf.keras.layers.Dense(2, activation='relu')  # 2 output units with ReLU
x = tf.constant([[10., 20., 30.]])  # one sample with 3 features, shape (1, 3)

In [12]:
# Run the layer under the tape, then differentiate the loss with respect to
# every trainable variable the layer owns.
with tf.GradientTape() as tape:
    y = layer(x)                # forward pass through the Dense layer
    loss = tf.reduce_sum(y**2)  # scalar loss: sum of the squared activations

grad = tape.gradient(loss, layer.trainable_variables)

In [13]:
# Gradients are calculated with respect to all weights and biases of the layer.
# An all-zero result means both ReLU units output 0 for this input, because the
# gradient of sum(y**2) is proportional to y. It depends on the layer's initial
# weights, so another run may give non-zero values.
grad

[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
 array([[0., 0.],
        [0., 0.],
        [0., 0.]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 0.], dtype=float32)>]

In [14]:
x1 = tf.Variable(5.0)  # trainable variable: watched by a tape automatically
x1

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=5.0>

In [15]:
x2 = tf.Variable(5.0, trainable=False)  # not trainable: not watched automatically
x2

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=5.0>

In [16]:
# The sum of two variables is a plain tf.Tensor, not a tf.Variable.
# It is computed here, outside of any tape context.
x3 = tf.add(x1, x2)
x3

<tf.Tensor: shape=(), dtype=float32, numpy=10.0>

In [17]:
x4 = tf.constant(5.0)  # constant tensor: not watched by a tape automatically
x4

<tf.Tensor: shape=(), dtype=float32, numpy=5.0>

In [18]:
# Compare which kinds of sources receive a gradient by default.
with tf.GradientTape() as tape:
    y = (x1**2) + (x2**2) + (x3**2) + (x4**2)
grad = tape.gradient(y, [x1, x2, x3, x4])
grad
# Only x1 gets a gradient (2 * x1 = 10.0); the rest are None because:
#   x2 is a variable created with trainable=False,
#   x3 is a tensor that was computed outside the tape and is not watched,
#   x4 is a constant and is not watched.

[<tf.Tensor: shape=(), dtype=float32, numpy=10.0>, None, None, None]

In [19]:
# A tape can be explicitly told to calculate gradients for specific tensors.
# Rebind x1 as a constant (not watched by default) and x2 as a trainable variable.
x1 = tf.constant(5.0)
x2 = tf.Variable(3.0)

In [20]:
with tf.GradientTape() as tape:
    tape.watch(x1)  # explicitly track the constant x1; the variable x2 is tracked automatically
    y = (x1**2) + (x2**2)

In [21]:
# Both sources were tracked, so both gradients exist:
# dy/dx1 = 2 * x1 = 10.0 and dy/dx2 = 2 * x2 = 6.0.
dy_dx1, dy_dx2 = tape.gradient(y, [x1, x2])
(dy_dx1, dy_dx2)

(<tf.Tensor: shape=(), dtype=float32, numpy=10.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=6.0>)

In [23]:
# By default a gradient tape automatically tracks every trainable variable it sees.
# watch_accessed_variables=False turns that off, so only tensors passed to
# tape.watch() are tracked.
with tf.GradientTape(watch_accessed_variables=False) as tape:
    tape.watch(x1)  # x1 is tracked; x2 is deliberately left untracked
    y = (x1**2) + (x2**2)

In [24]:
# With automatic watching disabled only x1 was tracked, so the gradient for
# x2 comes back as None.
dy_dx1, dy_dx2 = tape.gradient(y, [x1, x2])
(dy_dx1, dy_dx2)

(<tf.Tensor: shape=(), dtype=float32, numpy=10.0>, None)

In [26]:
# Inputs for the control-flow example: x selects the branch, x1 and x2 are the
# variables used by the two branches.
x = tf.constant(1.0)
x1 = tf.Variable(5.0)
x2 = tf.Variable(3.0)

In [28]:
# Control flow: the tape records only the operations of the branch that actually runs.
with tf.GradientTape(persistent=True) as tape:
    tape.watch(x)
    if x > 0.0:
        result = x1**2  # taken here, because x = 1.0
    else:
        result = x2**2  # never executed, so x2 is not connected to result
dx1, dx2 = tape.gradient(result, [x1, x2])
dx1, dx2
# dx1 is the gradient with respect to x1: 2 * x1 = 2 * 5.0 = 10.0; dx2 is None.
# If x were -1.0 (< 0), the else branch would run instead and the gradient
# would be with respect to x2: 2 * x2 = 2 * 3.0 = 6.0, with dx1 = None.

(<tf.Tensor: shape=(), dtype=float32, numpy=10.0>, None)

In [29]:
# Unconnected gradient: z is built from y only, so x never takes part in the
# recorded computation.
x = tf.Variable(2.)
y = tf.Variable(3.)
with tf.GradientTape() as tape:
    z = y * y
# Ask for the gradient after leaving the context, so the tape is no longer
# recording while the backward pass is computed.
dy_dx = tape.gradient(z, x)  # this is dz/dx; the variable name is kept as-is
print(dy_dx)
# Prints None: the target z does not depend on the source x, so there is no
# gradient path between them.

None
