In [1]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np

### Finding gradients (derivatives) of differentiable functions using GradientTape

In [2]:
# Two random 2x2 tensors; only `a` is differentiated in this cell.
a = tf.random.normal(shape=(2, 2))
b = tf.random.normal(shape=(2, 2))

with tf.GradientTape() as tape:
    # `a` is a plain tensor (not a tf.Variable), so it must be watched explicitly.
    tape.watch(a)
    sum_of_squares = tf.add(tf.square(a), tf.square(b))
    c = tf.sqrt(sum_of_squares)

# Gradient of c = sqrt(a^2 + b^2) with respect to a.
grads = tape.gradient(c, a)
print(grads)

tf.Tensor(
[[-0.65299165  0.30126494]
 [-0.73170257 -0.99864626]], shape=(2, 2), dtype=float32)


In [3]:
# Closed-form derivative a / sqrt(a^2 + b^2), for comparison with the tape result.
tf.math.divide(a, tf.sqrt(tf.add(tf.square(a), tf.square(b))))

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-0.65299165,  0.30126497],
       [-0.73170257, -0.99864626]], dtype=float32)>

In [4]:
# Scalar check: the tape gradient of sin(x) is printed next to cos(x).
x = tf.random.normal(shape=())

with tf.GradientTape() as tape:
    tape.watch(x)  # plain tensor, so it has to be watched explicitly
    y = tf.sin(x)

grads = tape.gradient(y, x)
analytic = tf.cos(x)
print(grads, analytic)

tf.Tensor(0.9783102, shape=(), dtype=float32) tf.Tensor(0.9783102, shape=(), dtype=float32)


### Derivatives with respect to both a and b

In [5]:
a = tf.random.normal(shape=(2, 2))
b = tf.random.normal(shape=(2, 2))
sources = [a, b]

with tf.GradientTape() as tape:
    # Both inputs are plain tensors, so watch them together.
    tape.watch(sources)
    c = tf.sqrt(tf.square(a) + tf.square(b))

# Passing a list of sources returns a list of gradients in the same order.
grads = tape.gradient(c, sources)

print(grads)

# Closed-form derivatives a / sqrt(a^2 + b^2) and b / sqrt(a^2 + b^2) for comparison.
norm = tf.sqrt(tf.square(a) + tf.square(b))
for source in (a, b):
    print(source / norm)

[<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-0.5640464 ,  0.96908313],
       [ 0.938552  , -0.06914212]], dtype=float32)>, <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 0.8257431,  0.2467344],
       [-0.3451379, -0.9976069]], dtype=float32)>]
tf.Tensor(
[[-0.56404644  0.96908313]
 [ 0.938552   -0.06914211]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[ 0.82574314  0.24673438]
 [-0.34513792 -0.9976068 ]], shape=(2, 2), dtype=float32)


### Use GradientTape with a loss function and backpropagation to update weights

In [6]:
# Four (4, 2) coefficient tensors drawn from tf.random.uniform, each wrapped
# in a tf.Variable so the optimizer can update them in place.
a, b, c, d = (
    tf.Variable(tf.random.uniform(shape=(4, 2))) for _ in range(4)
)

In [7]:
# Fixed inputs for the fit below; same (4, 2) shape as the coefficients a, b, c, d.
x = tf.random.normal(shape=(4,2))

In [8]:
loss_function = keras.losses.MeanSquaredError()
optimizer = keras.optimizers.SGD(learning_rate=1e-3)

# Fit the element-wise cubic a + b*x + c*x^2 + d*x^3 to sin(x).
coefficients = [a, b, c, d]

# The target does not depend on the coefficients, so compute it once,
# outside both the loop and the tape.
y = tf.sin(x)

for i in range(10000):
    with tf.GradientTape() as tape:
        # tf.Variable objects are watched automatically; x needs no watch
        # because no gradient is taken with respect to it.
        y_pred = a + b*x + c*x**2 + d*x**3
        # Keras losses are called as loss(y_true, y_pred).
        loss = loss_function(y, y_pred)
    grads = tape.gradient(loss, coefficients)
    optimizer.apply_gradients(zip(grads, coefficients))

In [9]:
# y_pred comes from the last loop iteration (computed before the final weight update).
print(y_pred, y)

tf.Tensor(
[[ 0.9924548   0.6718009 ]
 [ 0.5780469  -0.9187347 ]
 [ 0.97796816  0.25729635]
 [-0.9640764  -0.54721904]], shape=(4, 2), dtype=float32) tf.Tensor(
[[ 0.9924541   0.66725755]
 [ 0.57503873 -0.9187559 ]
 [ 0.97796226  0.25046855]
 [-0.9640871  -0.57426655]], shape=(4, 2), dtype=float32)


### Use a Keras layer with GradientTape and trainable weights

In [10]:
class Linear(keras.layers.Layer):
    """Affine layer computing ``y = x @ w + b``.

    Args:
        units: Number of output features (size of the last output axis).
        **kwargs: Standard ``keras.layers.Layer`` keyword arguments
            (e.g. ``name``, ``dtype``), forwarded to the base class.
    """

    def __init__(self, units=32, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create ``w`` and ``b`` once the size of the last input axis is known."""
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        """Apply the affine map to ``inputs``; the bias is broadcast over leading axes."""
        return tf.matmul(inputs, self.w) + self.b

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from config."""
        config = super().get_config()
        config.update({"units": self.units})
        return config

# Layer with 2 output units; w and b are created in build(), not here.
linear_layer = Linear(2)

In [11]:
x = tf.random.uniform(shape=(4,2))
# Target is the element-wise square root of the inputs. It does not depend on
# the layer weights, so compute it once, outside both the loop and the tape.
y = tf.sqrt(x)

loss_function = keras.losses.MeanSquaredError()
optimizer = keras.optimizers.SGD(learning_rate=1e-3)

for i in range(10000):
    with tf.GradientTape() as tape:
        # The layer's trainable weights are watched automatically; x needs no
        # watch because no gradient is taken with respect to it.
        y_pred = linear_layer(x)
        # Keras losses are called as loss(y_true, y_pred).
        loss = loss_function(y, y_pred)
    # Read the weights after the forward pass: they are created in build().
    weights = linear_layer.trainable_weights
    grads = tape.gradient(loss, weights)
    optimizer.apply_gradients(zip(grads, weights))

In [12]:
# y_pred comes from the last loop iteration (computed before the final weight update).
print(y_pred, y)

tf.Tensor(
[[0.7051412  0.90015876]
 [0.8761788  0.6886853 ]
 [0.83623683 0.8709676 ]
 [0.34059483 0.5946534 ]], shape=(4, 2), dtype=float32) tf.Tensor(
[[0.6880582  0.96630543]
 [0.9706181  0.5882389 ]
 [0.85627097 0.89709115]
 [0.20102546 0.59746027]], shape=(4, 2), dtype=float32)
