In [19]:
import numpy as np
import math
import random
import matplotlib
import matplotlib.pyplot as plt
from itertools import count

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

### Find gradients (derivatives) of differentiable functions using GradientTape

In [20]:
# Two independent 2x2 samples drawn with tf.random.normal.
a = tf.random.normal(shape=(2, 2))
b = tf.random.normal(shape=(2, 2))

# `a` comes straight from tf.random.normal (it is not a tf.Variable), so it is
# registered with the tape explicitly via watch().
with tf.GradientTape() as tape:
    tape.watch(a)
    sum_of_squares = tf.square(a) + tf.square(b)
    c = tf.sqrt(sum_of_squares)

# Gradient of c with respect to a, as recorded by the tape.
dc_da = tape.gradient(c, a)
print(dc_da)

tf.Tensor(
[[-0.41719413 -0.8246322 ]
 [-0.89293253  0.91085553]], shape=(2, 2), dtype=float32)


### Check the result against the analytic derivative, ∂c/∂a = a / sqrt(a² + b²)

In [21]:
# Closed-form derivative of sqrt(a^2 + b^2) with respect to a, for comparison
# with the gradient printed by the previous cell.
magnitude = tf.sqrt(tf.square(a) + tf.square(b))
a / magnitude

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-0.41719413, -0.8246322 ],
       [-0.89293253,  0.9108555 ]], dtype=float32)>

### Repeat with sin(x), whose derivative should equal cos(x)

In [22]:
# A single sample; shape=() requests a scalar tensor.
x = tf.random.normal(shape=())

with tf.GradientTape() as tape:
    tape.watch(x)
    y = tf.sin(x)

# Print the tape's dy/dx next to cos(x), the closed-form derivative of sin(x).
dy_dx = tape.gradient(y, x)
print(dy_dx, tf.cos(x))

tf.Tensor(0.94859457, shape=(), dtype=float32) tf.Tensor(0.94859457, shape=(), dtype=float32)


### Derivative with respect to a and b

In [23]:
a = tf.random.normal(shape=(2, 2))
b = tf.random.normal(shape=(2, 2))

# Both inputs are watched so the tape can differentiate with respect to each.
with tf.GradientTape() as tape:
    tape.watch([a, b])
    c = tf.sqrt(tf.square(a) + tf.square(b))

# A list of sources is passed, and the gradients are printed as one list.
partials = tape.gradient(c, [a, b])
print(partials)

# Closed-form partial derivatives a / c and b / c, recomputed outside the tape
# so they can be compared with the two entries printed above.
magnitude = tf.sqrt(tf.square(a) + tf.square(b))
for source in (a, b):
    print(source / magnitude)

[<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 0.20429626,  0.6940333 ],
       [-0.7589702 ,  0.9915183 ]], dtype=float32)>, <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-0.9789092 ,  0.71994287],
       [ 0.6511253 , -0.12996784]], dtype=float32)>]
tf.Tensor(
[[ 0.20429625  0.6940333 ]
 [-0.7589702   0.99151826]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[-0.97890913  0.71994287]
 [ 0.6511253  -0.12996784]], shape=(2, 2), dtype=float32)


### Use GradientTape with loss function and backpropagation to update weights

In [24]:
# Four (4, 2) weights, each sampled with tf.random.uniform and wrapped in a
# tf.Variable so it can be updated by an optimizer later on.
a, b, c, d = (
    tf.Variable(tf.random.uniform(shape=(4, 2))) for _ in range(4)
)

### Generate x and y = sin(x), then use GradientTape to fit the cubic a + b·x + c·x² + d·x³ to y, with a, b, c, and d as weights

In [25]:
# Fixed (4, 2) input sample used by the training loop below.
x = tf.random.normal((4, 2))

In [26]:
loss_function = keras.losses.MeanSquaredError()
optimizer = keras.optimizers.SGD(learning_rate=1e-3)

# The target depends only on the fixed input x, so it is computed once instead
# of being rebuilt on every training step.
y = tf.sin(x)
weights = [a, b, c, d]

for _ in range(10000):
    # a, b, c and d are tf.Variables, which the tape tracks automatically.
    # x is not differentiated here, so it does not need tape.watch().
    with tf.GradientTape() as tape:
        # Element-wise cubic polynomial in x with one coefficient per element.
        y_pred = a + b*x + c*x**2 + d*x**3
        # Keras losses are called as loss(y_true, y_pred).
        loss = loss_function(y, y_pred)
    grads = tape.gradient(loss, weights)
    optimizer.apply_gradients(zip(grads, weights))

In [27]:
# Compare the fitted values with the target. y_pred is the forward pass from the
# last loop iteration, i.e. it was computed before that iteration's gradient
# update was applied to the weights.
print(y_pred, y)

tf.Tensor(
[[ 0.73954475  0.9958941 ]
 [-0.11540518  0.36134112]
 [ 0.99766237 -0.8624366 ]
 [ 0.68297184 -0.08792038]], shape=(4, 2), dtype=float32) tf.Tensor(
[[ 0.7367965   0.9958812 ]
 [-0.15534252  0.32845432]
 [ 0.9976536  -0.86250705]
 [ 0.68260336 -0.09918554]], shape=(4, 2), dtype=float32)


### Use Keras layer with GradientTape and trainable weights

In [28]:
class Linear(keras.layers.Layer):
    """Affine layer computing ``inputs @ w + b``.

    The weights are created lazily in ``build`` so that the size of the last
    input axis does not have to be known when the layer is constructed.

    Args:
        units: Number of output features (size of the last output axis).
        **kwargs: Standard ``keras.layers.Layer`` keyword arguments such as
            ``name`` or ``dtype``; forwarded unchanged to the base class.
    """

    def __init__(self, units=32, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the kernel ``w`` and bias ``b`` for the given input shape."""
        # Kernel maps the last input axis onto `units` output features.
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )

    def call(self, inputs):
        """Return ``inputs @ w + b``."""
        return tf.matmul(inputs, self.w) + self.b

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"units": self.units})
        return config

# Layer instance with two output units, trained in the next cell.
linear_layer = Linear(units=2)

In [29]:
x = tf.random.uniform(shape=(4,2))

loss_function = keras.losses.MeanSquaredError()
optimizer = keras.optimizers.SGD(learning_rate=1e-3)

# The target depends only on the fixed input x, so it is computed once instead
# of being rebuilt on every training step.
y = tf.sqrt(x)

for _ in range(10000):
    # Only the layer's weights are differentiated, so x needs no tape.watch().
    with tf.GradientTape() as tape:
        y_pred = linear_layer(x)
        # Keras losses are called as loss(y_true, y_pred).
        loss = loss_function(y, y_pred)
    # Read the weights after the forward pass: the layer creates them in
    # build(), so the list may still be empty before the layer's first call.
    weights = linear_layer.trainable_weights
    grads = tape.gradient(loss, weights)
    optimizer.apply_gradients(zip(grads, weights))

In [30]:
# Compare the layer's output with the target. y_pred is the forward pass from
# the last loop iteration, i.e. it was computed before that iteration's gradient
# update was applied to the layer's weights.
print(y_pred, y)

tf.Tensor(
[[0.26198223 0.27958876]
 [0.3467316  0.37732685]
 [0.25753865 0.27051944]
 [0.8441278  0.920033  ]], shape=(4, 2), dtype=float32) tf.Tensor(
[[0.21049963 0.23852402]
 [0.36083207 0.45041516]
 [0.23989944 0.17581922]
 [0.87942374 0.9570649 ]], shape=(4, 2), dtype=float32)
