In [1]:
import tensorflow as tf
# Turn on "memory growth" for every GPU TensorFlow can see and report the
# resulting setting for each device.
# Uses the stable tf.config.list_physical_devices entry point; the
# tf.config.experimental.* spelling of that call is a legacy alias.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    print('gpu', gpu)
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # TensorFlow refuses to change this setting once the GPU runtime has
        # been initialized (e.g. this cell is run after tensors were already
        # placed on the device). Report it instead of aborting the notebook;
        # the line below still shows the setting that is actually in effect.
        print('could not change memory growth:', err)
    print('memory growth:' , tf.config.experimental.get_memory_growth(gpu))

gpu PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
memory growth: True


In [2]:
import numpy as np
import matplotlib.pyplot as plt

Example of a simple computation using reverse-mode automatic differentiation

In [3]:
# Scalar variable to differentiate with respect to; the trailing expression
# displays its current value.
x = tf.Variable(initial_value=3.0)
x.numpy()

3.0

In [4]:
# Record y = x^2 on the tape so that it can be differentiated afterwards.
with tf.GradientTape() as tape:
    y = tf.pow(x, 2)

In [5]:
# Analytically dy/dx = 2x; ask the tape for the same derivative and display it.
dy_dx = tape.gradient(target=y, sources=x)
dy_dx.numpy()

6.0

Example of a more neural-network-like case

In [6]:
# Parameters and input of a small linear unit:
#   w - (3, 2) weight matrix drawn from a normal distribution (no seed is
#       passed here, so the values differ from run to run)
#   b - length-2 bias, initialised to zeros
#   x - one input row with three features, kept as a plain nested Python list
w = tf.Variable(tf.random.normal(shape=(3, 2)), name='w')
b = tf.Variable(tf.zeros(shape=[2], dtype=tf.float32), name='b')
x = [[1.0, 2.0, 3.0]]

In [7]:
# Display the current values of the weight variable w as a NumPy array.
w.numpy()

array([[ 0.56160814, -0.7140258 ],
       [ 0.15853585, -0.4588056 ],
       [-0.16721083,  2.3940017 ]], dtype=float32)

In [8]:
# Display the current values of the bias variable b as a NumPy array.
b.numpy()

array([0., 0.], dtype=float32)

In [9]:
# Display the input x (a plain nested Python list at this point, not a tensor).
x

[[1.0, 2.0, 3.0]]

In [10]:
# Forward pass of the linear unit recorded on a fresh tape, reduced to a scalar
# loss (mean of the squared outputs).
with tf.GradientTape() as tape:
    y = tf.matmul(x, w) + b
    loss = tf.reduce_mean(tf.pow(y, 2))

In [11]:
# Gradients of the loss with respect to the weights and the bias, returned in
# the same order as the sources list.
dl_dw, dl_db = tape.gradient(target=loss, sources=[w, b])

In [12]:
# Print each parameter's shape followed by the shape of its gradient; the "\n"
# item leaves two blank lines between the weight pair and the bias pair.
print(w.shape, dl_dw.shape, "\n", b.shape, dl_db.shape, sep="\n")

(3, 2)
(3, 2)


(2,)
(2,)


### Gradients with respect to a model

In [15]:
# One input row with three features and a two-unit dense layer with ReLU
# activation.
x = tf.constant([[1.0, 2.0, 3.0]])
layer = tf.keras.layers.Dense(units=2, activation='relu')

# Record the layer's forward pass and reduce it to a scalar loss (mean of the
# squared outputs).
with tf.GradientTape() as tape:
    y = layer(x)
    loss = tf.reduce_mean(tf.pow(y, 2))

# Gradients of the loss with respect to every trainable variable of the layer,
# in the order given by layer.trainable_variables.
grad = tape.gradient(target=loss, sources=layer.trainable_variables)

In [16]:
# Print the list of gradient tensors, one per entry of layer.trainable_variables.
print(grad)

[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0.       , 2.5058594],
       [0.       , 5.0117188],
       [0.       , 7.517578 ]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([0.       , 2.5056038], dtype=float32)>]
