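TensorFlow computes gradients using *automatic differentiation*: it records the operations of the forward pass and then applies the chain rule backwards through them.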

In [2]:
import tensorflow as tf

Consider the loss function $L = (y - z)^2$, where $z = w x + b$ is the model's prediction and $y$ is the target value.

In [9]:
# Trainable parameters of the linear model z = w * x + b.
w = tf.Variable(1.0)
b = tf.Variable(0.5)

# A single example: input x and target y (plain tensors, not variables).
x = tf.convert_to_tensor([1.4])
y = tf.convert_to_tensor([2.1])

# persistent=True lets the same tape be queried for gradients more than once.
with tf.GradientTape(persistent=True) as tape:
    z = w * x + b
    loss = tf.reduce_sum(tf.square(tf.subtract(y, z)))

In [10]:
# Gradient of the loss with respect to the weight w.
dloss_dw = tape.gradient(target=loss, sources=w)
tf.print('dL/dw: ', dloss_dw)

dL/dw:  -0.559999764


Now compute the gradient w.r.t. a non-trainable tensor (here, the input `x`).

In [11]:
# x is a plain tensor rather than a tf.Variable, so it has to be registered
# with tape.watch() before the tape will record operations involving it.
with tf.GradientTape() as tape:
    tape.watch(x)
    z = w * x + b
    loss = tf.reduce_sum(tf.square(tf.subtract(y, z)))

In [12]:
dloss_dx = tape.gradient(loss, x)
tf.print('dL/sx: ', dloss_dx)

dL/sx:  [-0.399999857]


In [14]:
# Record the forward pass on a persistent tape so that several gradients
# can be requested from it afterwards.
with tf.GradientTape(persistent=True) as tape:
    z = w * x + b
    loss = tf.reduce_sum(tf.square(tf.subtract(y, z)))

# First query: gradient with respect to the bias.
dloss_db = tape.gradient(target=loss, sources=b)
tf.print('dL/db:', dloss_db)

# Second query on the same tape: gradient with respect to the weight.
dloss_dw = tape.gradient(target=loss, sources=w)
tf.print('dL/dw:', dloss_dw)

dL/db: -0.399999857
dL/dw: -0.559999764


Once the gradients of the loss w.r.t. the model parameters have been computed, we can define an optimizer and apply those gradients to update (optimize) the model.

In [15]:
# Plain stochastic gradient descent with its default settings.
optimizer = tf.keras.optimizers.SGD()

# Pair each gradient with the variable it updates, then take one step.
grads_and_vars = [(dloss_dw, w), (dloss_db, b)]
optimizer.apply_gradients(grads_and_vars)

<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

In [16]:
# Show the parameter values after the optimizer step.
for label, variable in (('Updated w:', w), ('Updated bias:', b)):
    tf.print(label, variable)

Updated w: 1.0056
Updated bias: 0.504
