### 近似求导(derivative/gradient)

#### 方法一：写求导函数

In [2]:
import tensorflow as tf

Init Plugin
Init Graph Optimizer
Init Kernel


In [3]:
def f(x):
    """Evaluate the quadratic f(x) = 3x^2 + 2x - 1.

    Its analytic derivative is f'(x) = 6x + 2.
    """
    quadratic_term = 3. * x ** 2
    linear_term = 2. * x
    return quadratic_term + linear_term - 1


def approximate_derivative(f, x, eps=1e-3):
    """Estimate f'(x) numerically with a central difference.

    Args:
        f: Callable taking a single argument.
        x: Point at which the derivative is estimated.
        eps: Half-width of the symmetric step taken around x.

    Returns:
        (f(x + eps) - f(x - eps)) / (2 * eps)
    """
    ahead = f(x + eps)
    behind = f(x - eps)
    rise = ahead - behind
    run = 2. * eps
    return rise / run


# Sanity check at x = 1: analytically f'(1) = 6 * 1 + 2 = 8.
print(approximate_derivative(f, 1.))

7.999999999999119


In [4]:
def g(x1, x2):
    """Evaluate g(x1, x2) = (x1 + 5) * x2^2."""
    shifted = x1 + 5
    squared = x2 ** 2
    return shifted * squared


def approximate_gradient(g, x1, x2, eps=1e-3):
    """Estimate the gradient of a two-argument function numerically.

    Each partial derivative is obtained by holding the other argument fixed
    and passing the resulting one-argument function to
    approximate_derivative.

    Args:
        g: Callable taking two arguments.
        x1: First coordinate of the evaluation point.
        x2: Second coordinate of the evaluation point.
        eps: Step size forwarded to approximate_derivative.

    Returns:
        Tuple (dg/dx1, dg/dx2) evaluated at (x1, x2).
    """
    def along_x1(value):
        # x2 held fixed.
        return g(value, x2)

    def along_x2(value):
        # x1 held fixed.
        return g(x1, value)

    partial_x1 = approximate_derivative(along_x1, x1, eps)
    partial_x2 = approximate_derivative(along_x2, x2, eps)
    return partial_x1, partial_x2


# Analytically at (2, 3): dg/dx1 = x2^2 = 9 and dg/dx2 = 2 * (x1 + 5) * x2 = 42.
print(approximate_gradient(g, 2., 3.))

(8.999999999993236, 41.999999999994486)


#### 方法二：使用 tf.GradientTape()

In [5]:
# Trainable tf.Variable objects are watched by GradientTape automatically,
# so no explicit tape.watch() call is needed here.
x1 = tf.Variable(2.)
x2 = tf.Variable(3.)
with tf.GradientTape() as tape:
    # Forward pass: the operations producing z are recorded by the tape.
    z = g(x1, x2)

# Gradients of z with respect to x1 and x2, returned as a list in that order.
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)

Metal device set to: Apple M1
[<tf.Tensor: shape=(), dtype=float32, numpy=8.999998>, <tf.Tensor: shape=(), dtype=float32, numpy=41.999996>]


2021-08-08 17:40:05.638354: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2021-08-08 17:40:05.638832: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [6]:
# Same computation with constant tensors instead of variables.
x1 = tf.constant(2.)
x2 = tf.constant(3.)
with tf.GradientTape() as tape:
    # Plain tensors are not watched automatically; register them explicitly
    # so the tape tracks operations that involve them.
    tape.watch(x1)
    tape.watch(x2)
    # Forward pass: the operations producing z are recorded by the tape.
    z = g(x1, x2)

# Gradients of z with respect to x1 and x2, returned as a list in that order.
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)

[<tf.Tensor: shape=(), dtype=float32, numpy=8.999998>, <tf.Tensor: shape=(), dtype=float32, numpy=41.999996>]


In [7]:
x = tf.Variable(5.)
with tf.GradientTape() as tape:
    # Two separate targets computed from the same variable.
    z1 = 3 * x
    z2 = x ** 2
# With a list of targets the result is the sum of their gradients:
# z1' + z2' = 3 + 2x, i.e. 13 at x = 5.
tape.gradient([z1, z2], x)

<tf.Tensor: shape=(), dtype=float32, numpy=12.999999>

#### 方法二（续）：使用嵌套的 tf.GradientTape() 求二阶导数

In [8]:
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)

# Nested tapes: the inner tape differentiates z, while the outer tape records
# that gradient computation so it can be differentiated a second time.
with tf.GradientTape(persistent=True) as outer_tape:
    # NOTE(review): inner_tape.gradient is called only once below, so
    # persistent=True looks unnecessary on the inner tape -- confirm.
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    # First-order gradients [dz/dx1, dz/dx2]; computed inside the outer tape's
    # context so that the outer tape sees these operations.
    inner_grads = inner_tape.gradient(z, [x1, x2])
# One outer_tape.gradient call per first-order gradient (which is why the
# outer tape is persistent); each row holds the derivatives of that gradient
# with respect to x1 and x2.
outer_grads = [outer_tape.gradient(inner_grad, [x1, x2])
               for inner_grad in inner_grads]

# The first entry prints as None rather than 0: dz/dx1 = x2^2 does not depend
# on x1, so there is no recorded path from x1 to that gradient.
print(outer_grads)

# Drop the references to the persistent tapes now that they are no longer used.
del inner_tape
del outer_tape

[[None, <tf.Tensor: shape=(), dtype=float32, numpy=5.9999995>], [<tf.Tensor: shape=(), dtype=float32, numpy=5.9999995>, <tf.Tensor: shape=(), dtype=float32, numpy=14.0>]]


#### 实现梯度下降 (Gradient Descent) 算法

In [9]:
# Approach 1: apply the gradient-descent update rule by hand.

learning_rate = 0.1
x = tf.Variable(0.0)

# Run 100 gradient-descent steps on f(x) = 3x^2 + 2x - 1.
for _ in range(100):
    # Only the forward pass needs to be recorded by the tape.
    with tf.GradientTape() as tape:
        z = f(x)
    # Differentiate and update after leaving the tape context, so neither the
    # gradient computation nor the assignment runs under the recording tape.
    dz_dx = tape.gradient(z, x)
    # x <- x - learning_rate * df/dx
    x.assign_sub(learning_rate * dz_dx)
# The minimum of f is where f'(x) = 6x + 2 = 0, i.e. x = -1/3.
print(x)


<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.33333334>


In [11]:
# Approach 2: let a Keras SGD optimizer apply the update.

learning_rate = 0.1
x = tf.Variable(0.0)

optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)

# Run 100 gradient-descent steps on f(x) = 3x^2 + 2x - 1.
for _ in range(100):
    # Only the forward pass needs to be recorded by the tape.
    with tf.GradientTape() as tape:
        z = f(x)
    # Differentiate and update after leaving the tape context, so neither the
    # gradient computation nor the optimizer step runs under the recording tape.
    dz_dx = tape.gradient(z, x)
    # apply_gradients takes (gradient, variable) pairs; for plain SGD this
    # performs x <- x - learning_rate * df/dx.
    optimizer.apply_gradients([(dz_dx, x)])
# The minimum of f is where f'(x) = 6x + 2 = 0, i.e. x = -1/3.
print(x)



<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.33333334>
